Louis BECQUEY

Average MCCs

...@@ -12,7 +12,6 @@ doc/*.fls ...@@ -12,7 +12,6 @@ doc/*.fls
12 doc/*.fdb_latexmk 12 doc/*.fdb_latexmk
13 13
14 # Compiled Object files 14 # Compiled Object files
15 -*.o
16 obj/* 15 obj/*
17 16
18 # Executables 17 # Executables
......
...@@ -21,7 +21,8 @@ bypdir = "" ...@@ -21,7 +21,8 @@ bypdir = ""
21 biorseoDir = "." 21 biorseoDir = "."
22 exec(compile(open(biorseoDir+"/EditMe").read(), '', 'exec')) 22 exec(compile(open(biorseoDir+"/EditMe").read(), '', 'exec'))
23 runDir = path.dirname(path.realpath(__file__)) 23 runDir = path.dirname(path.realpath(__file__))
24 -outputDir = biorseoDir + "/results/" 24 +self.outputf = biorseoDir + "/results/"
25 +tempDir = biorseoDir + "/temp/"
25 HLmotifDir = biorseoDir + "/data/modules/BGSU/HL/3.2/lib" 26 HLmotifDir = biorseoDir + "/data/modules/BGSU/HL/3.2/lib"
26 ILmotifDir = biorseoDir + "/data/modules/BGSU/IL/3.2/lib" 27 ILmotifDir = biorseoDir + "/data/modules/BGSU/IL/3.2/lib"
27 descfolder = biorseoDir + "/data/modules/DESC" 28 descfolder = biorseoDir + "/data/modules/DESC"
...@@ -29,6 +30,17 @@ descfolder = biorseoDir + "/data/modules/DESC" ...@@ -29,6 +30,17 @@ descfolder = biorseoDir + "/data/modules/DESC"
29 30
30 # ================== CLASSES AND FUNCTIONS ================================ 31 # ================== CLASSES AND FUNCTIONS ================================
31 32
# Number of rejected sequences per offending character; filled by
# is_canonical_nts() and reported once the input file has been loaded.
ignored_nt_dict = {}


def is_canonical_nts(seq):
    """Tell whether ``seq`` only contains the canonical nucleotides A, C, G, U.

    A trailing line terminator, if any, is ignored, so the function accepts
    both raw lines read from a file and already-stripped sequences. The check
    is case-sensitive: callers are expected to upper-case the sequence first.

    On the first non-canonical character, its counter in ``ignored_nt_dict``
    is incremented and the scan stops.

    Returns:
        True if every character is one of "ACGU" (an empty sequence is
        accepted), False otherwise.
    """
    # Only strip the line terminator: blindly dropping the last character
    # (seq[:-1]) left the final nucleotide unchecked for stripped sequences.
    for c in seq.rstrip("\r\n"):
        if c not in "ACGU":
            ignored_nt_dict[c] = ignored_nt_dict.get(c, 0) + 1
            return False
    return True
43 +
32 44
33 class NoDaemonProcess(multiprocessing.Process): 45 class NoDaemonProcess(multiprocessing.Process):
34 @property 46 @property
...@@ -107,19 +119,213 @@ class Job: ...@@ -107,19 +119,213 @@ class Job:
107 self.nthreads = how_many_in_parallel 119 self.nthreads = how_many_in_parallel
108 120
109 121
class _MethodResults:
    """Structures collected for one prediction method."""

    def __init__(self):
        # Distinct secondary structures, in the order they appear in the file.
        self.predictions = []
        # ninsertions[i] is the number of '+' characters found on the result
        # line that introduced predictions[i].
        self.ninsertions = []


class RNA:
    """One RNA sequence together with the structures predicted for it.

    Attributes:
        header_ / header / basename: the name given at construction. Result
            files are looked up as ``<directory><basename><extension>``.
        seq_: the nucleotide sequence.
        length: ``len(seq)``.
        rnasubopt, biorseoRawA, ..., biorseoBGSUBayesPairD: one result holder
            per method, each exposing ``predictions`` and ``ninsertions``
            lists filled by the matching ``get_*_results`` method.
    """

    def __init__(self, header, seq):
        self.seq_ = seq
        self.header_ = header
        # Aliases: the getters below read `basename`, and the job builder
        # reads `header`; neither was defined before.
        self.header = header
        self.basename = header
        self.length = len(seq)

        # The getters access `.predictions` / `.ninsertions`, so these must be
        # result holders rather than bare lists.
        self.rnasubopt = _MethodResults()
        self.biorseoRawA = _MethodResults()
        self.biorseoRawB = _MethodResults()
        self.biorseoBGSUJAR3DA = _MethodResults()
        self.biorseoBGSUJAR3DC = _MethodResults()
        self.biorseoBGSUJAR3DD = _MethodResults()
        self.biorseoBGSUJAR3DB = _MethodResults()
        self.biorseoBayesPairA = _MethodResults()
        self.biorseoBayesPairC = _MethodResults()
        self.biorseoBayesPairD = _MethodResults()
        self.biorseoBayesPairB = _MethodResults()
        self.biorseoBGSUBayesPairA = _MethodResults()
        self.biorseoBGSUBayesPairC = _MethodResults()
        self.biorseoBGSUBayesPairD = _MethodResults()
        self.biorseoBGSUBayesPairB = _MethodResults()

    @staticmethod
    def _parse_predictions(results, filepath, split_tabs=True, count_insertions=True):
        """Append to ``results`` the distinct structures listed in ``filepath``.

        The first two lines of the file are skipped (presumably the header
        and the sequence - TODO confirm against the tools' output format).
        The structure is the text before the first space (and, when
        ``split_tabs`` is set, before the first tab) of each remaining line.
        """
        with open(filepath, "r") as results_file:
            lines = results_file.readlines()
        for line in lines[2:]:
            ss = line.split(' ')[0]
            if split_tabs:
                ss = ss.split('\t')[0]
            if ss not in results.predictions:
                results.predictions.append(ss)
                if count_insertions:
                    results.ninsertions.append(line.count('+'))

    def _load_if_present(self, results, targetdir, extension):
        """Parse ``targetdir + basename + extension`` when that file exists.

        A missing file is silently ignored and leaves ``results`` untouched.
        """
        filepath = targetdir + self.basename + extension
        if path.isfile(filepath):
            self._parse_predictions(results, filepath)

    def get_RNAsubopt_results(self, targetdir=None):
        """Load the structures of ``<basename>.subopt``.

        Args:
            targetdir: directory prefix of the file. When omitted, the
                ``outputf`` attribute is used; the caller must have assigned
                it on this object beforehand.

        Raises:
            OSError: if the file cannot be opened (unlike the other getters,
                a missing file is not tolerated here).
        """
        if targetdir is None:
            targetdir = self.outputf
        self._parse_predictions(
            self.rnasubopt,
            targetdir + self.basename + ".subopt",
            split_tabs=False,
            count_insertions=False,
        )

    def get_biorseoBayesPairA_results(self, targetdir):
        """Load ``<targetdir><basename>.bypA`` if it exists."""
        self._load_if_present(self.biorseoBayesPairA, targetdir, ".bypA")

    def get_biorseoBayesPairB_results(self, targetdir):
        """Load ``<targetdir><basename>.bypB`` if it exists."""
        self._load_if_present(self.biorseoBayesPairB, targetdir, ".bypB")

    def get_biorseoBayesPairC_results(self, targetdir):
        """Load ``<targetdir><basename>.bypC`` if it exists."""
        self._load_if_present(self.biorseoBayesPairC, targetdir, ".bypC")

    def get_biorseoBayesPairD_results(self, targetdir):
        """Load ``<targetdir><basename>.bypD`` if it exists."""
        self._load_if_present(self.biorseoBayesPairD, targetdir, ".bypD")

    def get_biorseoRawA_results(self, targetdir):
        """Load ``<targetdir><basename>.rawA`` if it exists."""
        self._load_if_present(self.biorseoRawA, targetdir, ".rawA")

    def get_biorseoRawB_results(self, targetdir):
        """Load ``<targetdir><basename>.rawB`` if it exists."""
        self._load_if_present(self.biorseoRawB, targetdir, ".rawB")

    def get_biorseoBGSUJAR3DA_results(self, targetdir):
        """Load ``<targetdir><basename>.jar3dA`` if it exists."""
        self._load_if_present(self.biorseoBGSUJAR3DA, targetdir, ".jar3dA")

    def get_biorseoBGSUJAR3DB_results(self, targetdir):
        """Load ``<targetdir><basename>.jar3dB`` if it exists."""
        self._load_if_present(self.biorseoBGSUJAR3DB, targetdir, ".jar3dB")

    def get_biorseoBGSUJAR3DC_results(self, targetdir):
        """Load ``<targetdir><basename>.jar3dC`` if it exists."""
        self._load_if_present(self.biorseoBGSUJAR3DC, targetdir, ".jar3dC")

    def get_biorseoBGSUJAR3DD_results(self, targetdir):
        """Load ``<targetdir><basename>.jar3dD`` if it exists."""
        self._load_if_present(self.biorseoBGSUJAR3DD, targetdir, ".jar3dD")

    def get_biorseoBGSUBayesPairA_results(self, targetdir):
        """Load ``<targetdir><basename>.bgsubypA`` if it exists."""
        self._load_if_present(self.biorseoBGSUBayesPairA, targetdir, ".bgsubypA")

    def get_biorseoBGSUBayesPairB_results(self, targetdir):
        """Load ``<targetdir><basename>.bgsubypB`` if it exists."""
        self._load_if_present(self.biorseoBGSUBayesPairB, targetdir, ".bgsubypB")

    def get_biorseoBGSUBayesPairC_results(self, targetdir):
        """Load ``<targetdir><basename>.bgsubypC`` if it exists."""
        self._load_if_present(self.biorseoBGSUBayesPairC, targetdir, ".bgsubypC")

    def get_biorseoBGSUBayesPairD_results(self, targetdir):
        """Load ``<targetdir><basename>.bgsubypD`` if it exists."""
        self._load_if_present(self.biorseoBGSUBayesPairD, targetdir, ".bgsubypD")
314 +
315 +
110 class BiorseoInstance: 316 class BiorseoInstance:
111 def __init__(self, argv): 317 def __init__(self, argv):
112 # set default options 318 # set default options
113 self.type = "dpm" 319 self.type = "dpm"
114 self.modules = "desc" 320 self.modules = "desc"
115 self.func = 'B' 321 self.func = 'B'
116 - self.outputf = outputDir 322 + self.outputf = self.outputf
117 self.jobcount = 0 323 self.jobcount = 0
118 324
119 # Parse options 325 # Parse options
120 try: 326 try:
121 opts, args = getopt.getopt( 327 opts, args = getopt.getopt(
122 - argv, "hil::o:", ["type=", "func=", "modules="]) 328 + argv, "hi:o:", ["type=", "func=", "modules="])
123 except getopt.GetoptError: 329 except getopt.GetoptError:
124 print("Please provide arguments !") 330 print("Please provide arguments !")
125 sys.exit(2) 331 sys.exit(2)
...@@ -130,9 +336,6 @@ class BiorseoInstance: ...@@ -130,9 +336,6 @@ class BiorseoInstance:
130 elif opt == "-i": 336 elif opt == "-i":
131 self.inputfile = arg 337 self.inputfile = arg
132 self.mode = 0 # single sequence mode 338 self.mode = 0 # single sequence mode
133 - elif opt == "-l":
134 - self.inputfile = arg
135 - self.mode = 1 # batch mode
136 elif opt == "-o": 339 elif opt == "-o":
137 self.outputf = arg # output file or folder... 340 self.outputf = arg # output file or folder...
138 elif opt == "--func": 341 elif opt == "--func":
...@@ -153,6 +356,9 @@ class BiorseoInstance: ...@@ -153,6 +356,9 @@ class BiorseoInstance:
153 else: 356 else:
154 raise "Unknown option " + opt 357 raise "Unknown option " + opt
155 358
359 + # create jobs
360 + self.list_jobs()
361 +
156 if self.mode: 362 if self.mode:
157 # Create a job manager 363 # Create a job manager
158 self.manager = Manager() 364 self.manager = Manager()
...@@ -321,7 +527,7 @@ class BiorseoInstance: ...@@ -321,7 +527,7 @@ class BiorseoInstance:
321 def launch_JAR3D(self, seq_, basename): 527 def launch_JAR3D(self, seq_, basename):
322 rnasubopt_preds = [] 528 rnasubopt_preds = []
323 # Extracting probable loops from RNA-subopt structures 529 # Extracting probable loops from RNA-subopt structures
324 - rna = open(outputDir + basename + ".subopt", "r") 530 + rna = open(self.outputf + basename + ".subopt", "r")
325 lines = rna.readlines() 531 lines = rna.readlines()
326 rna.close() 532 rna.close()
327 for i in range(2, len(lines)): 533 for i in range(2, len(lines)):
...@@ -352,7 +558,7 @@ class BiorseoInstance: ...@@ -352,7 +558,7 @@ class BiorseoInstance:
352 insertion_sites.sort(reverse=True) 558 insertion_sites.sort(reverse=True)
353 # Writing results to CSV file 559 # Writing results to CSV file
354 c = 0 560 c = 0
355 - resultsfile = open(outputDir+basename+".sites.csv", "w") 561 + resultsfile = open(self.outputf+basename+".sites.csv", "w")
356 resultsfile.write("Motif,Rotation,Score,Start1,End1,Start2,End2\n") 562 resultsfile.write("Motif,Rotation,Score,Start1,End1,Start2,End2\n")
357 for site in insertion_sites: 563 for site in insertion_sites:
358 if site.score > 10: 564 if site.score > 10:
...@@ -372,7 +578,7 @@ class BiorseoInstance: ...@@ -372,7 +578,7 @@ class BiorseoInstance:
372 def launch_BayesPairing(self, module_type, seq_, header_, basename): 578 def launch_BayesPairing(self, module_type, seq_, header_, basename):
373 chdir(bypdir) 579 chdir(bypdir)
374 580
375 - cmd = ["python3", "parse_sequences.py", "-seq", outputDir + 581 + cmd = ["python3", "parse_sequences.py", "-seq", self.outputf +
376 basename + ".fa", "-d", module_type, "-interm", "1"] 582 basename + ".fa", "-d", module_type, "-interm", "1"]
377 583
378 logfile = open("log_of_the_run.sh", 'a') 584 logfile = open("log_of_the_run.sh", 'a')
...@@ -389,9 +595,9 @@ class BiorseoInstance: ...@@ -389,9 +595,9 @@ class BiorseoInstance:
389 l = BypLog[idx] 595 l = BypLog[idx]
390 insertion_sites = [x for x in ast.literal_eval(l.split(":")[1][1:])] 596 insertion_sites = [x for x in ast.literal_eval(l.split(":")[1][1:])]
391 if module_type == "rna3dmotif": 597 if module_type == "rna3dmotif":
392 - rna = open(outputDir + basename + ".byp.csv", "w") 598 + rna = open(self.outputf + basename + ".byp.csv", "w")
393 else: 599 else:
394 - rna = open(outputDir + basename + ".bgsubyp.csv", "w") 600 + rna = open(self.outputf + basename + ".bgsubyp.csv", "w")
395 rna.write("Motif,Score,Start1,End1,Start2,End2...\n") 601 rna.write("Motif,Score,Start1,End1,Start2,End2...\n")
396 for i, module in enumerate(insertion_sites): 602 for i, module in enumerate(insertion_sites):
397 if len(module): 603 if len(module):
...@@ -477,6 +683,80 @@ class BiorseoInstance: ...@@ -477,6 +683,80 @@ class BiorseoInstance:
477 raise "Unknown data type !" 683 raise "Unknown data type !"
478 return path.isfile(self.outputf + basename + extension) 684 return path.isfile(self.outputf + basename + extension)
479 685
def list_jobs(self):
    """Load the RNAs of ``self.inputfile`` and build the jobs to run on them.

    The input is read as a FASTA file made of alternating header and
    sequence lines (one sequence line per record). Sequences rejected by
    ``is_canonical_nts`` are skipped. For every accepted RNA, a
    ``<header>.fa`` file is written into ``self.outputf`` unless it already
    exists, and the jobs required by ``self.type`` / ``self.modules`` /
    ``self.func`` are created.

    Side effects:
        Sets ``self.mode`` to 1 as soon as a second record is found.
        Stores the job list in ``self.joblist``.

    Returns:
        The list of created ``Job`` objects.
    """
    # Read fasta file, which can contain one or several RNAs
    RNAcontainer = []
    print("loading file(s)...")

    header = ""
    with open(self.inputfile, "r") as db:
        for lineno, l in enumerate(db):
            # Strip only the line terminator, so that a last line without a
            # newline does not lose its final character.
            l = l.rstrip("\r\n")
            if lineno % 2 == 0:
                if header != "":  # This is our second RNA in the fasta file
                    self.mode = 1
                # Drop the FASTA '>' marker: it is written back explicitly
                # below and must not end up in file names.
                header = l[1:] if l.startswith(">") else l
            else:
                seq = l.upper()
                if is_canonical_nts(seq):
                    header = header.replace('/', '_')
                    RNAcontainer.append(RNA(header, seq))
                    if not path.isfile(self.outputf + header + ".fa"):
                        with open(self.outputf + header + ".fa", "w") as rna:
                            rna.write(">" + header + '\n')
                            rna.write(seq + '\n')

    for nt, number in ignored_nt_dict.items():
        print("ignored %d sequences because of char %c" % (number, nt))
    tot = len(RNAcontainer)
    print("Loaded %d RNAs." % (tot))

    # define job list
    joblist = []
    executable = biorseoDir + "/bin/biorseo"
    for instance in RNAcontainer:
        # RNA objects store their name in `header_`.
        name = instance.header_
        fastafile = self.outputf + name + ".fa"
        method_type = ""
        csv = ""  # reset for every RNA so that no stale/unbound value is used
        ext = ".raw"

        if self.type == "jar3d":
            ext = ".jar3d"
            method_type = "--jar3dcsv"
            csv = self.outputf + name + ".sites.csv"

            # RNAsubopt
            joblist.append(Job(command=["RNAsubopt", "-i", fastafile, "--outfile=" + name + ".subopt"], priority=1, checkFunc=check_RNAsubopt, checkArgs=[name]))
            joblist.append(Job(command=["mv", name + ".subopt", self.outputf], priority=2, checkFunc=check_RNAsubopt, checkArgs=[name]))
            # JAR3D
            joblist.append(Job(function=self.launch_JAR3D, args=[instance.seq_, name], priority=3, how_many_in_parallel=1, checkFunc=check_JAR3D, checkArgs=[name]))

        if self.type == "byp":
            method_type = "--bayespaircsv"
            if self.modules == "desc":
                ext = ".byp"
                csv = self.outputf + name + ".byp.csv"
                joblist.append(Job(function=self.launch_BayesPairing, args=["rna3dmotif", instance.seq_, instance.header_, name], how_many_in_parallel=-1, priority=1, checkFunc=check_BayesPairing, checkArgs=[name]))
            elif self.modules == "bgsu":
                ext = ".bgsubyp"
                csv = self.outputf + name + ".bgsubyp.csv"
                joblist.append(Job(function=self.launch_BayesPairing, args=["3dmotifatlas", instance.seq_, instance.header_, name], how_many_in_parallel=-1, priority=1, checkFunc=check_BGSUBayesPairing, checkArgs=[name]))

        command = [executable, "-s", fastafile]
        # Only pass a module-site option when a site file was actually
        # selected (none is for --type byp with an unknown module set).
        if method_type and csv:
            command += [method_type, csv]
        command += ["-o", self.outputf + name + ext + self.func, "--type", self.func]
        joblist.append(Job(command=command, priority=4, timeout=3600, how_many_in_parallel=3))

    # The list used to be dropped when the method returned.
    # NOTE(review): confirm that `self.joblist` is the attribute the rest of
    # the class expects to consume.
    self.joblist = joblist
    return joblist
759 +
480 760
481 if __name__ == "__main__": 761 if __name__ == "__main__":
482 BiorseoInstance(sys.argv) 762 BiorseoInstance(sys.argv)
......
...@@ -736,7 +736,7 @@ class RNA: ...@@ -736,7 +736,7 @@ class RNA:
736 m.best_pred = p 736 m.best_pred = p
737 if max(m.ninsertions) > 0 and float(n)/max(m.ninsertions) > m.ratio: 737 if max(m.ninsertions) > 0 and float(n)/max(m.ninsertions) > m.ratio:
738 m.ratio = float(n)/max(m.ninsertions) 738 m.ratio = float(n)/max(m.ninsertions)
739 - 739 +
740 def get_biokop_results(self): 740 def get_biokop_results(self):
741 if path.isfile(outputDir + self.basename + ".biok"): 741 if path.isfile(outputDir + self.basename + ".biok"):
742 rna = open(outputDir + self.basename + ".biok", "r") 742 rna = open(outputDir + self.basename + ".biok", "r")
...@@ -1005,93 +1005,93 @@ print("Loaded %d RNAs of length between 10 and 100. %d of them contain pseudokno ...@@ -1005,93 +1005,93 @@ print("Loaded %d RNAs of length between 10 and 100. %d of them contain pseudokno
1005 1005
1006 # #================= PREDICTION OF STRUCTURES =============================== 1006 # #================= PREDICTION OF STRUCTURES ===============================
1007 1007
1008 -# #define job list 1008 +#define job list
1009 -# joblist = [] 1009 +joblist = []
1010 -# for instance in RNAcontainer: 1010 +for instance in RNAcontainer:
1011 -# basename = instance.basename 1011 + basename = instance.basename
1012 -# # RNAsubopt 1012 + # RNAsubopt
1013 -# joblist.append(Job(command=["RNAsubopt", "-i", outputDir + basename + ".fa", "--outfile="+ basename + ".subopt"], priority=1, checkFunc=check_RNAsubopt, checkArgs=[basename])) 1013 + joblist.append(Job(command=["RNAsubopt", "-i", outputDir + basename + ".fa", "--outfile="+ basename + ".subopt"], priority=1, checkFunc=check_RNAsubopt, checkArgs=[basename]))
1014 -# joblist.append(Job(command=["mv", basename + ".subopt", outputDir], priority=2, checkFunc=check_RNAsubopt, checkArgs=[basename])) 1014 + joblist.append(Job(command=["mv", basename + ".subopt", outputDir], priority=2, checkFunc=check_RNAsubopt, checkArgs=[basename]))
1015 -# # JAR3D 1015 + # JAR3D
1016 -# joblist.append(Job(function=launch_JAR3D, args=[instance.seq_, basename], priority=3, how_many_in_parallel=1, checkFunc=check_JAR3D, checkArgs=[basename])) 1016 + joblist.append(Job(function=launch_JAR3D, args=[instance.seq_, basename], priority=3, how_many_in_parallel=1, checkFunc=check_JAR3D, checkArgs=[basename]))
1017 -# # BayesPairing and BGSUBayesPairing 1017 + # BayesPairing and BGSUBayesPairing
1018 -# joblist.append(Job(function=launch_BayesPairing, args=["rna3dmotif", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BayesPairing, checkArgs=[basename])) 1018 + joblist.append(Job(function=launch_BayesPairing, args=["rna3dmotif", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BayesPairing, checkArgs=[basename]))
1019 -# joblist.append(Job(function=launch_BayesPairing, args=["3dmotifatlas", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BGSUBayesPairing, checkArgs=[basename])) 1019 + joblist.append(Job(function=launch_BayesPairing, args=["3dmotifatlas", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BGSUBayesPairing, checkArgs=[basename]))
1020 -# # biorseoBGSUJAR3DA-D 1020 + # biorseoBGSUJAR3DA-D
1021 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, False])) 1021 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, False]))
1022 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, False])) 1022 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, False]))
1023 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, False])) 1023 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, False]))
1024 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, False])) 1024 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, False]))
1025 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, True])) 1025 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, True]))
1026 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, True])) 1026 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, True]))
1027 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, True])) 1027 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, True]))
1028 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, True])) 1028 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, True]))
1029 -# # biorseoBGSUBayesPairA-D 1029 + # biorseoBGSUBayesPairA-D
1030 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, False])) 1030 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, False]))
1031 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, False])) 1031 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, False]))
1032 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, False])) 1032 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, False]))
1033 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, False])) 1033 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, False]))
1034 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, True])) 1034 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, True]))
1035 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, True])) 1035 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, True]))
1036 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, True])) 1036 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, True]))
1037 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, True])) 1037 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, True]))
1038 -# # biorseoBayesPairA-D 1038 + # biorseoBayesPairA-D
1039 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, False])) 1039 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, False]))
1040 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, False])) 1040 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, False]))
1041 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, False])) 1041 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, False]))
1042 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, False])) 1042 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, False]))
1043 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, True])) 1043 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, True]))
1044 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, True])) 1044 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, True]))
1045 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, True])) 1045 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, True]))
1046 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, True])) 1046 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, True]))
1047 -# # biorseoRawA,B 1047 + # biorseoRawA,B
1048 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, False])) 1048 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, False]))
1049 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, False])) 1049 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, False]))
1050 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, True])) 1050 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, True]))
1051 -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, True])) 1051 + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, True]))
1052 -# # RNA-MoIP 1052 + # RNA-MoIP
1053 -# joblist.append(Job(function=launch_RNAMoIP, args=[instance.seq_, instance.header_, basename], priority=3, timeout=3600, checkFunc=check_RNAMoIP, checkArgs=[basename])) 1053 + joblist.append(Job(function=launch_RNAMoIP, args=[instance.seq_, instance.header_, basename], priority=3, timeout=3600, checkFunc=check_RNAMoIP, checkArgs=[basename]))
1054 -# # Biokop 1054 + # Biokop
1055 -# joblist.append(Job(command=[biorseoDir + "/../biokop/biokop", "-n1", "-i", outputDir + basename + ".fa", "-o", outputDir + basename + ".biok"], priority=5, timeout=15000, how_many_in_parallel=3, checkFunc=check_biokop, checkArgs=[basename])) 1055 + joblist.append(Job(command=[biorseoDir + "/../biokop/biokop", "-n1", "-i", outputDir + basename + ".fa", "-o", outputDir + basename + ".biok"], priority=5, timeout=15000, how_many_in_parallel=3, checkFunc=check_biokop, checkArgs=[basename]))
1056 - 1056 +
1057 - 1057 +
1058 -# # execute jobs 1058 +# execute jobs
1059 -# jobs = {} 1059 +jobs = {}
1060 -# jobcount = len(joblist) 1060 +jobcount = len(joblist)
1061 -# for job in joblist: 1061 +for job in joblist:
1062 -# if job.priority_ not in jobs.keys(): 1062 + if job.priority_ not in jobs.keys():
1063 -# jobs[job.priority_] = {} 1063 + jobs[job.priority_] = {}
1064 -# if job.nthreads not in jobs[job.priority_].keys(): 1064 + if job.nthreads not in jobs[job.priority_].keys():
1065 -# jobs[job.priority_][job.nthreads] = [] 1065 + jobs[job.priority_][job.nthreads] = []
1066 -# jobs[job.priority_][job.nthreads].append(job) 1066 + jobs[job.priority_][job.nthreads].append(job)
1067 -# nprio = max(jobs.keys()) 1067 +nprio = max(jobs.keys())
1068 - 1068 +
1069 - 1069 +
1070 -# for i in range(1,nprio+1): 1070 +for i in range(1,nprio+1):
1071 -# if not len(jobs[i].keys()): continue 1071 + if not len(jobs[i].keys()): continue
1072 - 1072 +
1073 -# # check the thread numbers 1073 + # check the thread numbers
1074 -# different_thread_numbers = [n for n in jobs[i].keys()] 1074 + different_thread_numbers = [n for n in jobs[i].keys()]
1075 -# different_thread_numbers.sort() 1075 + different_thread_numbers.sort()
1076 - 1076 +
1077 -# for n in different_thread_numbers: 1077 + for n in different_thread_numbers:
1078 -# bunch = jobs[i][n] 1078 + bunch = jobs[i][n]
1079 -# if not len(bunch): continue 1079 + if not len(bunch): continue
1080 -# pool = MyPool(processes=n) 1080 + pool = MyPool(processes=n)
1081 -# results = pool.map(execute_job, bunch) 1081 + results = pool.map(execute_job, bunch)
1082 -# pool.close() 1082 + pool.close()
1083 -# pool.join() 1083 + pool.join()
1084 - 1084 +
1085 -# if len(fails): 1085 +if len(fails):
1086 -# print() 1086 + print()
1087 -# print("Some jobs failed! :") 1087 + print("Some jobs failed! :")
1088 -# print() 1088 + print()
1089 -# for j in fails: 1089 + for j in fails:
1090 -# print(j.cmd_) 1090 + print(j.cmd_)
1091 -# else: 1091 +else:
1092 -# print() 1092 + print()
1093 -# print("Computations ran successfully.") 1093 + print("Computations ran successfully.")
1094 -# print() 1094 + print()
1095 1095
1096 1096
1097 # ================= Statistics (without pseudoknots) ======================== 1097 # ================= Statistics (without pseudoknots) ========================
...@@ -1183,43 +1183,43 @@ for instance in RNAcontainer: ...@@ -1183,43 +1183,43 @@ for instance in RNAcontainer:
1183 instance.evaluate() 1183 instance.evaluate()
1184 1184
1185 x_PK = [ 1185 x_PK = [
1186 - [ rna.biokop.max_mcc for rna in RNAcontainer if len(rna.biokop.predictions)], 1186 + [ rna.biokop.avg_mcc for rna in RNAcontainer if len(rna.biokop.predictions)],
1187 - [ rna.biokop.max_mcc for rna in RNAcontainer if len(rna.biokop.predictions)], 1187 + [ rna.biokop.avg_mcc for rna in RNAcontainer if len(rna.biokop.predictions)],
1188 - [ rna.biorseoRawA.max_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)], 1188 + [ rna.biorseoRawA.avg_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)],
1189 - [ rna.biorseoRawB.max_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)], 1189 + [ rna.biorseoRawB.avg_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)],
1190 - [ rna.biorseoBayesPairA.max_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairA.predictions)], 1190 + [ rna.biorseoBayesPairA.avg_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairA.predictions)],
1191 - [ rna.biorseoBayesPairB.max_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairB.predictions)], 1191 + [ rna.biorseoBayesPairB.avg_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairB.predictions)],
1192 - [ rna.biorseoBayesPairC.max_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairC.predictions)], 1192 + [ rna.biorseoBayesPairC.avg_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairC.predictions)],
1193 - [ rna.biorseoBayesPairD.max_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairD.predictions)], 1193 + [ rna.biorseoBayesPairD.avg_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairD.predictions)],
1194 - [ rna.biorseoBGSUJAR3DA.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DA.predictions)], 1194 + [ rna.biorseoBGSUJAR3DA.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DA.predictions)],
1195 - [ rna.biorseoBGSUJAR3DB.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DB.predictions)], 1195 + [ rna.biorseoBGSUJAR3DB.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DB.predictions)],
1196 - [ rna.biorseoBGSUJAR3DC.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DC.predictions)], 1196 + [ rna.biorseoBGSUJAR3DC.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DC.predictions)],
1197 - [ rna.biorseoBGSUJAR3DD.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DD.predictions)], 1197 + [ rna.biorseoBGSUJAR3DD.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DD.predictions)],
1198 - [ rna.biorseoBGSUBayesPairA.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairA.predictions)], 1198 + [ rna.biorseoBGSUBayesPairA.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairA.predictions)],
1199 - [ rna.biorseoBGSUBayesPairB.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairB.predictions)], 1199 + [ rna.biorseoBGSUBayesPairB.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairB.predictions)],
1200 - [ rna.biorseoBGSUBayesPairC.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairC.predictions)], 1200 + [ rna.biorseoBGSUBayesPairC.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairC.predictions)],
1201 - [ rna.biorseoBGSUBayesPairD.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairD.predictions)] 1201 + [ rna.biorseoBGSUBayesPairD.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairD.predictions)]
1202 ] 1202 ]
1203 1203
1204 RNAs_fully_predicted = [ x for x in RNAcontainer if x.has_complete_results(True)] 1204 RNAs_fully_predicted = [ x for x in RNAcontainer if x.has_complete_results(True)]
1205 1205
1206 x_PK_fully = [ 1206 x_PK_fully = [
1207 - [ rna.biokop.max_mcc for rna in RNAs_fully_predicted], 1207 + [ rna.biokop.avg_mcc for rna in RNAs_fully_predicted],
1208 - [ rna.biokop.max_mcc for rna in RNAs_fully_predicted], 1208 + [ rna.biokop.avg_mcc for rna in RNAs_fully_predicted],
1209 - [ rna.biorseoRawA.max_mcc for rna in RNAs_fully_predicted], 1209 + [ rna.biorseoRawA.avg_mcc for rna in RNAs_fully_predicted],
1210 - [ rna.biorseoRawB.max_mcc for rna in RNAs_fully_predicted], 1210 + [ rna.biorseoRawB.avg_mcc for rna in RNAs_fully_predicted],
1211 - [ rna.biorseoBayesPairA.max_mcc for rna in RNAs_fully_predicted], 1211 + [ rna.biorseoBayesPairA.avg_mcc for rna in RNAs_fully_predicted],
1212 - [ rna.biorseoBayesPairB.max_mcc for rna in RNAs_fully_predicted], 1212 + [ rna.biorseoBayesPairB.avg_mcc for rna in RNAs_fully_predicted],
1213 - [ rna.biorseoBayesPairC.max_mcc for rna in RNAs_fully_predicted], 1213 + [ rna.biorseoBayesPairC.avg_mcc for rna in RNAs_fully_predicted],
1214 - [ rna.biorseoBayesPairD.max_mcc for rna in RNAs_fully_predicted], 1214 + [ rna.biorseoBayesPairD.avg_mcc for rna in RNAs_fully_predicted],
1215 - [ rna.biorseoBGSUJAR3DA.max_mcc for rna in RNAs_fully_predicted], 1215 + [ rna.biorseoBGSUJAR3DA.avg_mcc for rna in RNAs_fully_predicted],
1216 - [ rna.biorseoBGSUJAR3DB.max_mcc for rna in RNAs_fully_predicted], 1216 + [ rna.biorseoBGSUJAR3DB.avg_mcc for rna in RNAs_fully_predicted],
1217 - [ rna.biorseoBGSUJAR3DC.max_mcc for rna in RNAs_fully_predicted], 1217 + [ rna.biorseoBGSUJAR3DC.avg_mcc for rna in RNAs_fully_predicted],
1218 - [ rna.biorseoBGSUJAR3DD.max_mcc for rna in RNAs_fully_predicted], 1218 + [ rna.biorseoBGSUJAR3DD.avg_mcc for rna in RNAs_fully_predicted],
1219 - [ rna.biorseoBGSUBayesPairA.max_mcc for rna in RNAs_fully_predicted], 1219 + [ rna.biorseoBGSUBayesPairA.avg_mcc for rna in RNAs_fully_predicted],
1220 - [ rna.biorseoBGSUBayesPairB.max_mcc for rna in RNAs_fully_predicted], 1220 + [ rna.biorseoBGSUBayesPairB.avg_mcc for rna in RNAs_fully_predicted],
1221 - [ rna.biorseoBGSUBayesPairC.max_mcc for rna in RNAs_fully_predicted], 1221 + [ rna.biorseoBGSUBayesPairC.avg_mcc for rna in RNAs_fully_predicted],
1222 - [ rna.biorseoBGSUBayesPairD.max_mcc for rna in RNAs_fully_predicted], 1222 + [ rna.biorseoBGSUBayesPairD.avg_mcc for rna in RNAs_fully_predicted],
1223 ] # We ensure having the same number of RNAs in every sample by discarding the one for which computations did not ended/succeeded. 1223 ] # We ensure having the same number of RNAs in every sample by discarding the one for which computations did not ended/succeeded.
1224 1224
1225 print() 1225 print()
...@@ -1296,43 +1296,43 @@ print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop ...@@ -1296,43 +1296,43 @@ print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop
1296 1296
1297 # ================= PLOTS OF RESULTS ======================================= 1297 # ================= PLOTS OF RESULTS =======================================
1298 1298
1299 -merge = [ x_PK_fully[0], # Biokop 1299 +merge = [ x_noPK[0], # RNA subopt
1300 - x_noPK_fully[0], # RNA subopt 1300 + x_noPK[1], # RNA-MoIP
1301 - x_noPK_fully[1], # RNA-MoIP 1301 + x_PK[0], # Biokop
1302 - x_noPK_fully[2], x_PK_fully[2], #biorseoRawA 1302 + x_PK[2], #biorseoRawA
1303 - x_noPK_fully[3], x_PK_fully[3], #biorseoRawB 1303 + x_PK[3], #biorseoRawB
1304 - x_noPK_fully[4], x_PK_fully[4], #biorseoBayesPairA 1304 + x_PK[4], #biorseoBayesPairA
1305 - x_noPK_fully[5], x_PK_fully[5], #biorseoBayesPairB 1305 + x_PK[5], #biorseoBayesPairB
1306 - x_noPK_fully[6], x_PK_fully[6], #biorseoBayesPairC 1306 + x_PK[6], #biorseoBayesPairC
1307 - x_noPK_fully[7], x_PK_fully[7], #biorseoBayesPairD 1307 + x_PK[7], #biorseoBayesPairD
1308 - x_noPK_fully[8], x_PK_fully[8], #biorseoBGSUJAR3DA 1308 + x_PK[8], #biorseoBGSUJAR3DA
1309 - x_noPK_fully[9], x_PK_fully[9], #biorseoBGSUJAR3DB 1309 + x_PK[9], #biorseoBGSUJAR3DB
1310 - x_noPK_fully[10], x_PK_fully[10], #biorseoBGSUJAR3DC 1310 + x_PK[10], #biorseoBGSUJAR3DC
1311 - x_noPK_fully[11], x_PK_fully[11], #biorseoBGSUJAR3DD 1311 + x_PK[11], #biorseoBGSUJAR3DD
1312 - x_noPK_fully[12], x_PK_fully[12], #biorseoBGSUBayesPairA 1312 + x_PK[12], #biorseoBGSUBayesPairA
1313 - x_noPK_fully[13], x_PK_fully[13], #biorseoBGSUBayesPairB 1313 + x_PK[13], #biorseoBGSUBayesPairB
1314 - x_noPK_fully[14], x_PK_fully[14], #biorseoBGSUBayesPairC 1314 + x_PK[14], #biorseoBGSUBayesPairC
1315 - x_noPK_fully[15], x_PK_fully[15], #biorseoBGSUBayesPairD 1315 + x_PK[15], #biorseoBGSUBayesPairD
1316 ] 1316 ]
1317 1317
1318 -colors = [ 'green', 'blue', 'goldenrod', 1318 +colors = [ 'blue', 'goldenrod', 'green',
1319 - 'darkturquoise', 'darkturquoise', 1319 + 'red',
1320 - 'red', 'red', 1320 + 'firebrick',
1321 - 'firebrick', 'firebrick', 1321 + 'limegreen',
1322 - 'limegreen', 'limegreen', 1322 + 'olive',
1323 - 'olive', 'olive', 1323 + 'forestgreen',
1324 - 'forestgreen', 'forestgreen', 1324 + 'lime',
1325 - 'lime', 'lime', 1325 + 'darkcyan',
1326 - 'darkcyan', 'darkcyan', 1326 + 'royalblue',
1327 - 'royalblue', 'royalblue', 1327 + 'navy',
1328 - 'navy', 'navy', 1328 + 'limegreen',
1329 - 'limegreen', 'limegreen', 1329 + 'olive',
1330 - 'olive', 'olive', 1330 + 'forestgreen',
1331 - 'forestgreen', 'forestgreen', 1331 + 'lime'
1332 - 'lime', 'lime'
1333 ] 1332 ]
1334 -labels = [ "Biokop", "RNAsubopt", 1333 +labels = [ "RNAsubopt",
1335 "RNA-MoIP", 1334 "RNA-MoIP",
1335 + "Biokop",
1336 "$f_{1A}$", 1336 "$f_{1A}$",
1337 "$f_{1B}$", 1337 "$f_{1B}$",
1338 "$f_{1A}$", 1338 "$f_{1A}$",
...@@ -1349,49 +1349,64 @@ labels = [ "Biokop", "RNAsubopt", ...@@ -1349,49 +1349,64 @@ labels = [ "Biokop", "RNAsubopt",
1349 "$f_{1D}$" 1349 "$f_{1D}$"
1350 ] 1350 ]
1351 1351
1352 -ax = plt.subplot(211) 1352 +
1353 -ax.tick_params(labelsize=12) 1353 +# for y in [ i/10 for i in range(11) ]:
1354 -for y in [ i/10 for i in range(11) ]: 1354 +# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1355 - plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1356 -colors = [ 'blue','goldenrod',
1357 - 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
1358 - 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
1359 - ]
1360 -bplot = plt.boxplot(x_noPK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
1361 -for patch, color in zip(bplot['boxes'], colors):
1362 - patch.set_facecolor(color)
1363 -# plt.axhline(y=0, color="black", linewidth=1)
1364 -# plt.axhline(y=1, color="black", linewidth=1)
1365 -plt.xticks([1.0+i for i in range(16)], labels[1:])
1366 -plt.ylim((0.5, 1.01))
1367 -plt.ylabel("MCC", fontsize=12)
1368 -plt.subplots_adjust(left=0.05, right=0.95)
1369 -# plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0]))
1370 -
1371 -
1372 -ax = plt.subplot(212)
1373 -ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False, labelsize=12)
1374 -ax.xaxis.set_label_position('top')
1375 -for y in [ i/10 for i in range(11) ]:
1376 - plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1377 -colors = [ 'green','green',
1378 - 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
1379 - 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
1380 - ]
1381 -labels = [ "Biokop"]
1382 -bplot = plt.boxplot(x_PK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
1383 -for patch, color in zip(bplot['boxes'], colors):
1384 - patch.set_facecolor(color)
1385 # plt.axhline(y=0, color="black", linewidth=1) 1355 # plt.axhline(y=0, color="black", linewidth=1)
1386 -# plt.axhline(y=1, color="black", linewidth=1) 1356 +# bplot = plt.boxplot(merge, vert=True, patch_artist=True, notch=False, whis=[3,97])
1387 -plt.xticks([1.0+i for i in range(16)], labels) 1357 +# for patch, color in zip(bplot['boxes'], colors):
1388 -plt.ylim((0.5, 1.01)) 1358 +# patch.set_facecolor(color)
1389 -plt.ylabel("MCC", fontsize=12) 1359 +# plt.xticks([1.0+i for i in range(17)], labels)
1390 -plt.subplots_adjust(left=0.05, right=0.95) 1360 +# plt.ylim((-0.1, 1.01))
1391 -# plt.text(6.2,-0.3,"Performance with pseudoknots (%d RNAs included)" % len(x_PK_fully[0]), fontsize=12) 1361 +# plt.ylabel("MCC", fontsize=12)
1362 +# plt.subplots_adjust(left=0.05, right=0.95)
1363 +# # plt.title("Performance with pseudoknotted dataset (%d RNAs from Pseudobase++)" % len(merge[0]))
1364 +# plt.show()
1365 +
1366 +# # Separating PK and non-PK
1367 +# ax = plt.subplot(211)
1368 +# ax.tick_params(labelsize=12)
1369 +# for y in [ i/10 for i in range(11) ]:
1370 +# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1371 +# colors = [ 'blue','goldenrod',
1372 +# 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
1373 +# 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
1374 +# ]
1375 +# bplot = plt.boxplot(x_noPK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
1376 +# for patch, color in zip(bplot['boxes'], colors):
1377 +# patch.set_facecolor(color)
1378 +# # plt.axhline(y=0, color="black", linewidth=1)
1379 +# # plt.axhline(y=1, color="black", linewidth=1)
1380 +# plt.xticks([1.0+i for i in range(16)], labels[1:])
1381 +# plt.ylim((0.5, 1.01))
1382 +# plt.ylabel("MCC", fontsize=12)
1383 +# plt.subplots_adjust(left=0.05, right=0.95)
1384 +# # plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0]))
1385 +
1386 +
1387 +# ax = plt.subplot(212)
1388 +# ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False, labelsize=12)
1389 +# ax.xaxis.set_label_position('top')
1390 +# for y in [ i/10 for i in range(11) ]:
1391 +# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1392 +# colors = [ 'green','green',
1393 +# 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
1394 +# 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
1395 +# ]
1396 +# labels = [ "Biokop"]
1397 +# bplot = plt.boxplot(x_PK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
1398 +# for patch, color in zip(bplot['boxes'], colors):
1399 +# patch.set_facecolor(color)
1400 +# # plt.axhline(y=0, color="black", linewidth=1)
1401 +# # plt.axhline(y=1, color="black", linewidth=1)
1402 +# plt.xticks([1.0+i for i in range(16)], labels)
1403 +# plt.ylim((0.4, 1.01))
1404 +# plt.ylabel("MCC", fontsize=12)
1405 +# plt.subplots_adjust(left=0.05, right=0.95)
1406 +# # plt.text(6.2,-0.3,"Performance with pseudoknots (%d RNAs included)" % len(x_PK_fully[0]), fontsize=12)
1392 1407
1393 1408
1394 -plt.show() 1409 +# plt.show()
1395 1410
1396 1411
1397 # # ================== MCC performance ==================================== 1412 # # ================== MCC performance ====================================
...@@ -1536,82 +1551,6 @@ plt.show() ...@@ -1536,82 +1551,6 @@ plt.show()
1536 # plt.show() 1551 # plt.show()
1537 1552
1538 1553
1539 -# # MCC boost compared to RNA subopt
1540 -# plt.subplot(143)
1541 -# x = [
1542 -# [ rna.rnamoip.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.rnamoip.predictions)],
1543 -# [ rna.biorseoRawA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)],
1544 -# [ rna.biorseoRawB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)],
1545 -# [ rna.biokop.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biokop.predictions)],
1546 -#]
1547 -# colors = ['xkcd:goldenrod', 'xkcd:red', 'firebrick', 'limegreen']
1548 -# labels = ["$\Delta$MCC(RNAsubopt,RNA-MoIP)","$\Delta$MCC(RNAsubopt,RNA MoBOIP)",
1549 -# "$\Delta$MCC(RNAsubopt,RNA MoBOIP++)","$\Delta$MCC(RNAsubopt,Biokop)"]
1550 -# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97])
1551 -# for patch, color in zip(bplot['boxes'], colors):
1552 -# patch.set_facecolor(color)
1553 -# plt.axvline(x=0, color="black", linewidth=1)
1554 -# plt.yticks([1.0+i for i in range(4)], labels)
1555 -# plt.xlim((-1.1, 1.1))
1556 -# plt.xlabel("Improvement in MCC")
1557 -# plt.title("MCC performance relatively to RNAsubopt")
1558 -# plt.show()
1559 -
1560 -
1561 -# plt.subplot(222)
1562 -# x = [
1563 -# [ rna.biorseoBGSUBayesPairA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairA.predictions)],
1564 -# [ rna.biorseoBGSUBayesPairB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairB.predictions)],
1565 -# [ rna.biorseoBGSUBayesPairC.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairC.predictions)],
1566 -# [ rna.biorseoBGSUBayesPairD.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairD.predictions)],
1567 -#]
1568 -# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97])
1569 -# for patch, color in zip(bplot['boxes'], colors):
1570 -# patch.set_facecolor(color)
1571 -# plt.axvline(x=0, color="black", linewidth=1)
1572 -# plt.yticks([1.0+i for i in range(4)], labels)
1573 -# plt.xlim((-1.1, 1.1))
1574 -# # plt.xlabel("Improvement in MCC")
1575 -# plt.title("(B) The RNA Motif Atlas 3.2 + BayesPairing")
1576 -
1577 -
1578 -# plt.subplot(223)
1579 -# x = [
1580 -# [ rna.biorseoRawA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)],
1581 -# [ rna.biorseoRawB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)],
1582 -#]
1583 -# colors = ['red', 'firebrick']
1584 -# labels = ["$f_{1A}$", "$f_{1B}$"]
1585 -# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97])
1586 -# for patch, color in zip(bplot['boxes'], colors):
1587 -# patch.set_facecolor(color)
1588 -# plt.axvline(x=0, color="black", linewidth=1)
1589 -# plt.yticks([1.0+i for i in range(2)], labels)
1590 -# plt.xlabel("Improvement in MCC")
1591 -# plt.xlim((-1.1, 1.1))
1592 -# plt.title("(C) Rna3Dmotifs + Simple pattern matching")
1593 -
1594 -
1595 -# plt.subplot(224)
1596 -# x = [
1597 -# [ rna.biorseoBGSUJAR3DA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DA.predictions)],
1598 -# [ rna.biorseoBGSUJAR3DB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DB.predictions)],
1599 -# [ rna.biorseoBGSUJAR3DC.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DC.predictions)],
1600 -# [ rna.biorseoBGSUJAR3DD.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DD.predictions)],
1601 -#]
1602 -# colors = ['darkturquoise', 'darkcyan', 'royalblue', 'navy']
1603 -# labels = ["$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$"]
1604 -# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97])
1605 -# for patch, color in zip(bplot['boxes'], colors):
1606 -# patch.set_facecolor(color)
1607 -# plt.axvline(x=0, color="black", linewidth=1)
1608 -# plt.yticks([1.0+i for i in range(4)], labels)
1609 -# plt.xlabel("Improvement in MCC")
1610 -# plt.xlim((-1.1, 1.1))
1611 -# plt.title("(D) The RNA Motif Atlas 3.2 + JAR3D")
1612 -# plt.show()
1613 -
1614 -
1615 # # insertion ratio of the best structure 1554 # # insertion ratio of the best structure
1616 # plt.subplot(221) 1555 # plt.subplot(221)
1617 # x = [ 1556 # x = [
......