Showing
4 changed files
with
499 additions
and
281 deletions
... | @@ -21,7 +21,8 @@ bypdir = "" | ... | @@ -21,7 +21,8 @@ bypdir = "" |
# Folder from which every path below is derived.
biorseoDir = "."
# Execute the EditMe file found in biorseoDir at module scope, so any name it
# assigns becomes a module global.
# NOTE(review): exec() runs whatever code EditMe contains; the file must stay
# a trusted, locally controlled configuration file.
with open(biorseoDir + "/EditMe") as config_file:
    exec(compile(config_file.read(), '', 'exec'))
# Folder containing this script (symlinks resolved).
runDir = path.dirname(path.realpath(__file__))
# Default output folder. This must be a plain module global: there is no
# `self` at module scope, so assigning `self.outputf` here raises NameError
# as soon as the module is imported.
outputDir = biorseoDir + "/results/"
tempDir = biorseoDir + "/temp/"
HLmotifDir = biorseoDir + "/data/modules/BGSU/HL/3.2/lib"
ILmotifDir = biorseoDir + "/data/modules/BGSU/IL/3.2/lib"
descfolder = biorseoDir + "/data/modules/DESC"
... | @@ -29,6 +30,17 @@ descfolder = biorseoDir + "/data/modules/DESC" | ... | @@ -29,6 +30,17 @@ descfolder = biorseoDir + "/data/modules/DESC" |
29 | 30 | ||
30 | # ================== CLASSES AND FUNCTIONS ================================ | 31 | # ================== CLASSES AND FUNCTIONS ================================ |
31 | 32 | ||
# Number of rejected sequences, keyed by the first non-ACGU character that
# caused each rejection.
ignored_nt_dict = {}


def is_canonical_nts(seq):
    """Tell whether ``seq`` contains only the nucleotides A, C, G and U.

    A trailing line terminator ("\\n" or "\\r\\n") is ignored, so the sequence
    may be passed either raw (as read from a file) or already stripped. Every
    remaining character is checked, including the last one.

    The comparison is case-sensitive: callers are expected to upper-case the
    sequence first.

    Side effect: when the sequence is rejected, the counter of the first
    offending character is incremented in ``ignored_nt_dict``.

    Args:
        seq: the nucleotide sequence, as a string.

    Returns:
        True if every character is one of "ACGU" (an empty sequence yields
        True), False otherwise.
    """
    # Only the line terminator is dropped; slicing off the last character
    # unconditionally would leave the final nucleotide unchecked whenever the
    # caller has already stripped the newline.
    for c in seq.rstrip("\r\n"):
        if c not in "ACGU":
            ignored_nt_dict[c] = ignored_nt_dict.get(c, 0) + 1
            return False
    return True
43 | + | ||
32 | 44 | ||
33 | class NoDaemonProcess(multiprocessing.Process): | 45 | class NoDaemonProcess(multiprocessing.Process): |
34 | @property | 46 | @property |
... | @@ -107,19 +119,213 @@ class Job: | ... | @@ -107,19 +119,213 @@ class Job: |
107 | self.nthreads = how_many_in_parallel | 119 | self.nthreads = how_many_in_parallel |
108 | 120 | ||
109 | 121 | ||
class _MethodResults:
    """Structures collected for one prediction method."""

    def __init__(self):
        # Distinct secondary structures, in the order they appear in the file.
        self.predictions = []
        # For each entry of `predictions`, the number of '+' characters found
        # on the line it was read from.
        self.ninsertions = []


class RNA:
    """One RNA sequence and the structures predicted for it by each method.

    Each ``get_*_results`` method reads one result file named
    ``<basename><extension>`` and stores the distinct structures it contains
    in the matching attribute (a ``_MethodResults`` object exposing
    ``predictions`` and ``ninsertions``).
    """

    def __init__(self, header, seq, outputf=""):
        """Create an RNA with empty results.

        Args:
            header: name of the RNA; also used as the basename of its files.
            seq: nucleotide sequence.
            outputf: folder prefix (with trailing separator) used by
                ``get_RNAsubopt_results`` when no folder is passed to it.
        """
        self.seq_ = seq
        self.header_ = header
        self.length = len(seq)
        # The loaders build file names from `basename`, and list_jobs() in
        # this file reads `header`; both are aliases of the header, which is
        # the name under which list_jobs() writes the "<header>.fa" file.
        self.basename = header
        self.header = header
        self.outputf = outputf

        # The loaders read `.predictions` / `.ninsertions`, so each result
        # slot has to be a results object rather than a bare list.
        self.rnasubopt = _MethodResults()
        self.biorseoRawA = _MethodResults()
        self.biorseoRawB = _MethodResults()
        self.biorseoBGSUJAR3DA = _MethodResults()
        self.biorseoBGSUJAR3DC = _MethodResults()
        self.biorseoBGSUJAR3DD = _MethodResults()
        self.biorseoBGSUJAR3DB = _MethodResults()
        self.biorseoBayesPairA = _MethodResults()
        self.biorseoBayesPairC = _MethodResults()
        self.biorseoBayesPairD = _MethodResults()
        self.biorseoBayesPairB = _MethodResults()
        self.biorseoBGSUBayesPairA = _MethodResults()
        self.biorseoBGSUBayesPairC = _MethodResults()
        self.biorseoBGSUBayesPairD = _MethodResults()
        self.biorseoBGSUBayesPairB = _MethodResults()

    def _load_predictions(self, results, filepath, with_insertions=True):
        """Append to ``results`` the distinct structures found in ``filepath``.

        The first two lines of the file are skipped (presumably the header and
        the sequence -- TODO confirm against the tools' output format). On
        every other line the structure is the text before the first space
        and, when ``with_insertions`` is true, before the first tab; the
        number of '+' characters of that line is then recorded as well.

        Raises:
            OSError: if ``filepath`` cannot be opened.
        """
        with open(filepath, "r") as handle:
            lines = handle.readlines()
        for line in lines[2:]:
            ss = line.split(' ')[0]
            if with_insertions:
                ss = ss.split('\t')[0]
            if ss not in results.predictions:
                results.predictions.append(ss)
                if with_insertions:
                    results.ninsertions.append(line.count('+'))

    def _load_if_present(self, results, targetdir, extension):
        """Load ``targetdir + basename + extension`` if that file exists."""
        filepath = targetdir + self.basename + extension
        if path.isfile(filepath):
            self._load_predictions(results, filepath)

    def get_RNAsubopt_results(self, targetdir=None):
        """Load "<basename>.subopt"; the file must exist.

        Args:
            targetdir: folder prefix of the file; defaults to ``self.outputf``.

        Raises:
            OSError: if the file cannot be opened.
        """
        if targetdir is None:
            targetdir = self.outputf
        self._load_predictions(self.rnasubopt,
                               targetdir + self.basename + ".subopt",
                               with_insertions=False)

    def get_biorseoBayesPairA_results(self, targetdir):
        """Load "<basename>.bypA" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBayesPairA, targetdir, ".bypA")

    def get_biorseoBayesPairB_results(self, targetdir):
        """Load "<basename>.bypB" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBayesPairB, targetdir, ".bypB")

    def get_biorseoBayesPairC_results(self, targetdir):
        """Load "<basename>.bypC" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBayesPairC, targetdir, ".bypC")

    def get_biorseoBayesPairD_results(self, targetdir):
        """Load "<basename>.bypD" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBayesPairD, targetdir, ".bypD")

    def get_biorseoRawA_results(self, targetdir):
        """Load "<basename>.rawA" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoRawA, targetdir, ".rawA")

    def get_biorseoRawB_results(self, targetdir):
        """Load "<basename>.rawB" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoRawB, targetdir, ".rawB")

    def get_biorseoBGSUJAR3DA_results(self, targetdir):
        """Load "<basename>.jar3dA" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBGSUJAR3DA, targetdir, ".jar3dA")

    def get_biorseoBGSUJAR3DB_results(self, targetdir):
        """Load "<basename>.jar3dB" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBGSUJAR3DB, targetdir, ".jar3dB")

    def get_biorseoBGSUJAR3DC_results(self, targetdir):
        """Load "<basename>.jar3dC" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBGSUJAR3DC, targetdir, ".jar3dC")

    def get_biorseoBGSUJAR3DD_results(self, targetdir):
        """Load "<basename>.jar3dD" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBGSUJAR3DD, targetdir, ".jar3dD")

    def get_biorseoBGSUBayesPairA_results(self, targetdir):
        """Load "<basename>.bgsubypA" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBGSUBayesPairA, targetdir, ".bgsubypA")

    def get_biorseoBGSUBayesPairB_results(self, targetdir):
        """Load "<basename>.bgsubypB" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBGSUBayesPairB, targetdir, ".bgsubypB")

    def get_biorseoBGSUBayesPairC_results(self, targetdir):
        """Load "<basename>.bgsubypC" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBGSUBayesPairC, targetdir, ".bgsubypC")

    def get_biorseoBGSUBayesPairD_results(self, targetdir):
        """Load "<basename>.bgsubypD" from ``targetdir`` if it exists."""
        self._load_if_present(self.biorseoBGSUBayesPairD, targetdir, ".bgsubypD")
315 | + | ||
110 | class BiorseoInstance: | 316 | class BiorseoInstance: |
111 | def __init__(self, argv): | 317 | def __init__(self, argv): |
112 | # set default options | 318 | # set default options |
113 | self.type = "dpm" | 319 | self.type = "dpm" |
114 | self.modules = "desc" | 320 | self.modules = "desc" |
115 | self.func = 'B' | 321 | self.func = 'B' |
116 | - self.outputf = outputDir | 322 | + self.outputf = self.outputf |
117 | self.jobcount = 0 | 323 | self.jobcount = 0 |
118 | 324 | ||
119 | # Parse options | 325 | # Parse options |
120 | try: | 326 | try: |
121 | opts, args = getopt.getopt( | 327 | opts, args = getopt.getopt( |
122 | - argv, "hil::o:", ["type=", "func=", "modules="]) | 328 | + argv, "hi:o:", ["type=", "func=", "modules="]) |
123 | except getopt.GetoptError: | 329 | except getopt.GetoptError: |
124 | print("Please provide arguments !") | 330 | print("Please provide arguments !") |
125 | sys.exit(2) | 331 | sys.exit(2) |
... | @@ -130,9 +336,6 @@ class BiorseoInstance: | ... | @@ -130,9 +336,6 @@ class BiorseoInstance: |
130 | elif opt == "-i": | 336 | elif opt == "-i": |
131 | self.inputfile = arg | 337 | self.inputfile = arg |
132 | self.mode = 0 # single sequence mode | 338 | self.mode = 0 # single sequence mode |
133 | - elif opt == "-l": | ||
134 | - self.inputfile = arg | ||
135 | - self.mode = 1 # batch mode | ||
136 | elif opt == "-o": | 339 | elif opt == "-o": |
137 | self.outputf = arg # output file or folder... | 340 | self.outputf = arg # output file or folder... |
138 | elif opt == "--func": | 341 | elif opt == "--func": |
... | @@ -153,6 +356,9 @@ class BiorseoInstance: | ... | @@ -153,6 +356,9 @@ class BiorseoInstance: |
153 | else: | 356 | else: |
154 | raise "Unknown option " + opt | 357 | raise "Unknown option " + opt |
155 | 358 | ||
359 | + # create jobs | ||
360 | + self.list_jobs() | ||
361 | + | ||
156 | if self.mode: | 362 | if self.mode: |
157 | # Create a job manager | 363 | # Create a job manager |
158 | self.manager = Manager() | 364 | self.manager = Manager() |
... | @@ -321,7 +527,7 @@ class BiorseoInstance: | ... | @@ -321,7 +527,7 @@ class BiorseoInstance: |
321 | def launch_JAR3D(self, seq_, basename): | 527 | def launch_JAR3D(self, seq_, basename): |
322 | rnasubopt_preds = [] | 528 | rnasubopt_preds = [] |
323 | # Extracting probable loops from RNA-subopt structures | 529 | # Extracting probable loops from RNA-subopt structures |
324 | - rna = open(outputDir + basename + ".subopt", "r") | 530 | + rna = open(self.outputf + basename + ".subopt", "r") |
325 | lines = rna.readlines() | 531 | lines = rna.readlines() |
326 | rna.close() | 532 | rna.close() |
327 | for i in range(2, len(lines)): | 533 | for i in range(2, len(lines)): |
... | @@ -352,7 +558,7 @@ class BiorseoInstance: | ... | @@ -352,7 +558,7 @@ class BiorseoInstance: |
352 | insertion_sites.sort(reverse=True) | 558 | insertion_sites.sort(reverse=True) |
353 | # Writing results to CSV file | 559 | # Writing results to CSV file |
354 | c = 0 | 560 | c = 0 |
355 | - resultsfile = open(outputDir+basename+".sites.csv", "w") | 561 | + resultsfile = open(self.outputf+basename+".sites.csv", "w") |
356 | resultsfile.write("Motif,Rotation,Score,Start1,End1,Start2,End2\n") | 562 | resultsfile.write("Motif,Rotation,Score,Start1,End1,Start2,End2\n") |
357 | for site in insertion_sites: | 563 | for site in insertion_sites: |
358 | if site.score > 10: | 564 | if site.score > 10: |
... | @@ -372,7 +578,7 @@ class BiorseoInstance: | ... | @@ -372,7 +578,7 @@ class BiorseoInstance: |
372 | def launch_BayesPairing(self, module_type, seq_, header_, basename): | 578 | def launch_BayesPairing(self, module_type, seq_, header_, basename): |
373 | chdir(bypdir) | 579 | chdir(bypdir) |
374 | 580 | ||
375 | - cmd = ["python3", "parse_sequences.py", "-seq", outputDir + | 581 | + cmd = ["python3", "parse_sequences.py", "-seq", self.outputf + |
376 | basename + ".fa", "-d", module_type, "-interm", "1"] | 582 | basename + ".fa", "-d", module_type, "-interm", "1"] |
377 | 583 | ||
378 | logfile = open("log_of_the_run.sh", 'a') | 584 | logfile = open("log_of_the_run.sh", 'a') |
... | @@ -389,9 +595,9 @@ class BiorseoInstance: | ... | @@ -389,9 +595,9 @@ class BiorseoInstance: |
389 | l = BypLog[idx] | 595 | l = BypLog[idx] |
390 | insertion_sites = [x for x in ast.literal_eval(l.split(":")[1][1:])] | 596 | insertion_sites = [x for x in ast.literal_eval(l.split(":")[1][1:])] |
391 | if module_type == "rna3dmotif": | 597 | if module_type == "rna3dmotif": |
392 | - rna = open(outputDir + basename + ".byp.csv", "w") | 598 | + rna = open(self.outputf + basename + ".byp.csv", "w") |
393 | else: | 599 | else: |
394 | - rna = open(outputDir + basename + ".bgsubyp.csv", "w") | 600 | + rna = open(self.outputf + basename + ".bgsubyp.csv", "w") |
395 | rna.write("Motif,Score,Start1,End1,Start2,End2...\n") | 601 | rna.write("Motif,Score,Start1,End1,Start2,End2...\n") |
396 | for i, module in enumerate(insertion_sites): | 602 | for i, module in enumerate(insertion_sites): |
397 | if len(module): | 603 | if len(module): |
... | @@ -477,6 +683,80 @@ class BiorseoInstance: | ... | @@ -477,6 +683,80 @@ class BiorseoInstance: |
477 | raise "Unknown data type !" | 683 | raise "Unknown data type !" |
478 | return path.isfile(self.outputf + basename + extension) | 684 | return path.isfile(self.outputf + basename + extension) |
479 | 685 | ||
def list_jobs(self):
    """Read the input FASTA file and build the jobs to run for each RNA.

    The input file is read as alternating header / sequence lines. Sequences
    are upper-cased, and those containing anything other than A, C, G, U are
    skipped (is_canonical_nts() keeps the tally that is printed afterwards).
    Each accepted RNA is written to "<outputf><header>.fa" unless that file
    already exists. '/' in a header is replaced by '_' so the header can be
    used as a file name.

    Side effects:
        * sets ``self.mode`` to 1 as soon as a second header line is read;
        * writes one FASTA file per accepted RNA;
        * stores the job list in ``self.joblist``.

    Returns:
        The list of Job objects that was built.

    Raises:
        OSError: if the input file cannot be read or a FASTA file cannot be
            written.
        ValueError: if ``self.type`` is "byp" and ``self.modules`` is neither
            "desc" nor "bgsu".
    """
    print("loading file(s)...")

    # Read fasta file, which can contain one or several RNAs
    RNAcontainer = []
    header = ""
    seen_header = False
    with open(self.inputfile, "r") as db:
        for lineno, raw in enumerate(db):
            # Strip only the line terminator: chopping the last character
            # unconditionally loses a nucleotide when the file does not end
            # with a newline.
            line = raw.rstrip("\r\n")
            if lineno % 2 == 0:
                if seen_header:  # This is our second RNA in the fasta file
                    self.mode = 1
                seen_header = True
                # Drop the FASTA '>' marker: it is added back when the
                # per-RNA file is written, and the header doubles as a
                # file basename.
                header = line[1:] if line.startswith(">") else line
                continue

            seq = line.upper()
            # The terminator is re-appended for the check so that every
            # nucleotide, including the last one, is validated whether or
            # not is_canonical_nts() ignores the final character.
            if not is_canonical_nts(seq + "\n"):
                continue
            header = header.replace('/', '_')
            RNAcontainer.append(RNA(header, seq))
            fastafile = self.outputf + header + ".fa"
            if not path.isfile(fastafile):
                with open(fastafile, "w") as rna:
                    rna.write(">" + header + '\n')
                    rna.write(seq + '\n')

    for nt, number in ignored_nt_dict.items():
        print("ignored %d sequences because of char %c" % (number, nt))
    tot = len(RNAcontainer)
    print("Loaded %d RNAs." % (tot))

    # define job list
    # NOTE(review): the check_* callables are referenced as module-level
    # names; confirm they are defined at that scope in this file.
    joblist = []
    executable = biorseoDir + "/bin/biorseo"
    for instance in RNAcontainer:
        # RNA stores the name it was built with in `header_`.
        basename = instance.header_
        fastafile = self.outputf + basename + ".fa"
        method_type = ""
        csv = ""
        ext = ".raw"

        if self.type == "jar3d":
            ext = ".jar3d"
            method_type = "--jar3dcsv"
            csv = self.outputf + basename + ".sites.csv"

            # RNAsubopt
            joblist.append(Job(command=["RNAsubopt", "-i", fastafile, "--outfile=" + basename + ".subopt"], priority=1, checkFunc=check_RNAsubopt, checkArgs=[basename]))
            joblist.append(Job(command=["mv", basename + ".subopt", self.outputf], priority=2, checkFunc=check_RNAsubopt, checkArgs=[basename]))
            # JAR3D
            joblist.append(Job(function=self.launch_JAR3D, args=[instance.seq_, basename], priority=3, how_many_in_parallel=1, checkFunc=check_JAR3D, checkArgs=[basename]))

        if self.type == "byp":
            method_type = "--bayespaircsv"
            if self.modules == "desc":
                ext = ".byp"
                csv = self.outputf + basename + ".byp.csv"
                joblist.append(Job(function=self.launch_BayesPairing, args=["rna3dmotif", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=1, checkFunc=check_BayesPairing, checkArgs=[basename]))
            elif self.modules == "bgsu":
                ext = ".bgsubyp"
                csv = self.outputf + basename + ".bgsubyp.csv"
                joblist.append(Job(function=self.launch_BayesPairing, args=["3dmotifatlas", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=1, checkFunc=check_BGSUBayesPairing, checkArgs=[basename]))
            else:
                # Without this, `csv` would be used below with no value.
                raise ValueError("Unknown module source for BayesPairing: %s" % self.modules)

        command = [executable, "-s", fastafile]
        if method_type:
            command += [method_type, csv]
        command += ["-o", self.outputf + basename + ext + self.func, "--type", self.func]
        joblist.append(Job(command=command, priority=4, timeout=3600, how_many_in_parallel=3))

    # Keep the result: a list that is only built locally is lost on return.
    self.joblist = joblist
    return joblist
759 | + | ||
480 | 760 | ||
481 | if __name__ == "__main__": | 761 | if __name__ == "__main__": |
482 | BiorseoInstance(sys.argv) | 762 | BiorseoInstance(sys.argv) | ... | ... |
doc/fig/pseudobase.jpg
0 → 100644
102 KB
... | @@ -736,7 +736,7 @@ class RNA: | ... | @@ -736,7 +736,7 @@ class RNA: |
736 | m.best_pred = p | 736 | m.best_pred = p |
737 | if max(m.ninsertions) > 0 and float(n)/max(m.ninsertions) > m.ratio: | 737 | if max(m.ninsertions) > 0 and float(n)/max(m.ninsertions) > m.ratio: |
738 | m.ratio = float(n)/max(m.ninsertions) | 738 | m.ratio = float(n)/max(m.ninsertions) |
739 | - | 739 | + |
740 | def get_biokop_results(self): | 740 | def get_biokop_results(self): |
741 | if path.isfile(outputDir + self.basename + ".biok"): | 741 | if path.isfile(outputDir + self.basename + ".biok"): |
742 | rna = open(outputDir + self.basename + ".biok", "r") | 742 | rna = open(outputDir + self.basename + ".biok", "r") |
... | @@ -1005,93 +1005,93 @@ print("Loaded %d RNAs of length between 10 and 100. %d of them contain pseudokno | ... | @@ -1005,93 +1005,93 @@ print("Loaded %d RNAs of length between 10 and 100. %d of them contain pseudokno |
1005 | 1005 | ||
1006 | # #================= PREDICTION OF STRUCTURES =============================== | 1006 | # #================= PREDICTION OF STRUCTURES =============================== |
1007 | 1007 | ||
1008 | -# #define job list | 1008 | +#define job list |
1009 | -# joblist = [] | 1009 | +joblist = [] |
1010 | -# for instance in RNAcontainer: | 1010 | +for instance in RNAcontainer: |
1011 | -# basename = instance.basename | 1011 | + basename = instance.basename |
1012 | -# # RNAsubopt | 1012 | + # RNAsubopt |
1013 | -# joblist.append(Job(command=["RNAsubopt", "-i", outputDir + basename + ".fa", "--outfile="+ basename + ".subopt"], priority=1, checkFunc=check_RNAsubopt, checkArgs=[basename])) | 1013 | + joblist.append(Job(command=["RNAsubopt", "-i", outputDir + basename + ".fa", "--outfile="+ basename + ".subopt"], priority=1, checkFunc=check_RNAsubopt, checkArgs=[basename])) |
1014 | -# joblist.append(Job(command=["mv", basename + ".subopt", outputDir], priority=2, checkFunc=check_RNAsubopt, checkArgs=[basename])) | 1014 | + joblist.append(Job(command=["mv", basename + ".subopt", outputDir], priority=2, checkFunc=check_RNAsubopt, checkArgs=[basename])) |
1015 | -# # JAR3D | 1015 | + # JAR3D |
1016 | -# joblist.append(Job(function=launch_JAR3D, args=[instance.seq_, basename], priority=3, how_many_in_parallel=1, checkFunc=check_JAR3D, checkArgs=[basename])) | 1016 | + joblist.append(Job(function=launch_JAR3D, args=[instance.seq_, basename], priority=3, how_many_in_parallel=1, checkFunc=check_JAR3D, checkArgs=[basename])) |
1017 | -# # BayesPairing and BGSUBayesPairing | 1017 | + # BayesPairing and BGSUBayesPairing |
1018 | -# joblist.append(Job(function=launch_BayesPairing, args=["rna3dmotif", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BayesPairing, checkArgs=[basename])) | 1018 | + joblist.append(Job(function=launch_BayesPairing, args=["rna3dmotif", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BayesPairing, checkArgs=[basename])) |
1019 | -# joblist.append(Job(function=launch_BayesPairing, args=["3dmotifatlas", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BGSUBayesPairing, checkArgs=[basename])) | 1019 | + joblist.append(Job(function=launch_BayesPairing, args=["3dmotifatlas", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BGSUBayesPairing, checkArgs=[basename])) |
1020 | -# # biorseoBGSUJAR3DA-D | 1020 | + # biorseoBGSUJAR3DA-D |
1021 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, False])) | 1021 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, False])) |
1022 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, False])) | 1022 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, False])) |
1023 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, False])) | 1023 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, False])) |
1024 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, False])) | 1024 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, False])) |
1025 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, True])) | 1025 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, True])) |
1026 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, True])) | 1026 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, True])) |
1027 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, True])) | 1027 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, True])) |
1028 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, True])) | 1028 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, True])) |
1029 | -# # biorseoBGSUBayesPairA-D | 1029 | + # biorseoBGSUBayesPairA-D |
1030 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, False])) | 1030 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, False])) |
1031 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, False])) | 1031 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, False])) |
1032 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, False])) | 1032 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, False])) |
1033 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, False])) | 1033 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, False])) |
1034 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, True])) | 1034 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, True])) |
1035 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, True])) | 1035 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, True])) |
1036 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, True])) | 1036 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, True])) |
1037 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, True])) | 1037 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, True])) |
1038 | -# # biorseoBayesPairA-D | 1038 | + # biorseoBayesPairA-D |
1039 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, False])) | 1039 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, False])) |
1040 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, False])) | 1040 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, False])) |
1041 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, False])) | 1041 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, False])) |
1042 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, False])) | 1042 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, False])) |
1043 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, True])) | 1043 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, True])) |
1044 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, True])) | 1044 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, True])) |
1045 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, True])) | 1045 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, True])) |
1046 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, True])) | 1046 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, True])) |
1047 | -# # biorseoRawA,B | 1047 | + # biorseoRawA,B |
1048 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, False])) | 1048 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, False])) |
1049 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, False])) | 1049 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, False])) |
1050 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, True])) | 1050 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, True])) |
1051 | -# joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, True])) | 1051 | + joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, True])) |
1052 | -# # RNA-MoIP | 1052 | + # RNA-MoIP |
1053 | -# joblist.append(Job(function=launch_RNAMoIP, args=[instance.seq_, instance.header_, basename], priority=3, timeout=3600, checkFunc=check_RNAMoIP, checkArgs=[basename])) | 1053 | + joblist.append(Job(function=launch_RNAMoIP, args=[instance.seq_, instance.header_, basename], priority=3, timeout=3600, checkFunc=check_RNAMoIP, checkArgs=[basename])) |
1054 | -# # Biokop | 1054 | + # Biokop |
1055 | -# joblist.append(Job(command=[biorseoDir + "/../biokop/biokop", "-n1", "-i", outputDir + basename + ".fa", "-o", outputDir + basename + ".biok"], priority=5, timeout=15000, how_many_in_parallel=3, checkFunc=check_biokop, checkArgs=[basename])) | 1055 | + joblist.append(Job(command=[biorseoDir + "/../biokop/biokop", "-n1", "-i", outputDir + basename + ".fa", "-o", outputDir + basename + ".biok"], priority=5, timeout=15000, how_many_in_parallel=3, checkFunc=check_biokop, checkArgs=[basename])) |
1056 | - | 1056 | + |
1057 | - | 1057 | + |
1058 | -# # execute jobs | 1058 | +# execute jobs |
1059 | -# jobs = {} | 1059 | +jobs = {} |
1060 | -# jobcount = len(joblist) | 1060 | +jobcount = len(joblist) |
1061 | -# for job in joblist: | 1061 | +for job in joblist: |
1062 | -# if job.priority_ not in jobs.keys(): | 1062 | + if job.priority_ not in jobs.keys(): |
1063 | -# jobs[job.priority_] = {} | 1063 | + jobs[job.priority_] = {} |
1064 | -# if job.nthreads not in jobs[job.priority_].keys(): | 1064 | + if job.nthreads not in jobs[job.priority_].keys(): |
1065 | -# jobs[job.priority_][job.nthreads] = [] | 1065 | + jobs[job.priority_][job.nthreads] = [] |
1066 | -# jobs[job.priority_][job.nthreads].append(job) | 1066 | + jobs[job.priority_][job.nthreads].append(job) |
1067 | -# nprio = max(jobs.keys()) | 1067 | +nprio = max(jobs.keys()) |
1068 | - | 1068 | + |
1069 | - | 1069 | + |
1070 | -# for i in range(1,nprio+1): | 1070 | +for i in range(1,nprio+1): |
1071 | -# if not len(jobs[i].keys()): continue | 1071 | + if not len(jobs[i].keys()): continue |
1072 | - | 1072 | + |
1073 | -# # check the thread numbers | 1073 | + # check the thread numbers |
1074 | -# different_thread_numbers = [n for n in jobs[i].keys()] | 1074 | + different_thread_numbers = [n for n in jobs[i].keys()] |
1075 | -# different_thread_numbers.sort() | 1075 | + different_thread_numbers.sort() |
1076 | - | 1076 | + |
1077 | -# for n in different_thread_numbers: | 1077 | + for n in different_thread_numbers: |
1078 | -# bunch = jobs[i][n] | 1078 | + bunch = jobs[i][n] |
1079 | -# if not len(bunch): continue | 1079 | + if not len(bunch): continue |
1080 | -# pool = MyPool(processes=n) | 1080 | + pool = MyPool(processes=n) |
1081 | -# results = pool.map(execute_job, bunch) | 1081 | + results = pool.map(execute_job, bunch) |
1082 | -# pool.close() | 1082 | + pool.close() |
1083 | -# pool.join() | 1083 | + pool.join() |
1084 | - | 1084 | + |
1085 | -# if len(fails): | 1085 | +if len(fails): |
1086 | -# print() | 1086 | + print() |
1087 | -# print("Some jobs failed! :") | 1087 | + print("Some jobs failed! :") |
1088 | -# print() | 1088 | + print() |
1089 | -# for j in fails: | 1089 | + for j in fails: |
1090 | -# print(j.cmd_) | 1090 | + print(j.cmd_) |
1091 | -# else: | 1091 | +else: |
1092 | -# print() | 1092 | + print() |
1093 | -# print("Computations ran successfully.") | 1093 | + print("Computations ran successfully.") |
1094 | -# print() | 1094 | + print() |
1095 | 1095 | ||
1096 | 1096 | ||
1097 | # ================= Statistics (without pseudoknots) ======================== | 1097 | # ================= Statistics (without pseudoknots) ======================== |
... | @@ -1183,43 +1183,43 @@ for instance in RNAcontainer: | ... | @@ -1183,43 +1183,43 @@ for instance in RNAcontainer: |
1183 | instance.evaluate() | 1183 | instance.evaluate() |
1184 | 1184 | ||
1185 | x_PK = [ | 1185 | x_PK = [ |
1186 | - [ rna.biokop.max_mcc for rna in RNAcontainer if len(rna.biokop.predictions)], | 1186 | + [ rna.biokop.avg_mcc for rna in RNAcontainer if len(rna.biokop.predictions)], |
1187 | - [ rna.biokop.max_mcc for rna in RNAcontainer if len(rna.biokop.predictions)], | 1187 | + [ rna.biokop.avg_mcc for rna in RNAcontainer if len(rna.biokop.predictions)], |
1188 | - [ rna.biorseoRawA.max_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)], | 1188 | + [ rna.biorseoRawA.avg_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)], |
1189 | - [ rna.biorseoRawB.max_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)], | 1189 | + [ rna.biorseoRawB.avg_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)], |
1190 | - [ rna.biorseoBayesPairA.max_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairA.predictions)], | 1190 | + [ rna.biorseoBayesPairA.avg_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairA.predictions)], |
1191 | - [ rna.biorseoBayesPairB.max_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairB.predictions)], | 1191 | + [ rna.biorseoBayesPairB.avg_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairB.predictions)], |
1192 | - [ rna.biorseoBayesPairC.max_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairC.predictions)], | 1192 | + [ rna.biorseoBayesPairC.avg_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairC.predictions)], |
1193 | - [ rna.biorseoBayesPairD.max_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairD.predictions)], | 1193 | + [ rna.biorseoBayesPairD.avg_mcc for rna in RNAcontainer if len(rna.biorseoBayesPairD.predictions)], |
1194 | - [ rna.biorseoBGSUJAR3DA.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DA.predictions)], | 1194 | + [ rna.biorseoBGSUJAR3DA.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DA.predictions)], |
1195 | - [ rna.biorseoBGSUJAR3DB.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DB.predictions)], | 1195 | + [ rna.biorseoBGSUJAR3DB.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DB.predictions)], |
1196 | - [ rna.biorseoBGSUJAR3DC.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DC.predictions)], | 1196 | + [ rna.biorseoBGSUJAR3DC.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DC.predictions)], |
1197 | - [ rna.biorseoBGSUJAR3DD.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DD.predictions)], | 1197 | + [ rna.biorseoBGSUJAR3DD.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DD.predictions)], |
1198 | - [ rna.biorseoBGSUBayesPairA.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairA.predictions)], | 1198 | + [ rna.biorseoBGSUBayesPairA.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairA.predictions)], |
1199 | - [ rna.biorseoBGSUBayesPairB.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairB.predictions)], | 1199 | + [ rna.biorseoBGSUBayesPairB.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairB.predictions)], |
1200 | - [ rna.biorseoBGSUBayesPairC.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairC.predictions)], | 1200 | + [ rna.biorseoBGSUBayesPairC.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairC.predictions)], |
1201 | - [ rna.biorseoBGSUBayesPairD.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairD.predictions)] | 1201 | + [ rna.biorseoBGSUBayesPairD.avg_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairD.predictions)] |
1202 | ] | 1202 | ] |
1203 | 1203 | ||
1204 | RNAs_fully_predicted = [ x for x in RNAcontainer if x.has_complete_results(True)] | 1204 | RNAs_fully_predicted = [ x for x in RNAcontainer if x.has_complete_results(True)] |
1205 | 1205 | ||
1206 | x_PK_fully = [ | 1206 | x_PK_fully = [ |
1207 | - [ rna.biokop.max_mcc for rna in RNAs_fully_predicted], | 1207 | + [ rna.biokop.avg_mcc for rna in RNAs_fully_predicted], |
1208 | - [ rna.biokop.max_mcc for rna in RNAs_fully_predicted], | 1208 | + [ rna.biokop.avg_mcc for rna in RNAs_fully_predicted], |
1209 | - [ rna.biorseoRawA.max_mcc for rna in RNAs_fully_predicted], | 1209 | + [ rna.biorseoRawA.avg_mcc for rna in RNAs_fully_predicted], |
1210 | - [ rna.biorseoRawB.max_mcc for rna in RNAs_fully_predicted], | 1210 | + [ rna.biorseoRawB.avg_mcc for rna in RNAs_fully_predicted], |
1211 | - [ rna.biorseoBayesPairA.max_mcc for rna in RNAs_fully_predicted], | 1211 | + [ rna.biorseoBayesPairA.avg_mcc for rna in RNAs_fully_predicted], |
1212 | - [ rna.biorseoBayesPairB.max_mcc for rna in RNAs_fully_predicted], | 1212 | + [ rna.biorseoBayesPairB.avg_mcc for rna in RNAs_fully_predicted], |
1213 | - [ rna.biorseoBayesPairC.max_mcc for rna in RNAs_fully_predicted], | 1213 | + [ rna.biorseoBayesPairC.avg_mcc for rna in RNAs_fully_predicted], |
1214 | - [ rna.biorseoBayesPairD.max_mcc for rna in RNAs_fully_predicted], | 1214 | + [ rna.biorseoBayesPairD.avg_mcc for rna in RNAs_fully_predicted], |
1215 | - [ rna.biorseoBGSUJAR3DA.max_mcc for rna in RNAs_fully_predicted], | 1215 | + [ rna.biorseoBGSUJAR3DA.avg_mcc for rna in RNAs_fully_predicted], |
1216 | - [ rna.biorseoBGSUJAR3DB.max_mcc for rna in RNAs_fully_predicted], | 1216 | + [ rna.biorseoBGSUJAR3DB.avg_mcc for rna in RNAs_fully_predicted], |
1217 | - [ rna.biorseoBGSUJAR3DC.max_mcc for rna in RNAs_fully_predicted], | 1217 | + [ rna.biorseoBGSUJAR3DC.avg_mcc for rna in RNAs_fully_predicted], |
1218 | - [ rna.biorseoBGSUJAR3DD.max_mcc for rna in RNAs_fully_predicted], | 1218 | + [ rna.biorseoBGSUJAR3DD.avg_mcc for rna in RNAs_fully_predicted], |
1219 | - [ rna.biorseoBGSUBayesPairA.max_mcc for rna in RNAs_fully_predicted], | 1219 | + [ rna.biorseoBGSUBayesPairA.avg_mcc for rna in RNAs_fully_predicted], |
1220 | - [ rna.biorseoBGSUBayesPairB.max_mcc for rna in RNAs_fully_predicted], | 1220 | + [ rna.biorseoBGSUBayesPairB.avg_mcc for rna in RNAs_fully_predicted], |
1221 | - [ rna.biorseoBGSUBayesPairC.max_mcc for rna in RNAs_fully_predicted], | 1221 | + [ rna.biorseoBGSUBayesPairC.avg_mcc for rna in RNAs_fully_predicted], |
1222 | - [ rna.biorseoBGSUBayesPairD.max_mcc for rna in RNAs_fully_predicted], | 1222 | + [ rna.biorseoBGSUBayesPairD.avg_mcc for rna in RNAs_fully_predicted], |
1223 | ] # We ensure the same number of RNAs in every sample by discarding those for which computations did not finish or succeed. | 1223 | ] # We ensure the same number of RNAs in every sample by discarding those for which computations did not finish or succeed. |
1224 | 1224 | ||
1225 | print() | 1225 | print() |
... | @@ -1296,43 +1296,43 @@ print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop | ... | @@ -1296,43 +1296,43 @@ print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop |
1296 | 1296 | ||
1297 | # ================= PLOTS OF RESULTS ======================================= | 1297 | # ================= PLOTS OF RESULTS ======================================= |
1298 | 1298 | ||
1299 | -merge = [ x_PK_fully[0], # Biokop | 1299 | +merge = [ x_noPK[0], # RNA subopt |
1300 | - x_noPK_fully[0], # RNA subopt | 1300 | + x_noPK[1], # RNA-MoIP |
1301 | - x_noPK_fully[1], # RNA-MoIP | 1301 | + x_PK[0], # Biokop |
1302 | - x_noPK_fully[2], x_PK_fully[2], #biorseoRawA | 1302 | + x_PK[2], #biorseoRawA |
1303 | - x_noPK_fully[3], x_PK_fully[3], #biorseoRawB | 1303 | + x_PK[3], #biorseoRawB |
1304 | - x_noPK_fully[4], x_PK_fully[4], #biorseoBayesPairA | 1304 | + x_PK[4], #biorseoBayesPairA |
1305 | - x_noPK_fully[5], x_PK_fully[5], #biorseoBayesPairB | 1305 | + x_PK[5], #biorseoBayesPairB |
1306 | - x_noPK_fully[6], x_PK_fully[6], #biorseoBayesPairC | 1306 | + x_PK[6], #biorseoBayesPairC |
1307 | - x_noPK_fully[7], x_PK_fully[7], #biorseoBayesPairD | 1307 | + x_PK[7], #biorseoBayesPairD |
1308 | - x_noPK_fully[8], x_PK_fully[8], #biorseoBGSUJAR3DA | 1308 | + x_PK[8], #biorseoBGSUJAR3DA |
1309 | - x_noPK_fully[9], x_PK_fully[9], #biorseoBGSUJAR3DB | 1309 | + x_PK[9], #biorseoBGSUJAR3DB |
1310 | - x_noPK_fully[10], x_PK_fully[10], #biorseoBGSUJAR3DC | 1310 | + x_PK[10], #biorseoBGSUJAR3DC |
1311 | - x_noPK_fully[11], x_PK_fully[11], #biorseoBGSUJAR3DD | 1311 | + x_PK[11], #biorseoBGSUJAR3DD |
1312 | - x_noPK_fully[12], x_PK_fully[12], #biorseoBGSUBayesPairA | 1312 | + x_PK[12], #biorseoBGSUBayesPairA |
1313 | - x_noPK_fully[13], x_PK_fully[13], #biorseoBGSUBayesPairB | 1313 | + x_PK[13], #biorseoBGSUBayesPairB |
1314 | - x_noPK_fully[14], x_PK_fully[14], #biorseoBGSUBayesPairC | 1314 | + x_PK[14], #biorseoBGSUBayesPairC |
1315 | - x_noPK_fully[15], x_PK_fully[15], #biorseoBGSUBayesPairD | 1315 | + x_PK[15], #biorseoBGSUBayesPairD |
1316 | ] | 1316 | ] |
1317 | 1317 | ||
1318 | -colors = [ 'green', 'blue', 'goldenrod', | 1318 | +colors = [ 'blue', 'goldenrod', 'green', |
1319 | - 'darkturquoise', 'darkturquoise', | 1319 | + 'red', |
1320 | - 'red', 'red', | 1320 | + 'firebrick', |
1321 | - 'firebrick', 'firebrick', | 1321 | + 'limegreen', |
1322 | - 'limegreen', 'limegreen', | 1322 | + 'olive', |
1323 | - 'olive', 'olive', | 1323 | + 'forestgreen', |
1324 | - 'forestgreen', 'forestgreen', | 1324 | + 'lime', |
1325 | - 'lime', 'lime', | 1325 | + 'darkcyan', |
1326 | - 'darkcyan', 'darkcyan', | 1326 | + 'royalblue', |
1327 | - 'royalblue', 'royalblue', | 1327 | + 'navy', |
1328 | - 'navy', 'navy', | 1328 | + 'limegreen', |
1329 | - 'limegreen', 'limegreen', | 1329 | + 'olive', |
1330 | - 'olive', 'olive', | 1330 | + 'forestgreen', |
1331 | - 'forestgreen', 'forestgreen', | 1331 | + 'lime' |
1332 | - 'lime', 'lime' | ||
1333 | ] | 1332 | ] |
1334 | -labels = [ "Biokop", "RNAsubopt", | 1333 | +labels = [ "RNAsubopt", |
1335 | "RNA-MoIP", | 1334 | "RNA-MoIP", |
1335 | + "Biokop", | ||
1336 | "$f_{1A}$", | 1336 | "$f_{1A}$", |
1337 | "$f_{1B}$", | 1337 | "$f_{1B}$", |
1338 | "$f_{1A}$", | 1338 | "$f_{1A}$", |
... | @@ -1349,49 +1349,64 @@ labels = [ "Biokop", "RNAsubopt", | ... | @@ -1349,49 +1349,64 @@ labels = [ "Biokop", "RNAsubopt", |
1349 | "$f_{1D}$" | 1349 | "$f_{1D}$" |
1350 | ] | 1350 | ] |
1351 | 1351 | ||
1352 | -ax = plt.subplot(211) | 1352 | + |
1353 | -ax.tick_params(labelsize=12) | 1353 | +# for y in [ i/10 for i in range(11) ]: |
1354 | -for y in [ i/10 for i in range(11) ]: | 1354 | +# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1) |
1355 | - plt.axhline(y=y, color="grey", linestyle="--", linewidth=1) | ||
1356 | -colors = [ 'blue','goldenrod', | ||
1357 | - 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime', | ||
1358 | - 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime' | ||
1359 | - ] | ||
1360 | -bplot = plt.boxplot(x_noPK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97]) | ||
1361 | -for patch, color in zip(bplot['boxes'], colors): | ||
1362 | - patch.set_facecolor(color) | ||
1363 | -# plt.axhline(y=0, color="black", linewidth=1) | ||
1364 | -# plt.axhline(y=1, color="black", linewidth=1) | ||
1365 | -plt.xticks([1.0+i for i in range(16)], labels[1:]) | ||
1366 | -plt.ylim((0.5, 1.01)) | ||
1367 | -plt.ylabel("MCC", fontsize=12) | ||
1368 | -plt.subplots_adjust(left=0.05, right=0.95) | ||
1369 | -# plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0])) | ||
1370 | - | ||
1371 | - | ||
1372 | -ax = plt.subplot(212) | ||
1373 | -ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False, labelsize=12) | ||
1374 | -ax.xaxis.set_label_position('top') | ||
1375 | -for y in [ i/10 for i in range(11) ]: | ||
1376 | - plt.axhline(y=y, color="grey", linestyle="--", linewidth=1) | ||
1377 | -colors = [ 'green','green', | ||
1378 | - 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime', | ||
1379 | - 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime' | ||
1380 | - ] | ||
1381 | -labels = [ "Biokop"] | ||
1382 | -bplot = plt.boxplot(x_PK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97]) | ||
1383 | -for patch, color in zip(bplot['boxes'], colors): | ||
1384 | - patch.set_facecolor(color) | ||
1385 | # plt.axhline(y=0, color="black", linewidth=1) | 1355 | # plt.axhline(y=0, color="black", linewidth=1) |
1386 | -# plt.axhline(y=1, color="black", linewidth=1) | 1356 | +# bplot = plt.boxplot(merge, vert=True, patch_artist=True, notch=False, whis=[3,97]) |
1387 | -plt.xticks([1.0+i for i in range(16)], labels) | 1357 | +# for patch, color in zip(bplot['boxes'], colors): |
1388 | -plt.ylim((0.5, 1.01)) | 1358 | +# patch.set_facecolor(color) |
1389 | -plt.ylabel("MCC", fontsize=12) | 1359 | +# plt.xticks([1.0+i for i in range(17)], labels) |
1390 | -plt.subplots_adjust(left=0.05, right=0.95) | 1360 | +# plt.ylim((-0.1, 1.01)) |
1391 | -# plt.text(6.2,-0.3,"Performance with pseudoknots (%d RNAs included)" % len(x_PK_fully[0]), fontsize=12) | 1361 | +# plt.ylabel("MCC", fontsize=12) |
1362 | +# plt.subplots_adjust(left=0.05, right=0.95) | ||
1363 | +# # plt.title("Performance with pseudoknotted dataset (%d RNAs from Pseudobase++)" % len(merge[0])) | ||
1364 | +# plt.show() | ||
1365 | + | ||
1366 | +# # Separating PK and non-PK | ||
1367 | +# ax = plt.subplot(211) | ||
1368 | +# ax.tick_params(labelsize=12) | ||
1369 | +# for y in [ i/10 for i in range(11) ]: | ||
1370 | +# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1) | ||
1371 | +# colors = [ 'blue','goldenrod', | ||
1372 | +# 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime', | ||
1373 | +# 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime' | ||
1374 | +# ] | ||
1375 | +# bplot = plt.boxplot(x_noPK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97]) | ||
1376 | +# for patch, color in zip(bplot['boxes'], colors): | ||
1377 | +# patch.set_facecolor(color) | ||
1378 | +# # plt.axhline(y=0, color="black", linewidth=1) | ||
1379 | +# # plt.axhline(y=1, color="black", linewidth=1) | ||
1380 | +# plt.xticks([1.0+i for i in range(16)], labels[1:]) | ||
1381 | +# plt.ylim((0.5, 1.01)) | ||
1382 | +# plt.ylabel("MCC", fontsize=12) | ||
1383 | +# plt.subplots_adjust(left=0.05, right=0.95) | ||
1384 | +# # plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0])) | ||
1385 | + | ||
1386 | + | ||
1387 | +# ax = plt.subplot(212) | ||
1388 | +# ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False, labelsize=12) | ||
1389 | +# ax.xaxis.set_label_position('top') | ||
1390 | +# for y in [ i/10 for i in range(11) ]: | ||
1391 | +# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1) | ||
1392 | +# colors = [ 'green','green', | ||
1393 | +# 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime', | ||
1394 | +# 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime' | ||
1395 | +# ] | ||
1396 | +# labels = [ "Biokop"] | ||
1397 | +# bplot = plt.boxplot(x_PK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97]) | ||
1398 | +# for patch, color in zip(bplot['boxes'], colors): | ||
1399 | +# patch.set_facecolor(color) | ||
1400 | +# # plt.axhline(y=0, color="black", linewidth=1) | ||
1401 | +# # plt.axhline(y=1, color="black", linewidth=1) | ||
1402 | +# plt.xticks([1.0+i for i in range(16)], labels) | ||
1403 | +# plt.ylim((0.4, 1.01)) | ||
1404 | +# plt.ylabel("MCC", fontsize=12) | ||
1405 | +# plt.subplots_adjust(left=0.05, right=0.95) | ||
1406 | +# # plt.text(6.2,-0.3,"Performance with pseudoknots (%d RNAs included)" % len(x_PK_fully[0]), fontsize=12) | ||
1392 | 1407 | ||
1393 | 1408 | ||
1394 | -plt.show() | 1409 | +# plt.show() |
1395 | 1410 | ||
1396 | 1411 | ||
1397 | # # ================== MCC performance ==================================== | 1412 | # # ================== MCC performance ==================================== |
... | @@ -1536,82 +1551,6 @@ plt.show() | ... | @@ -1536,82 +1551,6 @@ plt.show() |
1536 | # plt.show() | 1551 | # plt.show() |
1537 | 1552 | ||
1538 | 1553 | ||
1539 | -# # MCC boost compared to RNA subopt | ||
1540 | -# plt.subplot(143) | ||
1541 | -# x = [ | ||
1542 | -# [ rna.rnamoip.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.rnamoip.predictions)], | ||
1543 | -# [ rna.biorseoRawA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)], | ||
1544 | -# [ rna.biorseoRawB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)], | ||
1545 | -# [ rna.biokop.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biokop.predictions)], | ||
1546 | -#] | ||
1547 | -# colors = ['xkcd:goldenrod', 'xkcd:red', 'firebrick', 'limegreen'] | ||
1548 | -# labels = ["$\Delta$MCC(RNAsubopt,RNA-MoIP)","$\Delta$MCC(RNAsubopt,RNA MoBOIP)", | ||
1549 | -# "$\Delta$MCC(RNAsubopt,RNA MoBOIP++)","$\Delta$MCC(RNAsubopt,Biokop)"] | ||
1550 | -# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97]) | ||
1551 | -# for patch, color in zip(bplot['boxes'], colors): | ||
1552 | -# patch.set_facecolor(color) | ||
1553 | -# plt.axvline(x=0, color="black", linewidth=1) | ||
1554 | -# plt.yticks([1.0+i for i in range(4)], labels) | ||
1555 | -# plt.xlim((-1.1, 1.1)) | ||
1556 | -# plt.xlabel("Improvement in MCC") | ||
1557 | -# plt.title("MCC performance relatively to RNAsubopt") | ||
1558 | -# plt.show() | ||
1559 | - | ||
1560 | - | ||
1561 | -# plt.subplot(222) | ||
1562 | -# x = [ | ||
1563 | -# [ rna.biorseoBGSUBayesPairA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairA.predictions)], | ||
1564 | -# [ rna.biorseoBGSUBayesPairB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairB.predictions)], | ||
1565 | -# [ rna.biorseoBGSUBayesPairC.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairC.predictions)], | ||
1566 | -# [ rna.biorseoBGSUBayesPairD.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairD.predictions)], | ||
1567 | -#] | ||
1568 | -# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97]) | ||
1569 | -# for patch, color in zip(bplot['boxes'], colors): | ||
1570 | -# patch.set_facecolor(color) | ||
1571 | -# plt.axvline(x=0, color="black", linewidth=1) | ||
1572 | -# plt.yticks([1.0+i for i in range(4)], labels) | ||
1573 | -# plt.xlim((-1.1, 1.1)) | ||
1574 | -# # plt.xlabel("Improvement in MCC") | ||
1575 | -# plt.title("(B) The RNA Motif Atlas 3.2 + BayesPairing") | ||
1576 | - | ||
1577 | - | ||
1578 | -# plt.subplot(223) | ||
1579 | -# x = [ | ||
1580 | -# [ rna.biorseoRawA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)], | ||
1581 | -# [ rna.biorseoRawB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)], | ||
1582 | -#] | ||
1583 | -# colors = ['red', 'firebrick'] | ||
1584 | -# labels = ["$f_{1A}$", "$f_{1B}$"] | ||
1585 | -# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97]) | ||
1586 | -# for patch, color in zip(bplot['boxes'], colors): | ||
1587 | -# patch.set_facecolor(color) | ||
1588 | -# plt.axvline(x=0, color="black", linewidth=1) | ||
1589 | -# plt.yticks([1.0+i for i in range(2)], labels) | ||
1590 | -# plt.xlabel("Improvement in MCC") | ||
1591 | -# plt.xlim((-1.1, 1.1)) | ||
1592 | -# plt.title("(C) Rna3Dmotifs + Simple pattern matching") | ||
1593 | - | ||
1594 | - | ||
1595 | -# plt.subplot(224) | ||
1596 | -# x = [ | ||
1597 | -# [ rna.biorseoBGSUJAR3DA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DA.predictions)], | ||
1598 | -# [ rna.biorseoBGSUJAR3DB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DB.predictions)], | ||
1599 | -# [ rna.biorseoBGSUJAR3DC.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DC.predictions)], | ||
1600 | -# [ rna.biorseoBGSUJAR3DD.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DD.predictions)], | ||
1601 | -#] | ||
1602 | -# colors = ['darkturquoise', 'darkcyan', 'royalblue', 'navy'] | ||
1603 | -# labels = ["$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$"] | ||
1604 | -# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97]) | ||
1605 | -# for patch, color in zip(bplot['boxes'], colors): | ||
1606 | -# patch.set_facecolor(color) | ||
1607 | -# plt.axvline(x=0, color="black", linewidth=1) | ||
1608 | -# plt.yticks([1.0+i for i in range(4)], labels) | ||
1609 | -# plt.xlabel("Improvement in MCC") | ||
1610 | -# plt.xlim((-1.1, 1.1)) | ||
1611 | -# plt.title("(D) The RNA Motif Atlas 3.2 + JAR3D") | ||
1612 | -# plt.show() | ||
1613 | - | ||
1614 | - | ||
1615 | # # insertion ratio of the best structure | 1554 | # # insertion ratio of the best structure |
1616 | # plt.subplot(221) | 1555 | # plt.subplot(221) |
1617 | # x = [ | 1556 | # x = [ | ... | ... |
-
Please register or login to post a comment