Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
biorseo
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2019-06-04 19:14:19 +0200
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
591a59b93dd384e48328564f48e257804ead7d11
591a59b9
1 parent
e3801500
Average MCCs
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
498 additions
and
280 deletions
.gitignore
biorseo.py
doc/fig/pseudobase.jpg
test_on_RNAstrand.py
.gitignore
View file @
591a59b
...
...
@@ -12,7 +12,6 @@ doc/*.fls
doc/*.fdb_latexmk
# Compiled Object files
*.o
obj/*
# Executables
...
...
biorseo.py
View file @
591a59b
...
...
@@ -21,7 +21,8 @@ bypdir = ""
biorseoDir
=
"."
exec
(
compile
(
open
(
biorseoDir
+
"/EditMe"
)
.
read
(),
''
,
'exec'
))
runDir
=
path
.
dirname
(
path
.
realpath
(
__file__
))
outputDir
=
biorseoDir
+
"/results/"
self
.
outputf
=
biorseoDir
+
"/results/"
tempDir
=
biorseoDir
+
"/temp/"
HLmotifDir
=
biorseoDir
+
"/data/modules/BGSU/HL/3.2/lib"
ILmotifDir
=
biorseoDir
+
"/data/modules/BGSU/IL/3.2/lib"
descfolder
=
biorseoDir
+
"/data/modules/DESC"
...
...
@@ -29,6 +30,17 @@ descfolder = biorseoDir + "/data/modules/DESC"
# ================== CLASSES AND FUNCTIONS ================================
ignored_nt_dict
=
{}
def
is_canonical_nts
(
seq
):
for
c
in
seq
[:
-
1
]:
if
c
not
in
"ACGU"
:
if
c
in
ignored_nt_dict
.
keys
():
ignored_nt_dict
[
c
]
+=
1
else
:
ignored_nt_dict
[
c
]
=
1
return
False
return
True
class
NoDaemonProcess
(
multiprocessing
.
Process
):
@property
...
...
@@ -107,19 +119,213 @@ class Job:
self
.
nthreads
=
how_many_in_parallel
class
RNA
:
def
__init__
(
self
,
header
,
seq
):
self
.
seq_
=
seq
self
.
header_
=
header
self
.
length
=
len
(
seq
)
self
.
rnasubopt
=
[]
self
.
biorseoRawA
=
[]
self
.
biorseoRawB
=
[]
self
.
biorseoBGSUJAR3DA
=
[]
self
.
biorseoBGSUJAR3DC
=
[]
self
.
biorseoBGSUJAR3DD
=
[]
self
.
biorseoBGSUJAR3DB
=
[]
self
.
biorseoBayesPairA
=
[]
self
.
biorseoBayesPairC
=
[]
self
.
biorseoBayesPairD
=
[]
self
.
biorseoBayesPairB
=
[]
self
.
biorseoBGSUBayesPairA
=
[]
self
.
biorseoBGSUBayesPairC
=
[]
self
.
biorseoBGSUBayesPairD
=
[]
self
.
biorseoBGSUBayesPairB
=
[]
def
get_RNAsubopt_results
(
self
):
rna
=
open
(
self
.
outputf
+
self
.
basename
+
".subopt"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
if
ss
not
in
self
.
rnasubopt
.
predictions
:
self
.
rnasubopt
.
predictions
.
append
(
ss
)
def
get_biorseoBayesPairA_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".bypA"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".bypA"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBayesPairA
.
predictions
:
self
.
biorseoBayesPairA
.
predictions
.
append
(
ss
)
self
.
biorseoBayesPairA
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoBayesPairB_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".bypB"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".bypB"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBayesPairB
.
predictions
:
self
.
biorseoBayesPairB
.
predictions
.
append
(
ss
)
self
.
biorseoBayesPairB
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoBayesPairC_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".bypC"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".bypC"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBayesPairC
.
predictions
:
self
.
biorseoBayesPairC
.
predictions
.
append
(
ss
)
self
.
biorseoBayesPairC
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoBayesPairD_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".bypD"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".bypD"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBayesPairD
.
predictions
:
self
.
biorseoBayesPairD
.
predictions
.
append
(
ss
)
self
.
biorseoBayesPairD
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoRawA_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".rawA"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".rawA"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoRawA
.
predictions
:
self
.
biorseoRawA
.
predictions
.
append
(
ss
)
self
.
biorseoRawA
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoRawB_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".rawB"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".rawB"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoRawB
.
predictions
:
self
.
biorseoRawB
.
predictions
.
append
(
ss
)
self
.
biorseoRawB
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoBGSUJAR3DA_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".jar3dA"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".jar3dA"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBGSUJAR3DA
.
predictions
:
self
.
biorseoBGSUJAR3DA
.
predictions
.
append
(
ss
)
self
.
biorseoBGSUJAR3DA
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoBGSUJAR3DB_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".jar3dB"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".jar3dB"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBGSUJAR3DB
.
predictions
:
self
.
biorseoBGSUJAR3DB
.
predictions
.
append
(
ss
)
self
.
biorseoBGSUJAR3DB
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoBGSUJAR3DC_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".jar3dC"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".jar3dC"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBGSUJAR3DC
.
predictions
:
self
.
biorseoBGSUJAR3DC
.
predictions
.
append
(
ss
)
self
.
biorseoBGSUJAR3DC
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoBGSUJAR3DD_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".jar3dD"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".jar3dD"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBGSUJAR3DD
.
predictions
:
self
.
biorseoBGSUJAR3DD
.
predictions
.
append
(
ss
)
self
.
biorseoBGSUJAR3DD
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
def
get_biorseoBGSUBayesPairA_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".bgsubypA"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".bgsubypA"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBGSUBayesPairA
.
predictions
:
self
.
biorseoBGSUBayesPairA
.
predictions
.
append
(
ss
)
self
.
biorseoBGSUBayesPairA
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
# else:
# print(targetdir+ self.basename + ".bgsubypA not found !")
def
get_biorseoBGSUBayesPairB_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".bgsubypB"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".bgsubypB"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBGSUBayesPairB
.
predictions
:
self
.
biorseoBGSUBayesPairB
.
predictions
.
append
(
ss
)
self
.
biorseoBGSUBayesPairB
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
# else:
# print(targetdir+ self.basename + ".bgsubypB not found !")
def
get_biorseoBGSUBayesPairC_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".bgsubypC"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".bgsubypC"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBGSUBayesPairC
.
predictions
:
self
.
biorseoBGSUBayesPairC
.
predictions
.
append
(
ss
)
self
.
biorseoBGSUBayesPairC
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
# else:
# print(targetdir+ self.basename + ".bgsubypC not found !")
def
get_biorseoBGSUBayesPairD_results
(
self
,
targetdir
):
if
path
.
isfile
(
targetdir
+
self
.
basename
+
".bgsubypD"
):
rna
=
open
(
targetdir
+
self
.
basename
+
".bgsubypD"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
ss
=
lines
[
i
]
.
split
(
' '
)[
0
]
.
split
(
'
\t
'
)[
0
]
if
ss
not
in
self
.
biorseoBGSUBayesPairD
.
predictions
:
self
.
biorseoBGSUBayesPairD
.
predictions
.
append
(
ss
)
self
.
biorseoBGSUBayesPairD
.
ninsertions
.
append
(
lines
[
i
]
.
count
(
'+'
))
# else:
# print(targetdir+ self.basename + ".bgsubypD not found !")
class
BiorseoInstance
:
def
__init__
(
self
,
argv
):
# set default options
self
.
type
=
"dpm"
self
.
modules
=
"desc"
self
.
func
=
'B'
self
.
outputf
=
outputDir
self
.
outputf
=
self
.
outputf
self
.
jobcount
=
0
# Parse options
try
:
opts
,
args
=
getopt
.
getopt
(
argv
,
"hi
l:
:o:"
,
[
"type="
,
"func="
,
"modules="
])
argv
,
"hi:o:"
,
[
"type="
,
"func="
,
"modules="
])
except
getopt
.
GetoptError
:
print
(
"Please provide arguments !"
)
sys
.
exit
(
2
)
...
...
@@ -130,9 +336,6 @@ class BiorseoInstance:
elif
opt
==
"-i"
:
self
.
inputfile
=
arg
self
.
mode
=
0
# single sequence mode
elif
opt
==
"-l"
:
self
.
inputfile
=
arg
self
.
mode
=
1
# batch mode
elif
opt
==
"-o"
:
self
.
outputf
=
arg
# output file or folder...
elif
opt
==
"--func"
:
...
...
@@ -153,6 +356,9 @@ class BiorseoInstance:
else
:
raise
"Unknown option "
+
opt
# create jobs
self
.
list_jobs
()
if
self
.
mode
:
# Create a job manager
self
.
manager
=
Manager
()
...
...
@@ -321,7 +527,7 @@ class BiorseoInstance:
def
launch_JAR3D
(
self
,
seq_
,
basename
):
rnasubopt_preds
=
[]
# Extracting probable loops from RNA-subopt structures
rna
=
open
(
outputDir
+
basename
+
".subopt"
,
"r"
)
rna
=
open
(
self
.
outputf
+
basename
+
".subopt"
,
"r"
)
lines
=
rna
.
readlines
()
rna
.
close
()
for
i
in
range
(
2
,
len
(
lines
)):
...
...
@@ -352,7 +558,7 @@ class BiorseoInstance:
insertion_sites
.
sort
(
reverse
=
True
)
# Writing results to CSV file
c
=
0
resultsfile
=
open
(
outputDir
+
basename
+
".sites.csv"
,
"w"
)
resultsfile
=
open
(
self
.
outputf
+
basename
+
".sites.csv"
,
"w"
)
resultsfile
.
write
(
"Motif,Rotation,Score,Start1,End1,Start2,End2
\n
"
)
for
site
in
insertion_sites
:
if
site
.
score
>
10
:
...
...
@@ -372,7 +578,7 @@ class BiorseoInstance:
def
launch_BayesPairing
(
self
,
module_type
,
seq_
,
header_
,
basename
):
chdir
(
bypdir
)
cmd
=
[
"python3"
,
"parse_sequences.py"
,
"-seq"
,
outputDir
+
cmd
=
[
"python3"
,
"parse_sequences.py"
,
"-seq"
,
self
.
outputf
+
basename
+
".fa"
,
"-d"
,
module_type
,
"-interm"
,
"1"
]
logfile
=
open
(
"log_of_the_run.sh"
,
'a'
)
...
...
@@ -389,9 +595,9 @@ class BiorseoInstance:
l
=
BypLog
[
idx
]
insertion_sites
=
[
x
for
x
in
ast
.
literal_eval
(
l
.
split
(
":"
)[
1
][
1
:])]
if
module_type
==
"rna3dmotif"
:
rna
=
open
(
outputDir
+
basename
+
".byp.csv"
,
"w"
)
rna
=
open
(
self
.
outputf
+
basename
+
".byp.csv"
,
"w"
)
else
:
rna
=
open
(
outputDir
+
basename
+
".bgsubyp.csv"
,
"w"
)
rna
=
open
(
self
.
outputf
+
basename
+
".bgsubyp.csv"
,
"w"
)
rna
.
write
(
"Motif,Score,Start1,End1,Start2,End2...
\n
"
)
for
i
,
module
in
enumerate
(
insertion_sites
):
if
len
(
module
):
...
...
@@ -477,6 +683,80 @@ class BiorseoInstance:
raise
"Unknown data type !"
return
path
.
isfile
(
self
.
outputf
+
basename
+
extension
)
def
list_jobs
(
self
):
# Read fasta file, which can contain one or several RNAs
RNAcontainer
=
[]
print
(
"loading file(s)..."
)
db
=
open
(
self
.
inputfile
,
"r"
)
c
=
0
header
=
""
seq
=
""
while
True
:
l
=
db
.
readline
()
if
l
==
""
:
break
c
+=
1
c
=
c
%
2
if
c
==
1
:
if
header
!=
""
:
# This is our second RNA in the fasta file
self
.
mode
=
1
header
=
l
[:
-
1
]
if
c
==
0
:
seq
=
l
[:
-
1
]
.
upper
()
if
is_canonical_nts
(
seq
):
header
=
header
.
replace
(
'/'
,
'_'
)
RNAcontainer
.
append
(
RNA
(
header
,
seq
))
if
not
path
.
isfile
(
self
.
outputf
+
header
+
".fa"
):
rna
=
open
(
self
.
outputf
+
header
+
".fa"
,
"w"
)
rna
.
write
(
">"
+
header
+
'
\n
'
)
rna
.
write
(
seq
+
'
\n
'
)
rna
.
close
()
db
.
close
()
for
nt
,
number
in
ignored_nt_dict
.
items
():
print
(
"ignored
%
d sequences because of char
%
c"
%
(
number
,
nt
))
tot
=
len
(
RNAcontainer
)
print
(
"Loaded
%
d RNAs."
%
(
tot
))
#define job list
joblist
=
[]
for
instance
in
RNAcontainer
:
executable
=
biorseoDir
+
"/bin/biorseo"
fastafile
=
self
.
outputf
+
instance
.
header
+
".fa"
method_type
=
""
ext
=
".raw"
if
self
.
type
==
"jar3d"
:
ext
=
".jar3d"
method_type
=
"--jar3dcsv"
csv
=
self
.
outputf
+
instance
.
header
+
".sites.csv"
# RNAsubopt
joblist
.
append
(
Job
(
command
=
[
"RNAsubopt"
,
"-i"
,
fastafile
,
"--outfile="
+
instance
.
header
+
".subopt"
],
priority
=
1
,
checkFunc
=
check_RNAsubopt
,
checkArgs
=
[
instance
.
header
]))
joblist
.
append
(
Job
(
command
=
[
"mv"
,
instance
.
header
+
".subopt"
,
self
.
outputf
],
priority
=
2
,
checkFunc
=
check_RNAsubopt
,
checkArgs
=
[
instance
.
header
]))
# JAR3D
joblist
.
append
(
Job
(
function
=
self
.
launch_JAR3D
,
args
=
[
instance
.
seq_
,
instance
.
header
],
priority
=
3
,
how_many_in_parallel
=
1
,
checkFunc
=
check_JAR3D
,
checkArgs
=
[
instance
.
header
]))
if
self
.
type
==
"byp"
:
method_type
=
"--bayespaircsv"
if
self
.
modules
==
"desc"
:
ext
=
".byp"
csv
=
self
.
outputf
+
instance
.
header
+
".byp.csv"
joblist
.
append
(
Job
(
function
=
self
.
launch_BayesPairing
,
args
=
[
"rna3dmotif"
,
instance
.
seq_
,
instance
.
header_
,
instance
.
header
],
how_many_in_parallel
=-
1
,
priority
=
1
,
checkFunc
=
check_BayesPairing
,
checkArgs
=
[
instance
.
header
]))
elif
self
.
modules
==
"bgsu"
:
ext
=
".bgsubyp"
csv
=
self
.
outputf
+
instance
.
header
+
".bgsubyp.csv"
joblist
.
append
(
Job
(
function
=
self
.
launch_BayesPairing
,
args
=
[
"3dmotifatlas"
,
instance
.
seq_
,
instance
.
header_
,
instance
.
header
],
how_many_in_parallel
=-
1
,
priority
=
1
,
checkFunc
=
check_BGSUBayesPairing
,
checkArgs
=
[
instance
.
header
]))
command
=
[
executable
,
"-s"
,
fastafile
]
if
method_type
:
command
+=
[
method_type
,
csv
]
command
+=
[
"-o"
,
self
.
outputf
+
instance
.
header
+
ext
+
self
.
func
,
"--type"
,
self
.
func
]
joblist
.
append
(
Job
(
command
=
command
,
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
))
if
__name__
==
"__main__"
:
BiorseoInstance
(
sys
.
argv
)
...
...
doc/fig/pseudobase.jpg
0 → 100644
View file @
591a59b
102 KB
test_on_RNAstrand.py
View file @
591a59b
...
...
@@ -1005,93 +1005,93 @@ print("Loaded %d RNAs of length between 10 and 100. %d of them contain pseudokno
# #================= PREDICTION OF STRUCTURES ===============================
#
#
define job list
#
joblist = []
#
for instance in RNAcontainer:
#
basename = instance.basename
#
# RNAsubopt
#
joblist.append(Job(command=["RNAsubopt", "-i", outputDir + basename + ".fa", "--outfile="+ basename + ".subopt"], priority=1, checkFunc=check_RNAsubopt, checkArgs=[basename]))
#
joblist.append(Job(command=["mv", basename + ".subopt", outputDir], priority=2, checkFunc=check_RNAsubopt, checkArgs=[basename]))
#
# JAR3D
#
joblist.append(Job(function=launch_JAR3D, args=[instance.seq_, basename], priority=3, how_many_in_parallel=1, checkFunc=check_JAR3D, checkArgs=[basename]))
#
# BayesPairing and BGSUBayesPairing
#
joblist.append(Job(function=launch_BayesPairing, args=["rna3dmotif", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BayesPairing, checkArgs=[basename]))
#
joblist.append(Job(function=launch_BayesPairing, args=["3dmotifatlas", instance.seq_, instance.header_, basename], how_many_in_parallel=-1, priority=3, checkFunc=check_BGSUBayesPairing, checkArgs=[basename]))
#
# biorseoBGSUJAR3DA-D
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"noPK/"+basename+".jar3dD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DA, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DB, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DC, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--jar3dcsv", outputDir+basename+".sites.csv", "-o", outputDir+"PK/"+basename+".jar3dD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUJAR3DD, checkArgs=[basename, True]))
#
# biorseoBGSUBayesPairA-D
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"noPK/"+basename+".bgsubypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairA, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairB, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairC, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".bgsubyp.csv", "-o", outputDir+"PK/"+basename+".bgsubypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBGSUBayesPairD, checkArgs=[basename, True]))
#
# biorseoBayesPairA-D
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypC", "--type", "C", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"noPK/"+basename+".bypD", "--type", "D", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairA, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairB, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypC", "--type", "C"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairC, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir+basename+".fa", "--bayespaircsv", outputDir+basename+".byp.csv", "-o", outputDir+"PK/"+basename+".bypD", "--type", "D"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoBayesPairD, checkArgs=[basename, True]))
#
# biorseoRawA,B
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawA", "--type", "A", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"noPK/" + basename + ".rawB", "--type", "B", "-n"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, False]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawA", "--type", "A"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawA, checkArgs=[basename, True]))
#
joblist.append(Job(command=[biorseoDir+"/bin/biorseo", "-s", outputDir + basename + ".fa", "-d", descfolder, "-o", outputDir+"PK/" + basename + ".rawB", "--type", "B"], priority=4, timeout=3600, how_many_in_parallel=3, checkFunc=check_biorseoRawB, checkArgs=[basename, True]))
#
# RNA-MoIP
#
joblist.append(Job(function=launch_RNAMoIP, args=[instance.seq_, instance.header_, basename], priority=3, timeout=3600, checkFunc=check_RNAMoIP, checkArgs=[basename]))
#
# Biokop
#
joblist.append(Job(command=[biorseoDir + "/../biokop/biokop", "-n1", "-i", outputDir + basename + ".fa", "-o", outputDir + basename + ".biok"], priority=5, timeout=15000, how_many_in_parallel=3, checkFunc=check_biokop, checkArgs=[basename]))
#
#
execute jobs
#
jobs = {}
#
jobcount = len(joblist)
#
for job in joblist:
#
if job.priority_ not in jobs.keys():
#
jobs[job.priority_] = {}
#
if job.nthreads not in jobs[job.priority_].keys():
#
jobs[job.priority_][job.nthreads] = []
#
jobs[job.priority_][job.nthreads].append(job)
#
nprio = max(jobs.keys())
#
for i in range(1,nprio+1):
#
if not len(jobs[i].keys()): continue
#
# check the thread numbers
#
different_thread_numbers = [n for n in jobs[i].keys()]
#
different_thread_numbers.sort()
#
for n in different_thread_numbers:
#
bunch = jobs[i][n]
#
if not len(bunch): continue
#
pool = MyPool(processes=n)
#
results = pool.map(execute_job, bunch)
#
pool.close()
#
pool.join()
#
if len(fails):
#
print()
#
print("Some jobs failed! :")
#
print()
#
for j in fails:
#
print(j.cmd_)
#
else:
#
print()
#
print("Computations ran successfully.")
#
print()
#define job list
joblist
=
[]
for
instance
in
RNAcontainer
:
basename
=
instance
.
basename
# RNAsubopt
joblist
.
append
(
Job
(
command
=
[
"RNAsubopt"
,
"-i"
,
outputDir
+
basename
+
".fa"
,
"--outfile="
+
basename
+
".subopt"
],
priority
=
1
,
checkFunc
=
check_RNAsubopt
,
checkArgs
=
[
basename
]))
joblist
.
append
(
Job
(
command
=
[
"mv"
,
basename
+
".subopt"
,
outputDir
],
priority
=
2
,
checkFunc
=
check_RNAsubopt
,
checkArgs
=
[
basename
]))
# JAR3D
joblist
.
append
(
Job
(
function
=
launch_JAR3D
,
args
=
[
instance
.
seq_
,
basename
],
priority
=
3
,
how_many_in_parallel
=
1
,
checkFunc
=
check_JAR3D
,
checkArgs
=
[
basename
]))
# BayesPairing and BGSUBayesPairing
joblist
.
append
(
Job
(
function
=
launch_BayesPairing
,
args
=
[
"rna3dmotif"
,
instance
.
seq_
,
instance
.
header_
,
basename
],
how_many_in_parallel
=-
1
,
priority
=
3
,
checkFunc
=
check_BayesPairing
,
checkArgs
=
[
basename
]))
joblist
.
append
(
Job
(
function
=
launch_BayesPairing
,
args
=
[
"3dmotifatlas"
,
instance
.
seq_
,
instance
.
header_
,
basename
],
how_many_in_parallel
=-
1
,
priority
=
3
,
checkFunc
=
check_BGSUBayesPairing
,
checkArgs
=
[
basename
]))
# biorseoBGSUJAR3DA-D
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--jar3dcsv"
,
outputDir
+
basename
+
".sites.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".jar3dA"
,
"--type"
,
"A"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUJAR3DA
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--jar3dcsv"
,
outputDir
+
basename
+
".sites.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".jar3dB"
,
"--type"
,
"B"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUJAR3DB
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--jar3dcsv"
,
outputDir
+
basename
+
".sites.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".jar3dC"
,
"--type"
,
"C"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUJAR3DC
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--jar3dcsv"
,
outputDir
+
basename
+
".sites.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".jar3dD"
,
"--type"
,
"D"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUJAR3DD
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--jar3dcsv"
,
outputDir
+
basename
+
".sites.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".jar3dA"
,
"--type"
,
"A"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUJAR3DA
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--jar3dcsv"
,
outputDir
+
basename
+
".sites.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".jar3dB"
,
"--type"
,
"B"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUJAR3DB
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--jar3dcsv"
,
outputDir
+
basename
+
".sites.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".jar3dC"
,
"--type"
,
"C"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUJAR3DC
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--jar3dcsv"
,
outputDir
+
basename
+
".sites.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".jar3dD"
,
"--type"
,
"D"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUJAR3DD
,
checkArgs
=
[
basename
,
True
]))
# biorseoBGSUBayesPairA-D
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".bgsubyp.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".bgsubypA"
,
"--type"
,
"A"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUBayesPairA
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".bgsubyp.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".bgsubypB"
,
"--type"
,
"B"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUBayesPairB
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".bgsubyp.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".bgsubypC"
,
"--type"
,
"C"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUBayesPairC
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".bgsubyp.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".bgsubypD"
,
"--type"
,
"D"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUBayesPairD
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".bgsubyp.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".bgsubypA"
,
"--type"
,
"A"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUBayesPairA
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".bgsubyp.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".bgsubypB"
,
"--type"
,
"B"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUBayesPairB
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".bgsubyp.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".bgsubypC"
,
"--type"
,
"C"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUBayesPairC
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".bgsubyp.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".bgsubypD"
,
"--type"
,
"D"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBGSUBayesPairD
,
checkArgs
=
[
basename
,
True
]))
# biorseoBayesPairA-D
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".byp.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".bypA"
,
"--type"
,
"A"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBayesPairA
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".byp.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".bypB"
,
"--type"
,
"B"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBayesPairB
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".byp.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".bypC"
,
"--type"
,
"C"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBayesPairC
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".byp.csv"
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".bypD"
,
"--type"
,
"D"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBayesPairD
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".byp.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".bypA"
,
"--type"
,
"A"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBayesPairA
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".byp.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".bypB"
,
"--type"
,
"B"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBayesPairB
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".byp.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".bypC"
,
"--type"
,
"C"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBayesPairC
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"--bayespaircsv"
,
outputDir
+
basename
+
".byp.csv"
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".bypD"
,
"--type"
,
"D"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoBayesPairD
,
checkArgs
=
[
basename
,
True
]))
# biorseoRawA,B
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"-d"
,
descfolder
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".rawA"
,
"--type"
,
"A"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoRawA
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"-d"
,
descfolder
,
"-o"
,
outputDir
+
"noPK/"
+
basename
+
".rawB"
,
"--type"
,
"B"
,
"-n"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoRawB
,
checkArgs
=
[
basename
,
False
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"-d"
,
descfolder
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".rawA"
,
"--type"
,
"A"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoRawA
,
checkArgs
=
[
basename
,
True
]))
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/bin/biorseo"
,
"-s"
,
outputDir
+
basename
+
".fa"
,
"-d"
,
descfolder
,
"-o"
,
outputDir
+
"PK/"
+
basename
+
".rawB"
,
"--type"
,
"B"
],
priority
=
4
,
timeout
=
3600
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biorseoRawB
,
checkArgs
=
[
basename
,
True
]))
# RNA-MoIP
joblist
.
append
(
Job
(
function
=
launch_RNAMoIP
,
args
=
[
instance
.
seq_
,
instance
.
header_
,
basename
],
priority
=
3
,
timeout
=
3600
,
checkFunc
=
check_RNAMoIP
,
checkArgs
=
[
basename
]))
# Biokop
joblist
.
append
(
Job
(
command
=
[
biorseoDir
+
"/../biokop/biokop"
,
"-n1"
,
"-i"
,
outputDir
+
basename
+
".fa"
,
"-o"
,
outputDir
+
basename
+
".biok"
],
priority
=
5
,
timeout
=
15000
,
how_many_in_parallel
=
3
,
checkFunc
=
check_biokop
,
checkArgs
=
[
basename
]))
# execute jobs
jobs
=
{}
jobcount
=
len
(
joblist
)
for
job
in
joblist
:
if
job
.
priority_
not
in
jobs
.
keys
():
jobs
[
job
.
priority_
]
=
{}
if
job
.
nthreads
not
in
jobs
[
job
.
priority_
]
.
keys
():
jobs
[
job
.
priority_
][
job
.
nthreads
]
=
[]
jobs
[
job
.
priority_
][
job
.
nthreads
]
.
append
(
job
)
nprio
=
max
(
jobs
.
keys
())
for
i
in
range
(
1
,
nprio
+
1
):
if
not
len
(
jobs
[
i
]
.
keys
()):
continue
# check the thread numbers
different_thread_numbers
=
[
n
for
n
in
jobs
[
i
]
.
keys
()]
different_thread_numbers
.
sort
()
for
n
in
different_thread_numbers
:
bunch
=
jobs
[
i
][
n
]
if
not
len
(
bunch
):
continue
pool
=
MyPool
(
processes
=
n
)
results
=
pool
.
map
(
execute_job
,
bunch
)
pool
.
close
()
pool
.
join
()
if
len
(
fails
):
print
()
print
(
"Some jobs failed! :"
)
print
()
for
j
in
fails
:
print
(
j
.
cmd_
)
else
:
print
()
print
(
"Computations ran successfully."
)
print
()
# ================= Statistics (without pseudoknots) ========================
...
...
@@ -1183,43 +1183,43 @@ for instance in RNAcontainer:
instance
.
evaluate
()
x_PK
=
[
[
rna
.
biokop
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biokop
.
predictions
)],
[
rna
.
biokop
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biokop
.
predictions
)],
[
rna
.
biorseoRawA
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoRawA
.
predictions
)],
[
rna
.
biorseoRawB
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoRawB
.
predictions
)],
[
rna
.
biorseoBayesPairA
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairA
.
predictions
)],
[
rna
.
biorseoBayesPairB
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairB
.
predictions
)],
[
rna
.
biorseoBayesPairC
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairC
.
predictions
)],
[
rna
.
biorseoBayesPairD
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairD
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DA
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DA
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DB
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DB
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DC
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DC
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DD
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DD
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairA
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairA
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairB
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairB
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairC
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairC
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairD
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairD
.
predictions
)]
[
rna
.
biokop
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biokop
.
predictions
)],
[
rna
.
biokop
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biokop
.
predictions
)],
[
rna
.
biorseoRawA
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoRawA
.
predictions
)],
[
rna
.
biorseoRawB
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoRawB
.
predictions
)],
[
rna
.
biorseoBayesPairA
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairA
.
predictions
)],
[
rna
.
biorseoBayesPairB
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairB
.
predictions
)],
[
rna
.
biorseoBayesPairC
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairC
.
predictions
)],
[
rna
.
biorseoBayesPairD
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairD
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DA
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DA
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DB
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DB
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DC
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DC
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DD
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DD
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairA
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairA
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairB
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairB
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairC
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairC
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairD
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairD
.
predictions
)]
]
RNAs_fully_predicted
=
[
x
for
x
in
RNAcontainer
if
x
.
has_complete_results
(
True
)]
x_PK_fully
=
[
[
rna
.
biokop
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biokop
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoRawA
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoRawB
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairA
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairB
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairC
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairD
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DA
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DB
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DC
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DD
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairA
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairB
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairC
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairD
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biokop
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biokop
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoRawA
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoRawB
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairA
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairB
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairC
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairD
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DA
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DB
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DC
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DD
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairA
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairB
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairC
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairD
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
]
# We ensure having the same number of RNAs in every sample by discarding the one for which computations did not ended/succeeded.
print
()
...
...
@@ -1296,43 +1296,43 @@ print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop
# ================= PLOTS OF RESULTS =======================================
merge
=
[
x_
PK_fully
[
0
],
# Biokop
x_noPK
_fully
[
0
],
# RNA subopt
x_
noPK_fully
[
1
],
# RNA-MoIP
x_
noPK_fully
[
2
],
x_PK_fully
[
2
],
#biorseoRawA
x_
noPK_fully
[
3
],
x_PK_fully
[
3
],
#biorseoRawB
x_
noPK_fully
[
4
],
x_PK_fully
[
4
],
#biorseoBayesPairA
x_
noPK_fully
[
5
],
x_PK_fully
[
5
],
#biorseoBayesPairB
x_
noPK_fully
[
6
],
x_PK_fully
[
6
],
#biorseoBayesPairC
x_
noPK_fully
[
7
],
x_PK_fully
[
7
],
#biorseoBayesPairD
x_
noPK_fully
[
8
],
x_PK_fully
[
8
],
#biorseoBGSUJAR3DA
x_
noPK_fully
[
9
],
x_PK_fully
[
9
],
#biorseoBGSUJAR3DB
x_
noPK_fully
[
10
],
x_PK_fully
[
10
],
#biorseoBGSUJAR3DC
x_
noPK_fully
[
11
],
x_PK_fully
[
11
],
#biorseoBGSUJAR3DD
x_
noPK_fully
[
12
],
x_PK_fully
[
12
],
#biorseoBGSUBayesPairA
x_
noPK_fully
[
13
],
x_PK_fully
[
13
],
#biorseoBGSUBayesPairB
x_
noPK_fully
[
14
],
x_PK_fully
[
14
],
#biorseoBGSUBayesPairC
x_
noPK_fully
[
15
],
x_PK_fully
[
15
],
#biorseoBGSUBayesPairD
merge
=
[
x_
noPK
[
0
],
# RNA subopt
x_noPK
[
1
],
# RNA-MoIP
x_
PK
[
0
],
# Biokop
x_
PK
[
2
],
#biorseoRawA
x_
PK
[
3
],
#biorseoRawB
x_
PK
[
4
],
#biorseoBayesPairA
x_
PK
[
5
],
#biorseoBayesPairB
x_
PK
[
6
],
#biorseoBayesPairC
x_
PK
[
7
],
#biorseoBayesPairD
x_
PK
[
8
],
#biorseoBGSUJAR3DA
x_
PK
[
9
],
#biorseoBGSUJAR3DB
x_
PK
[
10
],
#biorseoBGSUJAR3DC
x_
PK
[
11
],
#biorseoBGSUJAR3DD
x_
PK
[
12
],
#biorseoBGSUBayesPairA
x_
PK
[
13
],
#biorseoBGSUBayesPairB
x_
PK
[
14
],
#biorseoBGSUBayesPairC
x_
PK
[
15
],
#biorseoBGSUBayesPairD
]
colors
=
[
'green'
,
'blue'
,
'goldenrod'
,
'darkturquoise'
,
'darkturquoise'
,
'red'
,
'red'
,
'firebrick'
,
'firebrick'
,
'limegreen'
,
'limegreen'
,
'olive'
,
'olive'
,
'forestgreen'
,
'forestgreen'
,
'lime'
,
'lime'
,
'darkcyan'
,
'darkcyan'
,
'royalblue'
,
'royalblue'
,
'navy'
,
'navy'
,
'limegreen'
,
'limegreen'
,
'olive'
,
'olive'
,
'forestgreen'
,
'forestgreen'
,
'lime'
,
'lime'
colors
=
[
'blue'
,
'goldenrod'
,
'green'
,
'red'
,
'firebrick'
,
'limegreen'
,
'olive'
,
'forestgreen'
,
'lime'
,
'darkcyan'
,
'royalblue'
,
'navy'
,
'limegreen'
,
'olive'
,
'forestgreen'
,
'lime'
]
labels
=
[
"
Biokop"
,
"
RNAsubopt"
,
labels
=
[
"RNAsubopt"
,
"RNA-MoIP"
,
"Biokop"
,
"$f_{1A}$"
,
"$f_{1B}$"
,
"$f_{1A}$"
,
...
...
@@ -1349,49 +1349,64 @@ labels = [ "Biokop", "RNAsubopt",
"$f_{1D}$"
]
ax
=
plt
.
subplot
(
211
)
ax
.
tick_params
(
labelsize
=
12
)
for
y
in
[
i
/
10
for
i
in
range
(
11
)
]:
plt
.
axhline
(
y
=
y
,
color
=
"grey"
,
linestyle
=
"--"
,
linewidth
=
1
)
colors
=
[
'blue'
,
'goldenrod'
,
'red'
,
'firebrick'
,
'limegreen'
,
'olive'
,
'forestgreen'
,
'lime'
,
'darkturquoise'
,
'darkcyan'
,
'royalblue'
,
'navy'
,
'limegreen'
,
'olive'
,
'forestgreen'
,
'lime'
]
bplot
=
plt
.
boxplot
(
x_noPK_fully
,
vert
=
True
,
patch_artist
=
True
,
notch
=
False
,
whis
=
[
3
,
97
])
for
patch
,
color
in
zip
(
bplot
[
'boxes'
],
colors
):
patch
.
set_facecolor
(
color
)
# plt.axhline(y=0, color="black", linewidth=1)
# plt.axhline(y=1, color="black", linewidth=1)
plt
.
xticks
([
1.0
+
i
for
i
in
range
(
16
)],
labels
[
1
:])
plt
.
ylim
((
0.5
,
1.01
))
plt
.
ylabel
(
"MCC"
,
fontsize
=
12
)
plt
.
subplots_adjust
(
left
=
0.05
,
right
=
0.95
)
# plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0]))
ax
=
plt
.
subplot
(
212
)
ax
.
tick_params
(
top
=
True
,
bottom
=
False
,
labeltop
=
True
,
labelbottom
=
False
,
labelsize
=
12
)
ax
.
xaxis
.
set_label_position
(
'top'
)
for
y
in
[
i
/
10
for
i
in
range
(
11
)
]:
plt
.
axhline
(
y
=
y
,
color
=
"grey"
,
linestyle
=
"--"
,
linewidth
=
1
)
colors
=
[
'green'
,
'green'
,
'red'
,
'firebrick'
,
'limegreen'
,
'olive'
,
'forestgreen'
,
'lime'
,
'darkturquoise'
,
'darkcyan'
,
'royalblue'
,
'navy'
,
'limegreen'
,
'olive'
,
'forestgreen'
,
'lime'
]
labels
=
[
"Biokop"
]
bplot
=
plt
.
boxplot
(
x_PK_fully
,
vert
=
True
,
patch_artist
=
True
,
notch
=
False
,
whis
=
[
3
,
97
])
for
patch
,
color
in
zip
(
bplot
[
'boxes'
],
colors
):
patch
.
set_facecolor
(
color
)
# for y in [ i/10 for i in range(11) ]:
# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
# plt.axhline(y=0, color="black", linewidth=1)
# plt.axhline(y=1, color="black", linewidth=1)
plt
.
xticks
([
1.0
+
i
for
i
in
range
(
16
)],
labels
)
plt
.
ylim
((
0.5
,
1.01
))
plt
.
ylabel
(
"MCC"
,
fontsize
=
12
)
plt
.
subplots_adjust
(
left
=
0.05
,
right
=
0.95
)
# plt.text(6.2,-0.3,"Performance with pseudoknots (%d RNAs included)" % len(x_PK_fully[0]), fontsize=12)
# bplot = plt.boxplot(merge, vert=True, patch_artist=True, notch=False, whis=[3,97])
# for patch, color in zip(bplot['boxes'], colors):
# patch.set_facecolor(color)
# plt.xticks([1.0+i for i in range(17)], labels)
# plt.ylim((-0.1, 1.01))
# plt.ylabel("MCC", fontsize=12)
# plt.subplots_adjust(left=0.05, right=0.95)
# # plt.title("Performance with pseudoknotted dataset (%d RNAs from Pseudobase++)" % len(merge[0]))
# plt.show()
# # Separating PK and non-PK
# ax = plt.subplot(211)
# ax.tick_params(labelsize=12)
# for y in [ i/10 for i in range(11) ]:
# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
# colors = [ 'blue','goldenrod',
# 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
# 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
# ]
# bplot = plt.boxplot(x_noPK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
# for patch, color in zip(bplot['boxes'], colors):
# patch.set_facecolor(color)
# # plt.axhline(y=0, color="black", linewidth=1)
# # plt.axhline(y=1, color="black", linewidth=1)
# plt.xticks([1.0+i for i in range(16)], labels[1:])
# plt.ylim((0.5, 1.01))
# plt.ylabel("MCC", fontsize=12)
# plt.subplots_adjust(left=0.05, right=0.95)
# # plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0]))
# ax = plt.subplot(212)
# ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False, labelsize=12)
# ax.xaxis.set_label_position('top')
# for y in [ i/10 for i in range(11) ]:
# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
# colors = [ 'green','green',
# 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
# 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
# ]
# labels = [ "Biokop"]
# bplot = plt.boxplot(x_PK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
# for patch, color in zip(bplot['boxes'], colors):
# patch.set_facecolor(color)
# # plt.axhline(y=0, color="black", linewidth=1)
# # plt.axhline(y=1, color="black", linewidth=1)
# plt.xticks([1.0+i for i in range(16)], labels)
# plt.ylim((0.4, 1.01))
# plt.ylabel("MCC", fontsize=12)
# plt.subplots_adjust(left=0.05, right=0.95)
# # plt.text(6.2,-0.3,"Performance with pseudoknots (%d RNAs included)" % len(x_PK_fully[0]), fontsize=12)
plt
.
show
()
#
plt.show()
# # ================== MCC performance ====================================
...
...
@@ -1536,82 +1551,6 @@ plt.show()
# plt.show()
# # MCC boost compared to RNA subopt
# plt.subplot(143)
# x = [
# [ rna.rnamoip.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.rnamoip.predictions)],
# [ rna.biorseoRawA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)],
# [ rna.biorseoRawB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)],
# [ rna.biokop.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biokop.predictions)],
#]
# colors = ['xkcd:goldenrod', 'xkcd:red', 'firebrick', 'limegreen']
# labels = ["$\Delta$MCC(RNAsubopt,RNA-MoIP)","$\Delta$MCC(RNAsubopt,RNA MoBOIP)",
# "$\Delta$MCC(RNAsubopt,RNA MoBOIP++)","$\Delta$MCC(RNAsubopt,Biokop)"]
# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97])
# for patch, color in zip(bplot['boxes'], colors):
# patch.set_facecolor(color)
# plt.axvline(x=0, color="black", linewidth=1)
# plt.yticks([1.0+i for i in range(4)], labels)
# plt.xlim((-1.1, 1.1))
# plt.xlabel("Improvement in MCC")
# plt.title("MCC performance relatively to RNAsubopt")
# plt.show()
# plt.subplot(222)
# x = [
# [ rna.biorseoBGSUBayesPairA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairA.predictions)],
# [ rna.biorseoBGSUBayesPairB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairB.predictions)],
# [ rna.biorseoBGSUBayesPairC.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairC.predictions)],
# [ rna.biorseoBGSUBayesPairD.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUBayesPairD.predictions)],
#]
# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97])
# for patch, color in zip(bplot['boxes'], colors):
# patch.set_facecolor(color)
# plt.axvline(x=0, color="black", linewidth=1)
# plt.yticks([1.0+i for i in range(4)], labels)
# plt.xlim((-1.1, 1.1))
# # plt.xlabel("Improvement in MCC")
# plt.title("(B) The RNA Motif Atlas 3.2 + BayesPairing")
# plt.subplot(223)
# x = [
# [ rna.biorseoRawA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawA.predictions)],
# [ rna.biorseoRawB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoRawB.predictions)],
#]
# colors = ['red', 'firebrick']
# labels = ["$f_{1A}$", "$f_{1B}$"]
# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97])
# for patch, color in zip(bplot['boxes'], colors):
# patch.set_facecolor(color)
# plt.axvline(x=0, color="black", linewidth=1)
# plt.yticks([1.0+i for i in range(2)], labels)
# plt.xlabel("Improvement in MCC")
# plt.xlim((-1.1, 1.1))
# plt.title("(C) Rna3Dmotifs + Simple pattern matching")
# plt.subplot(224)
# x = [
# [ rna.biorseoBGSUJAR3DA.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DA.predictions)],
# [ rna.biorseoBGSUJAR3DB.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DB.predictions)],
# [ rna.biorseoBGSUJAR3DC.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DC.predictions)],
# [ rna.biorseoBGSUJAR3DD.max_mcc - rna.rnasubopt.max_mcc for rna in RNAcontainer if len(rna.biorseoBGSUJAR3DD.predictions)],
#]
# colors = ['darkturquoise', 'darkcyan', 'royalblue', 'navy']
# labels = ["$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$"]
# bplot = plt.boxplot(x, vert=False, patch_artist=True, notch=False, whis=[3,97])
# for patch, color in zip(bplot['boxes'], colors):
# patch.set_facecolor(color)
# plt.axvline(x=0, color="black", linewidth=1)
# plt.yticks([1.0+i for i in range(4)], labels)
# plt.xlabel("Improvement in MCC")
# plt.xlim((-1.1, 1.1))
# plt.title("(D) The RNA Motif Atlas 3.2 + JAR3D")
# plt.show()
# # insertion ratio of the best structure
# plt.subplot(221)
# x = [
...
...
Please
register
or
login
to post a comment