Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
biorseo
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2019-06-17 16:24:30 +0200
Browse Files
Options
Browse Files
Download
Plain Diff
Commit
720602576982d5fa19ed03e99769ef4a153edb05
72060257
2 parents
2e2ec263
73734e9d
Merge branch 'master' of
https://github.com/persalteas/biominserter
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
123 additions
and
120 deletions
biorseo.py
test_on_RNAstrand.py
biorseo.py
View file @
7206025
...
...
@@ -549,7 +549,7 @@ class BiorseoInstance:
cmd
=
[
"java"
,
"-jar"
,
jar3dexec
,
filename
,
ILmotifDir
+
"/all.txt"
,
loop
.
header
[
1
:]
+
".ILloop.csv"
,
loop
.
header
[
1
:]
+
".ILseq.csv"
]
nowhere
=
open
(
devnull
,
'w'
)
logfile
=
open
(
"
log_of_the_run.sh"
,
'a'
)
logfile
=
open
(
biorseoDir
+
"/
log_of_the_run.sh"
,
'a'
)
logfile
.
write
(
' '
.
join
(
cmd
))
logfile
.
write
(
"
\n
"
)
logfile
.
close
()
...
...
@@ -789,7 +789,7 @@ class BiorseoInstance:
if
c
==
0
:
seq
=
l
[:
-
1
]
.
upper
()
if
is_canonical_nts
(
seq
):
header
=
header
.
replace
(
'/'
,
'_'
)
.
replace
(
'
\'
'
,
''
)
.
replace
(
'('
,
''
)
.
replace
(
')'
,
''
)
header
=
header
.
replace
(
'/'
,
'_'
)
.
replace
(
'
\'
'
,
''
)
.
replace
(
'('
,
''
)
.
replace
(
')'
,
''
)
.
replace
(
' '
,
'_'
)
RNAcontainer
.
append
(
RNA
(
header
,
seq
))
if
not
path
.
isfile
(
self
.
outputf
+
header
+
".fa"
):
rna
=
open
(
self
.
outputf
+
header
+
".fa"
,
"w"
)
...
...
@@ -845,4 +845,4 @@ class BiorseoInstance:
self
.
joblist
.
append
(
Job
(
command
=
command
,
priority
=
priority
,
timeout
=
3600
,
how_many_in_parallel
=
3
))
BiorseoInstance
(
opts
)
BiorseoInstance
(
opts
)
\ No newline at end of file
...
...
test_on_RNAstrand.py
View file @
7206025
...
...
@@ -9,7 +9,7 @@ from matplotlib import colors
from
math
import
sqrt
from
multiprocessing
import
Pool
,
cpu_count
,
Manager
import
multiprocessing
import
ast
import
ast
,
time
# ================== DEFINITION OF THE PATHS ==============================
...
...
@@ -77,9 +77,8 @@ class MyPool(multiprocessing.pool.Pool):
kwargs
[
'context'
]
=
NoDaemonContext
()
super
(
MyPool
,
self
)
.
__init__
(
*
args
,
**
kwargs
)
exit
()
def
execute_job
(
j
):
def
execute_job
(
j
):
if
j
.
checkFunc_
is
not
None
:
if
j
.
checkFunc_
(
*
j
.
checkArgs_
):
running_stats
[
2
]
+=
1
...
...
@@ -223,6 +222,7 @@ def launch_JAR3D_worker(loop):
return
insertion_sites
def
launch_JAR3D
(
seq_
,
basename
):
time1
=
time
.
time
()
rnasubopt_preds
=
[]
# Extracting probable loops from RNA-subopt structures
rna
=
open
(
outputDir
+
basename
+
".subopt"
,
"r"
)
...
...
@@ -270,9 +270,10 @@ def launch_JAR3D(seq_, basename):
positions
.
append
(
"-,-"
)
resultsfile
.
write
(
','
.
join
(
positions
)
+
'
\n
'
)
resultsfile
.
close
()
time2
=
time
.
time
()
print
(
"<
%
s |
%.3
fs"
%
(
basename
,
time2
-
time1
))
def
launch_BayesPairing
(
module_type
,
seq_
,
header_
,
basename
):
chdir
(
bypdir
)
cmd
=
[
"python3"
,
"parse_sequences.py"
,
"-seq"
,
outputDir
+
basename
+
".fa"
,
"-d"
,
module_type
,
"-interm"
,
"1"
]
...
...
@@ -281,6 +282,7 @@ def launch_BayesPairing(module_type, seq_, header_, basename):
logfile
.
write
(
"
\n
"
)
logfile
.
close
()
chdir
(
bypdir
)
out
=
subprocess
.
check_output
(
cmd
)
.
decode
(
'utf-8'
)
BypLog
=
out
.
split
(
'
\n
'
)
idx
=
0
...
...
@@ -1176,43 +1178,43 @@ for instance in RNAcontainer:
instance
.
evaluate
()
x_PK
=
[
[
rna
.
biokop
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biokop
.
predictions
)],
[
rna
.
biokop
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biokop
.
predictions
)],
[
rna
.
biorseoRawA
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoRawA
.
predictions
)],
[
rna
.
biorseoRawB
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoRawB
.
predictions
)],
[
rna
.
biorseoBayesPairA
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairA
.
predictions
)],
[
rna
.
biorseoBayesPairB
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairB
.
predictions
)],
[
rna
.
biorseoBayesPairC
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairC
.
predictions
)],
[
rna
.
biorseoBayesPairD
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairD
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DA
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DA
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DB
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DB
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DC
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DC
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DD
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DD
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairA
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairA
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairB
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairB
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairC
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairC
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairD
.
avg
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairD
.
predictions
)]
[
rna
.
biokop
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biokop
.
predictions
)],
[
rna
.
biokop
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biokop
.
predictions
)],
[
rna
.
biorseoRawA
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoRawA
.
predictions
)],
[
rna
.
biorseoRawB
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoRawB
.
predictions
)],
[
rna
.
biorseoBayesPairA
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairA
.
predictions
)],
[
rna
.
biorseoBayesPairB
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairB
.
predictions
)],
[
rna
.
biorseoBayesPairC
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairC
.
predictions
)],
[
rna
.
biorseoBayesPairD
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairD
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DA
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DA
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DB
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DB
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DC
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DC
.
predictions
)],
[
rna
.
biorseoBGSUJAR3DD
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUJAR3DD
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairA
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairA
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairB
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairB
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairC
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairC
.
predictions
)],
[
rna
.
biorseoBGSUBayesPairD
.
max
_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBGSUBayesPairD
.
predictions
)]
]
RNAs_fully_predicted
=
[
x
for
x
in
RNAcontainer
if
x
.
has_complete_results
(
True
)]
x_PK_fully
=
[
[
rna
.
biokop
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biokop
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoRawA
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoRawB
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairA
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairB
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairC
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairD
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DA
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DB
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DC
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DD
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairA
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairB
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairC
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairD
.
avg
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biokop
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biokop
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoRawA
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoRawB
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairA
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairB
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairC
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBayesPairD
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DA
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DB
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DC
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUJAR3DD
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairA
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairB
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairC
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
biorseoBGSUBayesPairD
.
max
_mcc
for
rna
in
RNAs_fully_predicted
],
]
# We ensure having the same number of RNAs in every sample by discarding the one for which computations did not ended/succeeded.
print
()
...
...
@@ -1260,87 +1262,88 @@ test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[11])
print
(
"Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and Jar3dD are equal', p-value = "
,
test
.
pvalue
)
# # ================== Print results for application cases =====================
# labels = ["Biokop","Biokop","RawA","RawB","BayesPairingA","BayesPairingB","BayesPairingC","BayesPairingD","JAR3DA","JAR3DB","JAR3DC","JAR3DD","BGSUBayesPairingA","BGSUBayesPairingB","BGSUBayesPairingC","BGSUBayesPairingD"]
# print("RNAsubopt",":",x_noPK[0])
# print("RNA-MOIP",":",x_noPK[1])
# for data, name in zip(x_PK, labels):
# print(name,":",data)
# labels = ["RNAsubopt","Biokop\t", "RNA-MoIP\t","RawA\t","RawB\t","BayesPairingA","BayesPairingB","BayesPairingC","BayesPairingD","JAR3DA\t","JAR3DB\t","JAR3DC\t","JAR3DD\t","BGSUBPairingA","BGSUBPairingB","BGSUBPairingC","BGSUBPairingD"]
# for r in RNAcontainer:
# print("\n",r.header_,"\nTrue structure:\t", r.true2d)
# for m, name in zip([r.rnasubopt, r.biokop, r.rnamoip,
# r.biorseoRawA,
# r.biorseoRawB,
# r.biorseoBayesPairA,
# r.biorseoBayesPairB,
# r.biorseoBayesPairC,
# r.biorseoBayesPairD,
# r.biorseoBGSUJAR3DA,
# r.biorseoBGSUJAR3DB,
# r.biorseoBGSUJAR3DC,
# r.biorseoBGSUJAR3DD,
# r.biorseoBGSUBayesPairA,
# r.biorseoBGSUBayesPairB,
# r.biorseoBGSUBayesPairC,
# r.biorseoBGSUBayesPairD ], labels):
# print(name+":\t",m.best_pred)
# ================= PLOTS OF RESULTS =======================================
merge
=
[
x_noPK
[
0
],
# RNA subopt
x_noPK
[
1
],
# RNA-MoIP
x_PK
[
0
],
# Biokop
x_PK
[
2
],
#biorseoRawA
x_PK
[
3
],
#biorseoRawB
x_PK
[
4
],
#biorseoBayesPairA
x_PK
[
5
],
#biorseoBayesPairB
x_PK
[
6
],
#biorseoBayesPairC
x_PK
[
7
],
#biorseoBayesPairD
x_PK
[
8
],
#biorseoBGSUJAR3DA
x_PK
[
9
],
#biorseoBGSUJAR3DB
x_PK
[
10
],
#biorseoBGSUJAR3DC
x_PK
[
11
],
#biorseoBGSUJAR3DD
x_PK
[
12
],
#biorseoBGSUBayesPairA
x_PK
[
13
],
#biorseoBGSUBayesPairB
x_PK
[
14
],
#biorseoBGSUBayesPairC
x_PK
[
15
],
#biorseoBGSUBayesPairD
]
# ================== Print results for application cases =====================
labels
=
[
"Biokop"
,
"Biokop"
,
"RawA"
,
"RawB"
,
"BayesPairingA"
,
"BayesPairingB"
,
"BayesPairingC"
,
"BayesPairingD"
,
"JAR3DA"
,
"JAR3DB"
,
"JAR3DC"
,
"JAR3DD"
,
"BGSUBayesPairingA"
,
"BGSUBayesPairingB"
,
"BGSUBayesPairingC"
,
"BGSUBayesPairingD"
]
print
(
"RNAsubopt"
,
":"
,
x_noPK
[
0
])
print
(
"RNA-MOIP"
,
":"
,
x_noPK
[
1
])
for
data
,
name
in
zip
(
x_PK
,
labels
):
print
(
name
,
":"
,
data
)
labels
=
[
"RNAsubopt"
,
"Biokop
\t
"
,
"RNA-MoIP
\t
"
,
"RawA
\t
"
,
"RawB
\t
"
,
"BayesPairingA"
,
"BayesPairingB"
,
"BayesPairingC"
,
"BayesPairingD"
,
"JAR3DA
\t
"
,
"JAR3DB
\t
"
,
"JAR3DC
\t
"
,
"JAR3DD
\t
"
,
"BGSUBPairingA"
,
"BGSUBPairingB"
,
"BGSUBPairingC"
,
"BGSUBPairingD"
]
for
r
in
RNAcontainer
:
print
(
"
\n
"
,
r
.
header_
,
"
\n
True structure:
\t
"
,
r
.
true2d
)
for
m
,
name
in
zip
([
r
.
rnasubopt
,
r
.
biokop
,
r
.
rnamoip
,
r
.
biorseoRawA
,
r
.
biorseoRawB
,
r
.
biorseoBayesPairA
,
r
.
biorseoBayesPairB
,
r
.
biorseoBayesPairC
,
r
.
biorseoBayesPairD
,
r
.
biorseoBGSUJAR3DA
,
r
.
biorseoBGSUJAR3DB
,
r
.
biorseoBGSUJAR3DC
,
r
.
biorseoBGSUJAR3DD
,
r
.
biorseoBGSUBayesPairA
,
r
.
biorseoBGSUBayesPairB
,
r
.
biorseoBGSUBayesPairC
,
r
.
biorseoBGSUBayesPairD
],
labels
):
print
(
name
+
":
\t
"
,
m
.
best_pred
,
"
%.2
f"
%
m
.
max_mcc
,
m
.
n_pred
)
# # ================= PLOTS OF RESULTS =======================================
# merge = [
# x_PK[0], # Biokop
# x_noPK[0], # RNA subopt
# x_noPK[1], # RNA-MoIP
# x_PK[2], #biorseoRawA
# x_PK[3], #biorseoRawB
# x_PK[4], #biorseoBayesPairA
# x_PK[5], #biorseoBayesPairB
# x_PK[6], #biorseoBayesPairC
# x_PK[7], #biorseoBayesPairD
# x_PK[8], #biorseoBGSUJAR3DA
# x_PK[9], #biorseoBGSUJAR3DB
# x_PK[10], #biorseoBGSUJAR3DC
# x_PK[11], #biorseoBGSUJAR3DD
# x_PK[12], #biorseoBGSUBayesPairA
# x_PK[13], #biorseoBGSUBayesPairB
# x_PK[14], #biorseoBGSUBayesPairC
# x_PK[15], #biorseoBGSUBayesPairD
# ]
colors
=
[
'blue'
,
'goldenrod'
,
'green
'
,
'red'
,
'firebrick'
,
'limegreen'
,
'olive'
,
'forestgreen'
,
'lime'
,
'darkcyan'
,
'royalblue'
,
'navy'
,
'limegreen
'
,
'olive
'
,
'forestgreen
'
,
'lime'
]
labels
=
[
"RNAsubopt"
,
"RNA-MoIP
"
,
"Biokop"
,
"$f_{1A}$"
,
"$f_{1B}$"
,
"$f_{1A}$"
,
"$f_{1B}$"
,
"$f_{1C}$"
,
"$f_{1D}$"
,
"$f_{1A}$"
,
"$f_{1B}$"
,
"$f_{1C}$"
,
"$f_{1D}$"
,
"$f_{1A}$"
,
"$f_{1B}$"
,
"$f_{1C}$"
,
"$f_{1D}$"
]
# colors = [ 'green', 'blue', 'goldenrod
',
#
'red',
#
'firebrick',
#
'limegreen',
#
'olive',
#
'forestgreen',
#
'lime',
# 'darkturquoise',
# 'darkcyan',
# 'royalblue',
# 'navy
',
# 'limegreen
',
# 'olive
',
# 'forestgreen',
# 'lime'
# ]
# labels = [ "Biokop", "RNAsubopt
",
# "RNA-MoIP",
#
"$f_{1A}$",
#
"$f_{1B}$",
#
"$f_{1A}$",
#
"$f_{1B}$",
#
"$f_{1C}$",
#
"$f_{1D}$",
#
"$f_{1A}$",
#
"$f_{1B}$",
#
"$f_{1C}$",
#
"$f_{1D}$",
#
"$f_{1A}$",
#
"$f_{1B}$",
#
"$f_{1C}$",
#
"$f_{1D}$"
#
]
# for y in [ i/10 for i in range(11) ]:
...
...
@@ -1371,7 +1374,7 @@ labels = [ "RNAsubopt",
# # plt.axhline(y=0, color="black", linewidth=1)
# # plt.axhline(y=1, color="black", linewidth=1)
# plt.xticks([1.0+i for i in range(16)], labels[1:])
# plt.ylim((0.
5
, 1.01))
# plt.ylim((0.
4
, 1.01))
# plt.ylabel("MCC", fontsize=12)
# plt.subplots_adjust(left=0.05, right=0.95)
# # plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0]))
...
...
Please
register
or
login
to post a comment