Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
biorseo
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2019-05-23 09:57:03 +0200
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
350f4e49f618ede0f8679890e23dc8da85390946
350f4e49
1 parent
7ea01976
Applications cases
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
22 deletions
.gitignore
example_data/applications.dbn
test_on_RNAstrand.py
.gitignore
View file @
350f4e4
...
...
@@ -21,5 +21,7 @@ bin/*
# results
results/*
log_of_the_run.sh
logBadDesc.txt
gurobi.log
IL*
HL*
...
...
example_data/applications.dbn
View file @
350f4e4
>E.coli_alpha_operon_mRNA
UGUGCGUUUCCAUUUGAGUAUCCUGAAAACGGGCUUUUCAGCAUGGAACGUACAUAUUAAAUAGUAGGAGUGCAUAGUGGCCCGUAUAGCAGGCAUUAACAUUCCUGA
(((((((.(((((........[[[[....[[[[....{{{{.))))))))))))..........................]]]].....]]]]...........}}}}
>>__'CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE_'_(PDB_00376)
GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
((((((..(((.........)))((((((((...))))))))...(((((.......))))))))))).....
>__'GUANINE_RIBOSWITCH_U22C,_A52G_MUTANT_BOUND_TO_HYPOXANTHINE_'_(PDB_01023)
GGACAUACAAUCGCGUGGAUAUGGCACGCAAGUUUCUGCCGGGCACCGUAAAUGUCCGACUAUGUCCa
(((((((...(((((((.[[..[[)))))))........((((((]]...]]))))))..))))))).
\ No newline at end of file
(((((((...(((((((.[[..[[)))))))........((((((]]...]]))))))..))))))).
>__'SOLUTION_STRUCTURE_OF_THE_P2B-P3_PSEUDOKNOT_FROM_HUMAN_TELOMERASE_RNA_'_(PDB_00857)
GGGCUGUUUUUCUCGCUGACUUUCAGCCCCAAACAAAAAAGUCAGCA
[[[[[[........(((((((((]]]]]]........))))))))).
\ No newline at end of file
...
...
test_on_RNAstrand.py
View file @
350f4e4
...
...
@@ -13,22 +13,23 @@ import ast
# ================== DEFINITION OF THE PATHS ==============================
biorseoDir
=
"."
runDir
=
path
.
dirname
(
path
.
realpath
(
__file__
))
dataFile
=
argv
[
1
]
outputDir
=
biorseoDir
+
"/results/"
# Retrieve Jar3D Paths from file EditMe
# Retrieve Paths from file EditMe
jar3dexec
=
""
HLmotifDir
=
""
ILmotifDir
=
""
descfolder
=
""
bypdir
=
""
biorseoDir
=
"."
exec
(
compile
(
open
(
biorseoDir
+
"/EditMe"
)
.
read
(),
''
,
'exec'
))
runDir
=
path
.
dirname
(
path
.
realpath
(
__file__
))
dataFile
=
argv
[
1
]
outputDir
=
biorseoDir
+
"/results/"
# Create some folders to store the results
subprocess
.
call
([
"mkdir"
,
"-p"
,
outputDir
])
subprocess
.
call
([
"mkdir"
,
"-p"
,
outputDir
+
"PK/"
])
subprocess
.
call
([
"mkdir"
,
"-p"
,
outputDir
+
"noPK/"
])
m
=
Manager
()
running_stats
=
m
.
list
()
running_stats
.
append
(
0
)
# n_launched
...
...
@@ -727,6 +728,7 @@ class RNA:
m
.
max_mcc
=
max
(
mccs
)
m
.
min_mcc
=
min
(
mccs
)
m
.
avg_mcc
=
sum
(
mccs
)
/
float
(
len
(
mccs
))
m
.
best_pred
=
sec_structs
[
mccs
.
index
(
m
.
max_mcc
)]
for
p
,
n
in
zip
(
m
.
predictions
,
m
.
ninsertions
):
if
not
')'
in
p
:
continue
...
...
@@ -1001,7 +1003,7 @@ for nt, number in ignored_nt_dict.items():
tot
=
len
(
RNAcontainer
)
print
(
"Loaded
%
d RNAs of length between 10 and 100.
%
d of them contain pseudoknots."
%
(
tot
,
pk_counter
))
# ================= PREDICTION OF STRUCTURES ===============================
#
#
================= PREDICTION OF STRUCTURES ===============================
# # define job list
# joblist = []
...
...
@@ -1050,7 +1052,7 @@ print("Loaded %d RNAs of length between 10 and 100. %d of them contain pseudokno
# # RNA MoIP
# joblist.append(Job(function=launch_RNAMoIP, args=[instance.seq_, instance.header_, basename], priority=3, timeout=3600, checkFunc=check_RNAMoIP, checkArgs=[basename]))
# # Biokop
# joblist.append(Job(command=[biorseoDir + "../biokop/biokop", "-n1", "-i", outputDir + basename + ".fa", "-o", outputDir + basename + ".biok"], priority=5, timeout=15000, how_many_in_parallel=3, checkFunc=check_biokop, checkArgs=[basename]))
# joblist.append(Job(command=[biorseoDir + "
/
../biokop/biokop", "-n1", "-i", outputDir + basename + ".fa", "-o", outputDir + basename + ".biok"], priority=5, timeout=15000, how_many_in_parallel=3, checkFunc=check_biokop, checkArgs=[basename]))
# # execute jobs
...
...
@@ -1118,6 +1120,7 @@ x_noPK = [
[
rna
.
biorseoBayesPairC
.
max_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairC
.
predictions
)],
[
rna
.
biorseoBayesPairD
.
max_mcc
for
rna
in
RNAcontainer
if
len
(
rna
.
biorseoBayesPairD
.
predictions
)],
]
x_noPK_fully
=
[
[
rna
.
rnasubopt
.
max_mcc
for
rna
in
RNAs_fully_predicted
],
[
rna
.
rnamoip
.
max_mcc
for
rna
in
RNAs_fully_predicted
],
...
...
@@ -1257,6 +1260,33 @@ test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[11])
print
(
"Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and Jar3dD are equal', p-value = "
,
test
.
pvalue
)
# ================== Print results for application cases =====================
labels
=
[
"Biokop"
,
"Biokop"
,
"RawA"
,
"RawB"
,
"BayesPairingA"
,
"BayesPairingB"
,
"BayesPairingC"
,
"BayesPairingD"
,
"JAR3DA"
,
"JAR3DB"
,
"JAR3DC"
,
"JAR3DD"
,
"BGSUBayesPairingA"
,
"BGSUBayesPairingB"
,
"BGSUBayesPairingC"
,
"BGSUBayesPairingD"
]
print
(
"RNAsubopt"
,
":"
,
x_noPK
[
0
])
print
(
"RNA-MOIP"
,
":"
,
x_noPK
[
1
])
for
data
,
name
in
zip
(
x_PK
,
labels
):
print
(
name
,
":"
,
data
)
labels
=
[
"RNAsubopt"
,
"Biokop
\t
"
,
"RNA MoIP
\t
"
,
"RawA
\t
"
,
"RawB
\t
"
,
"BayesPairingA"
,
"BayesPairingB"
,
"BayesPairingC"
,
"BayesPairingD"
,
"JAR3DA
\t
"
,
"JAR3DB
\t
"
,
"JAR3DC
\t
"
,
"JAR3DD
\t
"
,
"BGSUBPairingA"
,
"BGSUBPairingB"
,
"BGSUBPairingC"
,
"BGSUBPairingD"
]
for
r
in
RNAcontainer
:
print
(
"
\n
"
,
r
.
header_
,
"
\n
True structure:
\t
"
,
r
.
true2d
)
for
m
,
name
in
zip
([
r
.
rnasubopt
,
r
.
biokop
,
r
.
rnamoip
,
r
.
biorseoRawA
,
r
.
biorseoRawB
,
r
.
biorseoBayesPairA
,
r
.
biorseoBayesPairB
,
r
.
biorseoBayesPairC
,
r
.
biorseoBayesPairD
,
r
.
biorseoBGSUJAR3DA
,
r
.
biorseoBGSUJAR3DB
,
r
.
biorseoBGSUJAR3DC
,
r
.
biorseoBGSUJAR3DD
,
r
.
biorseoBGSUBayesPairA
,
r
.
biorseoBGSUBayesPairB
,
r
.
biorseoBGSUBayesPairC
,
r
.
biorseoBGSUBayesPairD
],
labels
):
print
(
name
+
":
\t
"
,
m
.
best_pred
)
# # ================= PLOTS OF RESULTS =======================================
# merge = [ x_PK_fully[0], # Biokop
...
...
@@ -1357,16 +1387,6 @@ print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop
# plt.show()
def
isbetter
(
rna
):
# if rna.rnasubopt.max_mcc > rna.biorseoBGSUJAR3DA.max_mcc: return False
# if rna.biokop.max_mcc > rna.biorseoBGSUJAR3DA.max_mcc: return False
if
rna
.
rnasubopt
.
max_mcc
>
rna
.
biorseoRawA
.
max_mcc
:
return
False
# if rna.biokop.max_mcc > rna.biorseoRawA.max_mcc: return False
return
True
x_names
=
[
rna
.
header_
for
rna
in
RNAs_fully_predicted
if
len
(
rna
.
seq_
)
>
50
and
"TRNA"
not
in
rna
.
header_
and
isbetter
(
rna
)
]
for
r
in
x_names
:
print
(
r
)
# # ================== MCC performance ====================================
# # plt.subplot(141)
# x = [
...
...
Please
register
or
login
to post a comment