Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
biorseo
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Network
Create a new issue
Builds
Commits
Authored by
Nathalie BERNARD
2021-09-14 17:46:34 +0200
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
55bb6176ae96b106809bb13e0f25823a02231cca
55bb6176
1 parent
c6bdd341
Ajout de fonctions pour faire un boxplot pour les MCC de chaque séquence du benchmark
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
140 additions
and
17 deletions
Isaure_benchmark.py
Isaure_benchmark.py
View file @
55bb617
...
...
@@ -5,6 +5,8 @@ import os.path
from
math
import
sqrt
,
ceil
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
seaborn
as
sns
import
pandas
as
pd
log_path
=
"test.log"
...
...
@@ -22,7 +24,20 @@ def run_test(cmd, log):
log
.
flush
()
rc
=
process
.
poll
()
def
create_command
(
name
):
def create_command_E(name):
    """Build the shell command that runs biorseo.py with ``--func E`` on one sequence.

    Args:
        name: Base name of the FASTA file (without the ``.fa`` extension)
            located in ``/local/local/BiorseoNath/data/fasta/``.

    Returns:
        The command line as a single string (with a trailing space), meant to
        be handed to a shell.
    """
    # Local import so the block does not depend on the file's import header.
    import shlex

    biorseo_dir = "/local/local/BiorseoNath"
    # Quote the only variable part of the command: a name containing spaces or
    # shell metacharacters would otherwise split or alter the command.
    # shlex.quote() returns plain names unchanged, so the output is identical
    # to the unquoted concatenation for ordinary sequence identifiers.
    fasta_path = shlex.quote(biorseo_dir + "/data/fasta/" + name + ".fa")

    cmd = (
        "python3 " + biorseo_dir + "/biorseo.py -i "
        + fasta_path + " "
        + "-O results/ "
        + "--contacts "
        + "--patternmatch "
        + "--func E --MFE -v "
        + "--biorseo-dir " + biorseo_dir + " "
        + "--modules-path " + biorseo_dir
        + "/data/modules/ISAURE/Motifs_derniere_version "
    )
    return cmd
def
create_command_F
(
name
):
#cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " +
cmd
=
(
"python3 /local/local/BiorseoNath/biorseo.py -i "
+
"/local/local/BiorseoNath/data/fasta/"
+
...
...
@@ -129,8 +144,8 @@ def specificity(tp, tn, fp, fn):
# ================== Code from Louis Becquey's Benchmark.py ==============================
def
write_mcc_in_file_E
(
sequence_id
,
true_contacts
,
true_structure
):
read_prd
=
open
(
"results/test_"
+
sequence_id
+
".json_pmE_M
EA
"
,
"r"
)
write
=
open
(
"results/test_"
+
sequence_id
+
".mcc_E_M
EA
"
,
"w"
)
read_prd
=
open
(
"results/test_"
+
sequence_id
+
".json_pmE_M
FE
"
,
"r"
)
write
=
open
(
"results/test_"
+
sequence_id
+
".mcc_E_M
FE
"
,
"w"
)
max_mcc_str
=
-
1
;
max_mcc_ctc
=
-
1
;
...
...
@@ -223,12 +238,16 @@ def set_axis_style(ax, labels):
ax
.
set_xlim
(
0.25
,
len
(
labels
)
+
0.75
)
ax
.
set_xlabel
(
'Sample name'
)
def
visualization
(
list_struct2d
,
list_contacts
,
function
,
color
,
lines_color
):
def
visualization
_best_mcc
(
list_struct2d
,
list_contacts
,
function
,
color
,
lines_color
):
np_struct2d
=
np
.
array
(
list_struct2d
)
np_contacts
=
np
.
array
(
list_contacts
)
data_to_plot
=
[
np_struct2d
,
np_contacts
]
median_2d
=
np
.
median
(
np_struct2d
)
median_ctc
=
np
.
median
(
np_contacts
)
print
(
"mediane 2D: "
+
str
(
median_2d
)
+
"
\n
"
)
print
(
"mediane ctc: "
+
str
(
median_ctc
)
+
"
\n
"
)
fig
=
plt
.
figure
()
...
...
@@ -249,14 +268,111 @@ def visualization(list_struct2d, list_contacts, function, color, lines_color):
for
v
in
violins
[
'bodies'
]:
v
.
set_facecolor
(
color
)
plt
.
savefig
(
'visualisation'
+
function
+
'.png'
,
bbox_inches
=
'tight'
)
plt
.
savefig
(
'visualisation_16_06_MFE_'
+
function
+
'.png'
,
bbox_inches
=
'tight'
)
def get_list_structs_contacts(path_benchmark, estimator, function):
    """Collect, for every benchmark sequence that has a result file, the scores
    of all its predicted structures and contacts.

    The benchmark file is read as consecutive 4-line records: name line,
    reference contacts, sequence, reference 2D structure. The first six
    characters of the name line are dropped and the rest is stripped.

    For each record, the result file
    ``results/test_<name>.json_pm<function>_<estimator>`` is looked up; records
    without a result file are skipped. In a result file the first line is
    ignored, the second line only provides a length, and the remaining lines
    are read as (predicted structure, predicted contacts) pairs.

    Args:
        path_benchmark: Path of the benchmark text file.
        estimator: Suffix of the result file name (the caller passes 'MEA').
        function: Letter inserted after ``.json_pm`` in the result file name
            (the caller passes 'F').

    Returns:
        ``[names, struct_scores, contact_scores]`` where ``names[i]`` is a
        sequence name and ``struct_scores[i]`` / ``contact_scores[i]`` are the
        lists of values returned by ``mattews_corr_coeff`` for that sequence
        (presumably Matthews correlation coefficients — confirm against the
        helper). A contact score is only recorded when the predicted contacts
        line has the same length as the reference one.
    """
    list_name = []
    complete_list_struct2d_F = []
    complete_list_contacts_F = []

    # 'with' guarantees the benchmark file is closed; the original close() call
    # was placed after the return statement and therefore never executed.
    with open(path_benchmark, "r") as myfile:
        while True:
            name = myfile.readline()
            contacts = myfile.readline()
            seq = myfile.readline()
            structure2d = myfile.readline()
            if not seq:
                break  # no further complete record

            name = name[6:].strip()
            file_path = ("results/test_" + name + ".json_pm"
                         + function + "_" + estimator)
            if not os.path.isfile(file_path):
                continue

            list_struct2d_F = []
            list_contacts_F = []
            list_name.append(name)

            # Each result file is now closed as soon as it has been parsed.
            with open(file_path, "r") as file_result:
                file_result.readline()  # first line is not used
                sequence = file_result.readline()
                # NOTE(review): readline() keeps the trailing newline, so
                # len(sequence) is one more than the visible text of that line
                # and the slice below keeps one extra character — confirm this
                # is what compare_two_structures expects.
                prefix_len = len(sequence)

                structure_prd = sequence
                while structure_prd:
                    structure_prd = file_result.readline()
                    if not structure_prd:
                        break  # end of file

                    mcc_tab = compare_two_structures(
                        structure2d, structure_prd[:prefix_len])
                    list_struct2d_F.append(mattews_corr_coeff(
                        mcc_tab[0], mcc_tab[1], mcc_tab[2], mcc_tab[3]))

                    contacts_prd = file_result.readline()
                    # Only comparable when both contact lines have equal length.
                    if len(contacts_prd) == len(contacts):
                        mcc_tab = compare_two_contacts(contacts, contacts_prd)
                        list_contacts_F.append(mattews_corr_coeff(
                            mcc_tab[0], mcc_tab[1], mcc_tab[2], mcc_tab[3]))

            complete_list_struct2d_F.append(list_struct2d_F)
            complete_list_contacts_F.append(list_contacts_F)

    return [list_name, complete_list_struct2d_F, complete_list_contacts_F]
def _plot_mcc_boxplot(names, mcc_lists, output_file):
    """Draw one box per sequence, ordered by increasing median, and save it as a PNG."""
    # Medians are computed list by list: the per-sequence lists usually have
    # different lengths, and building a single array from such ragged input
    # raises ValueError on recent NumPy versions.
    medians = [float(np.median(values)) if len(values) else float("nan")
               for values in mcc_lists]

    # One ordering shared by the data and the labels, so that a box can never
    # be paired with the wrong name when two medians are equal. Sequences
    # without any value (NaN median) are placed last.
    order = sorted(range(len(mcc_lists)),
                   key=lambda i: (bool(np.isnan(medians[i])), medians[i]))
    data = [mcc_lists[i] for i in order]
    box_names = [names[i] for i in order]

    fig = plt.figure(figsize=(25, 4), dpi=200)
    plt.boxplot(data)
    # Overlay the individual values with a small horizontal jitter.
    for position, values in enumerate(data, start=1):
        jitter = np.random.normal(position, 0.04, size=len(values))
        plt.scatter(jitter, values)
    # Rotation is passed together with the labels so it applies to them.
    plt.xticks(np.arange(1, len(data) + 1), box_names, rotation=90)
    plt.xlabel('nom de la séquence')
    plt.ylabel('MCC')
    plt.savefig(output_file, bbox_inches='tight')
    plt.close(fig)  # release the figure once written to disk


def visualization_all_mcc(path_benchmark, estimator, function, color, lines_color):
    """Save two box plots of the per-sequence scores of a benchmark run.

    The scores are obtained from ``get_list_structs_contacts``; one figure is
    written for the 2D-structure scores and one for the contact scores, each
    with one box per sequence sorted by increasing median.

    Args:
        path_benchmark: Path of the benchmark text file.
        estimator: Estimator tag, used to locate the result files and in the
            output file names.
        function: Function letter, used to locate the result files and in the
            output file names.
        color: Accepted for signature compatibility; not used.
        lines_color: Accepted for signature compatibility; not used.

    Side effects:
        Writes ``visualisation_128arn_structure2d_<estimator>_<function>.png``
        and ``visualisation_128arn_contacts_<estimator>_<function>.png`` in the
        current directory.
    """
    # Parse the benchmark and result files once instead of three times.
    list_name, tab_struct2d, tab_contacts = get_list_structs_contacts(
        path_benchmark, estimator, function)

    suffix = estimator + "_" + function + '.png'
    _plot_mcc_boxplot(list_name, tab_struct2d,
                      'visualisation_128arn_structure2d_' + suffix)
    _plot_mcc_boxplot(list_name, tab_contacts,
                      'visualisation_128arn_contacts_' + suffix)
#cmd = ("cppsrc/Scripts/create")
#cmd0 = ("cppsrc/Scripts/addDelimiter")
#cmd1 = ("cppsrc/Scripts/countPattern")
#cmd2 = ("cppsrc/Scripts/deletePdb")
myfile
=
open
(
"data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"
,
"r"
)
"""
myfile = open("data/modules/ISAURE/Motifs_version_initiale/benchmark.txt", "r")
name = myfile.readline()
contacts = myfile.readline()
seq = myfile.readline()
...
...
@@ -266,36 +382,43 @@ list_struct2d_E = []
list_contacts_E = []
list_struct2d_F = []
list_contacts_F = []
count
=
0
countE = 0
countF = 0
while seq:
name = name[6:].strip()
print(name)
"""
run_test(cmd2 + " " + name + ".fa", log)
print(cmd2 + " " + name + ".fa")
"""
cmd3
=
create_command
(
name
)
cmd3 = create_command
_E
(name)
os.system(cmd3)
"""file_path = "results/test_" + name + ".json_pmE_MEA
"
file_path = "results/test_" + name + ".json_pmE_MFE
"
if os.path.isfile(file_path):
tabE = write_mcc_in_file_E(name, contacts, structure2d)
list_contacts_E.append(tabE[0])
list_struct2d_E.append(tabE[1])"""
list_struct2d_E.append(tabE[1])
countE = countE + 1
cmd3 = create_command_F(name)
os.system(cmd3)
file_path = "results/test_" + name + ".json_pmF_MFE"
if os.path.isfile(file_path):
tabF = write_mcc_in_file_F(name, contacts, structure2d)
list_contacts_F.append(tabF[0])
list_struct2d_F.append(tabF[1])
count
=
count
+
1
count
F = countF
+ 1
name = myfile.readline()
contacts = myfile.readline()
seq = myfile.readline()
structure2d = myfile.readline()
"""visualization(list_struct2d_E, list_contacts_E, 'E', 'red', '#900C3F')"""
visualization
(
list_struct2d_F
,
list_contacts_F
,
'F'
,
'blue'
,
'#0900FF'
)
print
(
"count: "
+
str
(
count
)
+
"
\n
"
)
myfile
.
close
()
visualization_best_mcc(list_struct2d_E, list_contacts_E, 'E', 'red', '#900C3F')
visualization_best_mcc(list_struct2d_F, list_contacts_F, 'F', 'blue', '#0900FF')
print("countE: " + str(countE) + "
\n
")
print("countF: " + str(countF) + "
\n
")
myfile.close()"""
# Script driver: plot the per-sequence MCC box plots for the benchmark below,
# reading the result files of estimator 'MEA' and function 'F'.
path_benchmark = "data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"
# The two colour arguments are required by the signature of
# visualization_all_mcc, but its body never reads them.
visualization_all_mcc(path_benchmark, 'MEA', 'F', 'blue', '#0900FF')
\ No newline at end of file
...
...
Please
register
or
login
to post a comment