Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
RNANet
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Aglaé TABOT
2021-06-15 10:36:07 +0200
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
e20ca4110b26f173cb7d50f5fc546d839ab1a551
e20ca411
1 parent
c0c2efcc
Added functions appariements, parse, graph_basepairs
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
337 additions
and
7 deletions
statistics.py
statistics.py
View file @
e20ca41
...
...
@@ -17,6 +17,7 @@ import scipy.cluster.hierarchy as sch
import
sklearn
import
json
import
pickle
import
Bio
from
scipy.spatial.distance
import
squareform
from
mpl_toolkits.mplot3d
import
axes3d
from
Bio
import
AlignIO
,
SeqIO
...
...
@@ -1639,7 +1640,7 @@ def dist_atoms_hire_RNA (f) :
setproctitle
(
f
"RNANet statistics.py Worker {thr_idx+1} dist_atoms_hire_RNA({f})"
)
parser
=
MMCIFParser
()
s
=
parser
.
get_structure
(
name
,
os
.
path
.
abspath
(
"/home/data/RNA/3D/
rna_only/"
+
f
))
s
=
parser
.
get_structure
(
name
,
os
.
path
.
abspath
(
path_to_3D_data
+
"
rna_only/"
+
f
))
chain
=
next
(
s
[
0
]
.
get_chains
())
residues
=
list
(
chain
.
get_residues
())
pbar
=
tqdm
(
total
=
len
(
residues
),
position
=
thr_idx
+
1
,
desc
=
f
"Worker {thr_idx+1}: {f} dist_atoms_hire_RNA"
,
unit
=
"residu"
,
leave
=
False
)
...
...
@@ -1800,7 +1801,7 @@ def angles_torsion_hire_RNA(f):
os
.
makedirs
(
runDir
+
"/results/HiRE-RNA/torsions/"
,
exist_ok
=
True
)
parser
=
MMCIFParser
()
s
=
parser
.
get_structure
(
name
,
os
.
path
.
abspath
(
"/home/data/RNA/3D/
rna_only/"
+
f
))
s
=
parser
.
get_structure
(
name
,
os
.
path
.
abspath
(
path_to_3D_data
+
"
rna_only/"
+
f
))
chain
=
next
(
s
[
0
]
.
get_chains
())
residues
=
list
(
chain
.
get_residues
())
pbar
=
tqdm
(
total
=
len
(
residues
),
position
=
thr_idx
+
1
,
desc
=
f
"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA"
,
unit
=
"residu"
,
leave
=
False
)
...
...
@@ -1926,7 +1927,7 @@ def angles_plans_hire_RNA(f):
os
.
makedirs
(
runDir
+
"/results/HiRE-RNA/angles/"
,
exist_ok
=
True
)
parser
=
MMCIFParser
()
s
=
parser
.
get_structure
(
name
,
os
.
path
.
abspath
(
"/home/data/RNA/3D/
rna_only/"
+
f
))
s
=
parser
.
get_structure
(
name
,
os
.
path
.
abspath
(
path_to_3D_data
+
"
rna_only/"
+
f
))
chain
=
next
(
s
[
0
]
.
get_chains
())
residues
=
list
(
chain
.
get_residues
())
pbar
=
tqdm
(
total
=
len
(
residues
),
position
=
thr_idx
+
1
,
desc
=
f
"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA"
,
unit
=
"residu"
,
leave
=
False
)
...
...
@@ -2045,6 +2046,7 @@ def GMM_histo(data, name_data, x, y, nb_fichiers) :
def
GMM_histo_without_saving
(
data
,
name_data
,
x
,
y
,
nb_fichiers
)
:
'''
Plot Gaussian-Mixture-Model on histograms
But without saving and close the figure
'''
histogram
(
data
,
name_data
,
x
,
y
,
nb_fichiers
)
#plot the histogram
...
...
@@ -2564,6 +2566,7 @@ def dictio(ld):
key = pdb identifier of the structure
value = list of RNA chains
'''
dictionnaire
=
dict
()
pdb
=
[]
for
f
in
ld
:
pdb_id
=
str
.
split
(
f
,
'_'
)[
0
]
...
...
@@ -2648,19 +2651,335 @@ def graphe(type_LW, angle_1, angle_2, angle_3, angle_4, distance, nb):
plt
.
subplot
(
2
,
1
,
1
)
if
len
(
angle_1
)
>
0
:
GMM_tot
(
angle_1
,
"C4'-C1'-B1"
,
nb
,
'cyan'
)
if
len
(
angle_2
)
>
0
:
GMM_tot
(
angle_2
,
"C1'-B1-B1pair"
,
nb
,
'magenta'
)
if
len
(
angle_3
)
>
0
:
GMM_tot
(
angle_3
,
"B1-B1pair-C1'pair"
,
nb
,
"yellow"
)
if
len
(
angle_4
)
>
0
:
GMM_tot
(
angle_4
,
"B1pair-C1'pair-C4'pair"
,
nb
,
'olive'
)
plt
.
xlabel
(
"Angle(degré)"
)
plt
.
title
(
"GMM des angles plans pour les appariements "
+
type_LW
+
" ("
+
str
(
nb
)
+
" valeurs)"
,
fontsize
=
10
)
plt
.
subplot
(
2
,
1
,
2
)
if
len
(
distance
)
>
0
:
GMM_histo_without_saving
(
distance
,
"Distance pointes "
+
type_LW
,
"Distance (Angström)"
,
"Densité"
,
nb
)
plt
.
savefig
(
"Mesures appariements "
+
type_LW
+
" ("
+
str
(
nb
)
+
" valeurs).png"
)
plt
.
close
()
def
appariements
(
chain
,
df
):
'''
Cleanup of the dataset
measurements of distances and angles between paired nucleotides in the chain
'''
liste_dist
=
[]
warnings
.
simplefilter
(
action
=
"ignore"
,
category
=
SettingWithCopyWarning
)
pairs
=
df
[[
'index_chain'
,
'old_nt_resnum'
,
'paired'
,
'pair_type_LW'
]]
# columns we keep
for
i
in
range
(
pairs
.
shape
[
0
]):
#we remove the lines where no pairing (NaN in paired)
index_with_nan
=
pairs
.
index
[
pairs
.
iloc
[:,
2
]
.
isnull
()]
pairs
.
drop
(
index_with_nan
,
0
,
inplace
=
True
)
paired_int
=
[]
for
i
in
pairs
.
index
:
# convert values from paired to integers or lists of integers
paired
=
pairs
.
at
[
i
,
'paired'
]
if
type
(
paired
)
is
np
.
int64
or
type
(
paired
)
is
np
.
float64
:
paired_int
.
append
(
int
(
paired
))
else
:
#strings
if
len
(
paired
)
<
3
:
#a single pairing
paired_int
.
append
(
int
(
paired
))
else
:
#several pairings
paired
=
paired
.
split
(
','
)
l
=
[
int
(
i
)
for
i
in
paired
]
paired_int
.
append
(
l
)
pair_type_LW_bis
=
[]
for
j
in
pairs
.
index
:
pair_type_LW
=
pairs
.
at
[
j
,
'pair_type_LW'
]
if
len
(
pair_type_LW
)
<
4
:
#a single pairing
pair_type_LW_bis
.
append
(
pair_type_LW
)
else
:
#several pairings
pair_type_LW
=
pair_type_LW
.
split
(
','
)
l
=
[
i
for
i
in
pair_type_LW
]
pair_type_LW_bis
.
append
(
pair_type_LW
)
#addition of these new columns
pairs
.
insert
(
4
,
"paired_int"
,
paired_int
,
True
)
pairs
.
insert
(
5
,
"pair_type_LW_bis"
,
pair_type_LW_bis
,
True
)
indexNames
=
pairs
[
pairs
[
'paired_int'
]
==
0
]
.
index
pairs
.
drop
(
indexNames
,
inplace
=
True
)
#deletion of lines with a 0 in paired_int (matching to another RNA chain)
for
i
in
pairs
.
index
:
'''
calculations for each row of the pairs dataset
'''
resseq
=
pairs
.
at
[
i
,
'old_nt_resnum'
]
#number of the residue in the chain
#code to delete letters in old_nt_resnum
icode_res
=
' '
if
type
(
resseq
)
is
str
:
if
resseq
[
0
]
!=
'-'
:
while
resseq
.
isdigit
()
is
False
:
l
=
len
(
resseq
)
if
icode_res
==
' '
:
icode_res
=
resseq
[
l
-
1
]
else
:
icode_res
=
resseq
[
l
-
1
]
+
icode_res
resseq
=
resseq
[:
l
-
1
]
resseq
=
int
(
resseq
)
index
=
pairs
.
at
[
i
,
'index_chain'
]
type_LW
=
pairs
.
at
[
i
,
'pair_type_LW_bis'
]
#pairing type
num_paired
=
pairs
.
at
[
i
,
'paired_int'
]
#number (index_chain) of the paired nucleotide
if
type
(
num_paired
)
is
int
or
type
(
num_paired
)
is
np
.
int64
:
l
=
pairs
[
pairs
[
'index_chain'
]
==
num_paired
]
.
index
.
to_list
()
resnum
=
pairs
.
at
[
l
[
0
],
'old_nt_resnum'
]
icode_pair
=
' '
if
type
(
resnum
)
is
str
:
if
resnum
[
0
]
!=
'-'
:
while
resnum
.
isdigit
()
is
False
:
l
=
len
(
resnum
)
if
icode_pair
==
' '
:
icode_pair
=
resnum
[
l
-
1
]
else
:
icode_pair
=
resnum
[
l
-
1
]
+
icode_pair
resnum
=
resnum
[:
l
-
1
]
resnum
=
int
(
resnum
)
try
:
d
=
dist_pointes
(
chain
[(
' '
,
resseq
,
icode_res
)],
chain
[(
' '
,
resnum
,
icode_pair
)])
#calculation of the distance between the tips of the paired nucleotides
angle
=
angle_c1_b1
(
chain
[(
' '
,
resseq
,
icode_res
)],
chain
[(
' '
,
resnum
,
icode_pair
)])
if
d
!=
0.0
:
liste_dist
.
append
([
chain
,
type_LW
,
resseq
,
resnum
,
d
,
angle
[
0
],
angle
[
1
],
angle
[
2
],
angle
[
3
]])
except
:
pass
else
:
for
j
in
range
(
len
(
num_paired
)):
#if several pairings, process them one by one
if
num_paired
[
j
]
!=
0
:
l
=
pairs
[
pairs
[
'index_chain'
]
==
num_paired
[
j
]]
.
index
.
to_list
()
resnum
=
pairs
.
at
[
l
[
0
],
'old_nt_resnum'
]
icode_pair
=
' '
if
type
(
resnum
)
is
str
:
if
resnum
[
0
]
!=
'-'
:
while
resnum
.
isdigit
()
is
False
:
l
=
len
(
resnum
)
if
icode_pair
==
' '
:
icode_pair
=
resnum
[
l
-
1
]
else
:
icode_pair
=
resnum
[
l
-
1
]
+
icode_pair
resnum
=
resnum
[:
l
-
1
]
resnum
=
int
(
resnum
)
try
:
d
=
dist_pointes
(
chain
[(
' '
,
resseq
,
icode_res
)],
chain
[(
' '
,
resnum
,
icode_pair
)])
angle
=
angle_c1_b1
(
chain
[(
' '
,
resseq
,
icode_res
)],
chain
[(
' '
,
resnum
,
icode_pair
)])
if
d
!=
0.0
:
liste_dist
.
append
([
chain
,
type_LW
[
j
],
resseq
,
resnum
,
d
,
angle
[
0
],
angle
[
1
],
angle
[
2
],
angle
[
3
]])
except
:
pass
return
(
liste_dist
)
def
parse
(
cle
):
l
=
[]
os
.
makedirs
(
runDir
+
"/results/basepairs/"
,
exist_ok
=
True
)
with
warnings
.
catch_warnings
():
# Ignore the PDB problems. This mostly warns that some chain is discontinuous.
warnings
.
simplefilter
(
'ignore'
,
Bio
.
PDB
.
PDBExceptions
.
PDBConstructionWarning
)
warnings
.
simplefilter
(
'ignore'
,
Bio
.
PDB
.
PDBExceptions
.
BiopythonWarning
)
parser
=
MMCIFParser
()
s
=
parser
.
get_structure
(
cle
,
os
.
path
.
abspath
(
path_to_3D_data
+
"RNAcifs/"
+
cle
+
".cif"
))
for
model
in
s
:
for
valeur
in
dictionnaire
[
cle
]:
if
len
(
valeur
)
>
2
:
df_tot
=
[]
for
id_chain
in
valeur
:
for
data
in
ld
:
if
(
len
(
data
)
<
10
):
#unmapped
chaine
=
str
.
split
(
data
,
'_'
)
if
chaine
[
0
]
==
cle
and
chaine
[
2
]
==
id_chain
:
df
=
pd
.
read_csv
(
os
.
path
.
abspath
(
path_to_3D_data
+
"datapoints/"
+
data
))
if
df
[
'index_chain'
][
0
]
==
1
:
#ignore files with numbering errors
l
=
appariements
(
model
[
id_chain
],
df
)
else
:
for
data
in
ld
:
if
(
len
(
data
)
<
10
):
#unmapped
chaine
=
str
.
split
(
data
,
'_'
)
if
chaine
[
0
]
==
cle
and
chaine
[
2
]
==
valeur
:
df
=
pd
.
read_csv
(
os
.
path
.
abspath
(
path_to_3D_data
+
"datapoints/"
+
data
))
if
df
[
'index_chain'
][
0
]
==
1
:
l
=
appariements
(
model
[
valeur
],
df
)
df_calc
=
pd
.
DataFrame
(
l
,
columns
=
[
"Chaine"
,
"type LW"
,
"Resseq"
,
"Num paired"
,
"Distance"
,
"C4'-C1'-B1"
,
"C1'-B1-B1pair"
,
"B1-B1pair-C1'pair"
,
"B1pair-C1'pair-C4'pair"
])
df_calc
.
to_csv
(
runDir
+
"/results/basepairs/"
+
'basepairs '
+
cle
+
'_'
+
valeur
+
'.csv'
)
def
graph_basepairs
():
df
=
pd
.
read_csv
(
os
.
path
.
abspath
(
runDir
+
"/results/basepairs/basepairs.csv"
))
cWW
=
df
[
df
[
'type LW'
]
==
'cWW'
]
cWW_dist
=
list
(
cWW
[
"Distance"
])
cWW_angle_1
=
list
(
cWW
[
"C4'-C1'-B1"
])
cWW_angle_2
=
list
(
cWW
[
"C1'-B1-B1pair"
])
cWW_angle_3
=
list
(
cWW
[
"B1-B1pair-C1'pair"
])
cWW_angle_4
=
list
(
cWW
[
"B1pair-C1'pair-C4'pair"
])
tWW
=
df
[
df
[
'type LW'
]
==
'tWW'
]
tWW_dist
=
list
(
tWW
[
"Distance"
])
tWW_angle_1
=
list
(
tWW
[
"C4'-C1'-B1"
])
tWW_angle_2
=
list
(
tWW
[
"C1'-B1-B1pair"
])
tWW_angle_3
=
list
(
tWW
[
"B1-B1pair-C1'pair"
])
tWW_angle_4
=
list
(
tWW
[
"B1pair-C1'pair-C4'pair"
])
cWH
=
df
[
df
[
'type LW'
]
==
'cWH'
]
cWH_dist
=
list
(
cWH
[
"Distance"
])
cWH_angle_1
=
list
(
cWH
[
"C4'-C1'-B1"
])
cWH_angle_2
=
list
(
cWH
[
"C1'-B1-B1pair"
])
cWH_angle_3
=
list
(
cWH
[
"B1-B1pair-C1'pair"
])
cWH_angle_4
=
list
(
cWH
[
"B1pair-C1'pair-C4'pair"
])
tWH
=
df
[
df
[
'type LW'
]
==
'tWH'
]
tWH_dist
=
list
(
tWH
[
"Distance"
])
tWH_angle_1
=
list
(
tWH
[
"C4'-C1'-B1"
])
tWH_angle_2
=
list
(
tWH
[
"C1'-B1-B1pair"
])
tWH_angle_3
=
list
(
tWH
[
"B1-B1pair-C1'pair"
])
tWH_angle_4
=
list
(
tWH
[
"B1pair-C1'pair-C4'pair"
])
cHW
=
df
[
df
[
'type LW'
]
==
'cHW'
]
cHW_dist
=
list
(
cHW
[
"Distance"
])
cHW_angle_1
=
list
(
cHW
[
"C4'-C1'-B1"
])
cHW_angle_2
=
list
(
cHW
[
"C1'-B1-B1pair"
])
cHW_angle_3
=
list
(
cHW
[
"B1-B1pair-C1'pair"
])
cHW_angle_4
=
list
(
cHW
[
"B1pair-C1'pair-C4'pair"
])
tHW
=
df
[
df
[
'type LW'
]
==
'tHW'
]
tHW_dist
=
list
(
tHW
[
"Distance"
])
tHW_angle_1
=
list
(
tHW
[
"C4'-C1'-B1"
])
tHW_angle_2
=
list
(
tHW
[
"C1'-B1-B1pair"
])
tHW_angle_3
=
list
(
tHW
[
"B1-B1pair-C1'pair"
])
tHW_angle_4
=
list
(
tHW
[
"B1pair-C1'pair-C4'pair"
])
cWS
=
df
[
df
[
'type LW'
]
==
'cWS'
]
cWS_dist
=
list
(
cWS
[
"Distance"
])
cWS_angle_1
=
list
(
cWS
[
"C4'-C1'-B1"
])
cWS_angle_2
=
list
(
cWS
[
"C1'-B1-B1pair"
])
cWS_angle_3
=
list
(
cWS
[
"B1-B1pair-C1'pair"
])
cWS_angle_4
=
list
(
cWS
[
"B1pair-C1'pair-C4'pair"
])
tWS
=
df
[
df
[
'type LW'
]
==
'tWS'
]
tWS_dist
=
list
(
tWS
[
"Distance"
])
tWS_angle_1
=
list
(
tWS
[
"C4'-C1'-B1"
])
tWS_angle_2
=
list
(
tWS
[
"C1'-B1-B1pair"
])
tWS_angle_3
=
list
(
tWS
[
"B1-B1pair-C1'pair"
])
tWS_angle_4
=
list
(
tWS
[
"B1pair-C1'pair-C4'pair"
])
cSW
=
df
[
df
[
'type LW'
]
==
'cSW'
]
cSW_dist
=
list
(
cSW
[
"Distance"
])
cSW_angle_1
=
list
(
cSW
[
"C4'-C1'-B1"
])
cSW_angle_2
=
list
(
cSW
[
"C1'-B1-B1pair"
])
cSW_angle_3
=
list
(
cSW
[
"B1-B1pair-C1'pair"
])
cSW_angle_4
=
list
(
cSW
[
"B1pair-C1'pair-C4'pair"
])
tSW
=
df
[
df
[
'type LW'
]
==
'tSW'
]
tSW_dist
=
list
(
tSW
[
"Distance"
])
tSW_angle_1
=
list
(
tSW
[
"C4'-C1'-B1"
])
tSW_angle_2
=
list
(
tSW
[
"C1'-B1-B1pair"
])
tSW_angle_3
=
list
(
tSW
[
"B1-B1pair-C1'pair"
])
tSW_angle_4
=
list
(
tSW
[
"B1pair-C1'pair-C4'pair"
])
cHH
=
df
[
df
[
'type LW'
]
==
'cHH'
]
cHH_dist
=
list
(
cHH
[
"Distance"
])
cHH_angle_1
=
list
(
cHH
[
"C4'-C1'-B1"
])
cHH_angle_2
=
list
(
cHH
[
"C1'-B1-B1pair"
])
cHH_angle_3
=
list
(
cHH
[
"B1-B1pair-C1'pair"
])
cHH_angle_4
=
list
(
cHH
[
"B1pair-C1'pair-C4'pair"
])
tHH
=
df
[
df
[
'type LW'
]
==
'tHH'
]
tHH_dist
=
list
(
tHH
[
"Distance"
])
tHH_angle_1
=
list
(
tHH
[
"C4'-C1'-B1"
])
tHH_angle_2
=
list
(
tHH
[
"C1'-B1-B1pair"
])
tHH_angle_3
=
list
(
tHH
[
"B1-B1pair-C1'pair"
])
tHH_angle_4
=
list
(
tHH
[
"B1pair-C1'pair-C4'pair"
])
cSH
=
df
[
df
[
'type LW'
]
==
'cSH'
]
cSH_dist
=
list
(
cSH
[
"Distance"
])
cSH_angle_1
=
list
(
cSH
[
"C4'-C1'-B1"
])
cSH_angle_2
=
list
(
cSH
[
"C1'-B1-B1pair"
])
cSH_angle_3
=
list
(
cSH
[
"B1-B1pair-C1'pair"
])
cSH_angle_4
=
list
(
cSH
[
"B1pair-C1'pair-C4'pair"
])
tSH
=
df
[
df
[
'type LW'
]
==
'tSH'
]
tSH_dist
=
list
(
tSH
[
"Distance"
])
tSH_angle_1
=
list
(
tSH
[
"C4'-C1'-B1"
])
tSH_angle_2
=
list
(
tSH
[
"C1'-B1-B1pair"
])
tSH_angle_3
=
list
(
tSH
[
"B1-B1pair-C1'pair"
])
tSH_angle_4
=
list
(
tSH
[
"B1pair-C1'pair-C4'pair"
])
cHS
=
df
[
df
[
'type LW'
]
==
'cHS'
]
cHS_dist
=
list
(
cHS
[
"Distance"
])
cHS_angle_1
=
list
(
cHS
[
"C4'-C1'-B1"
])
cHS_angle_2
=
list
(
cHS
[
"C1'-B1-B1pair"
])
cHS_angle_3
=
list
(
cHS
[
"B1-B1pair-C1'pair"
])
cHS_angle_4
=
list
(
cHS
[
"B1pair-C1'pair-C4'pair"
])
tHS
=
df
[
df
[
'type LW'
]
==
'tHS'
]
tHS_dist
=
list
(
tHS
[
"Distance"
])
tHS_angle_1
=
list
(
tHS
[
"C4'-C1'-B1"
])
tHS_angle_2
=
list
(
tHS
[
"C1'-B1-B1pair"
])
tHS_angle_3
=
list
(
tHS
[
"B1-B1pair-C1'pair"
])
tHS_angle_4
=
list
(
tHS
[
"B1pair-C1'pair-C4'pair"
])
cSS
=
df
[
df
[
'type LW'
]
==
'cSS'
]
cSS_dist
=
list
(
cSS
[
"Distance"
])
cSS_angle_1
=
list
(
cSS
[
"C4'-C1'-B1"
])
cSS_angle_2
=
list
(
cSS
[
"C1'-B1-B1pair"
])
cSS_angle_3
=
list
(
cSS
[
"B1-B1pair-C1'pair"
])
cSS_angle_4
=
list
(
cSS
[
"B1pair-C1'pair-C4'pair"
])
tSS
=
df
[
df
[
'type LW'
]
==
'tSS'
]
tSS_dist
=
list
(
tSS
[
"Distance"
])
tSS_angle_1
=
list
(
tSS
[
"C4'-C1'-B1"
])
tSS_angle_2
=
list
(
tSS
[
"C1'-B1-B1pair"
])
tSS_angle_3
=
list
(
tSS
[
"B1-B1pair-C1'pair"
])
tSS_angle_4
=
list
(
tSS
[
"B1pair-C1'pair-C4'pair"
])
os
.
makedirs
(
runDir
+
"/results/figures/basepairs/"
,
exist_ok
=
True
)
os
.
chdir
(
runDir
+
"/results/figures/basepairs/"
)
graphe
(
'cWW'
,
cWW_angle_1
,
cWW_angle_2
,
cWW_angle_3
,
cWW_angle_4
,
cWW_dist
,
len
(
cWW
))
graphe
(
'tWW'
,
tWW_angle_1
,
tWW_angle_2
,
tWW_angle_3
,
tWW_angle_4
,
tWW_dist
,
len
(
tWW
))
graphe
(
'cWH'
,
cWH_angle_1
,
cWH_angle_2
,
cWH_angle_3
,
cWH_angle_4
,
cWH_dist
,
len
(
cWH
))
graphe
(
'tWH'
,
tWH_angle_1
,
tWH_angle_2
,
tWH_angle_3
,
tWH_angle_4
,
tWH_dist
,
len
(
tWH
))
graphe
(
'cHW'
,
cHW_angle_1
,
cHW_angle_2
,
cHW_angle_3
,
cHW_angle_4
,
cHW_dist
,
len
(
cHW
))
graphe
(
'tHW'
,
tHW_angle_1
,
tHW_angle_2
,
tHW_angle_3
,
tHW_angle_4
,
tHW_dist
,
len
(
tHW
))
graphe
(
'tWS'
,
tWS_angle_1
,
tWS_angle_2
,
tWS_angle_3
,
tWS_angle_4
,
tWS_dist
,
len
(
tWS
))
graphe
(
'cWS'
,
cWS_angle_1
,
cWS_angle_2
,
cWS_angle_3
,
cWS_angle_4
,
cWS_dist
,
len
(
cWS
))
graphe
(
'tSW'
,
tSW_angle_1
,
tSW_angle_2
,
tSW_angle_3
,
tSW_angle_4
,
tSW_dist
,
len
(
tSW
))
graphe
(
'cSW'
,
cSW_angle_1
,
cSW_angle_2
,
cSW_angle_3
,
cSW_angle_4
,
cSW_dist
,
len
(
cSW
))
graphe
(
'cHH'
,
cHH_angle_1
,
cHH_angle_2
,
cHH_angle_3
,
cHH_angle_4
,
cHH_dist
,
len
(
cHH
))
graphe
(
'tHH'
,
tHH_angle_1
,
tHH_angle_2
,
tHH_angle_3
,
tHH_angle_4
,
tHH_dist
,
len
(
tHH
))
graphe
(
'cSH'
,
cSH_angle_1
,
cSH_angle_2
,
cSH_angle_3
,
cSH_angle_4
,
cSH_dist
,
len
(
cSH
))
graphe
(
'tSH'
,
tSH_angle_1
,
tSH_angle_2
,
tSH_angle_3
,
tSH_angle_4
,
tSH_dist
,
len
(
tSH
))
graphe
(
'cHS'
,
cHS_angle_1
,
cHS_angle_2
,
cHS_angle_3
,
cHS_angle_4
,
cHS_dist
,
len
(
cHS
))
graphe
(
'tHS'
,
tHS_angle_1
,
tHS_angle_2
,
tHS_angle_3
,
tHS_angle_4
,
tHS_dist
,
len
(
tHS
))
graphe
(
'cSS'
,
cSS_angle_1
,
cSS_angle_2
,
cSS_angle_3
,
cSS_angle_4
,
cSS_dist
,
len
(
cSS
))
graphe
(
'tSS'
,
tSS_angle_1
,
tSS_angle_2
,
tSS_angle_3
,
tSS_angle_4
,
tSS_dist
,
len
(
tSS
))
nc
=
len
(
cWW
)
+
len
(
cHH
)
+
len
(
cSS
)
+
len
(
cWH
)
+
len
(
cHW
)
+
len
(
cWS
)
+
len
(
cSW
)
+
len
(
cHS
)
+
len
(
cSH
)
GMM_tot
(
cWW_dist
,
"cWW"
,
nc
,
'lightcoral'
)
GMM_tot
(
cHH_dist
,
"cHH"
,
nc
,
'lightseagreen'
)
GMM_tot
(
cSS_dist
,
"cSS"
,
nc
,
'black'
)
GMM_tot
(
cWH_dist
,
"cWH"
,
nc
,
'goldenrod'
)
GMM_tot
(
cHW_dist
,
"cHW"
,
nc
,
'olive'
)
GMM_tot
(
cWS_dist
,
"cWS"
,
nc
,
'steelblue'
)
GMM_tot
(
cSW_dist
,
"cSW"
,
nc
,
'silver'
)
GMM_tot
(
cHS_dist
,
"cHS"
,
nc
,
'deeppink'
)
GMM_tot
(
cSH_dist
,
"cSH"
,
nc
,
'navy'
)
plt
.
xlabel
(
'Distance (Angström)'
)
plt
.
title
(
"GMM des distances entre pointes des nucléotides pour les appariements cis ("
+
str
(
nc
)
+
" valeurs)"
,
fontsize
=
9
)
plt
.
savefig
(
"GMM des distances entre pointes des nucléotides pour les appariements cis ("
+
str
(
nc
)
+
" valeurs).png"
)
plt
.
close
()
nt
=
len
(
tWW
)
+
len
(
tHH
)
+
len
(
tSS
)
+
len
(
tWH
)
+
len
(
tHW
)
+
len
(
tWS
)
+
len
(
tSW
)
+
len
(
tHS
)
+
len
(
tSH
)
GMM_tot
(
tWW_dist
,
"tWW"
,
nt
,
'sienna'
)
GMM_tot
(
tHH_dist
,
"tHH"
,
nt
,
'maroon'
)
GMM_tot
(
tSS_dist
,
"tSS"
,
nt
,
'orange'
)
GMM_tot
(
tWH_dist
,
"tWH"
,
nt
,
'mediumaquamarine'
)
GMM_tot
(
tHW_dist
,
"tHW"
,
nt
,
'tomato'
)
GMM_tot
(
tWS_dist
,
"tWS"
,
nt
,
'indigo'
)
GMM_tot
(
tSW_dist
,
"tSW"
,
nt
,
'orchid'
)
GMM_tot
(
tHS_dist
,
"tHS"
,
nt
,
'tan'
)
GMM_tot
(
tSH_dist
,
"tSH"
,
nt
,
'lime'
)
plt
.
xlabel
(
'Distance (Angström)'
)
plt
.
title
(
"GMM des distances entre pointes des nucléotides pour les appariements trans ("
+
str
(
nt
)
+
" valeurs)"
,
fontsize
=
9
)
plt
.
savefig
(
"GMM des distances entre pointes des nucléotides pour les appariements trans ("
+
str
(
nt
)
+
" valeurs).png"
)
plt
.
close
()
if
__name__
==
"__main__"
:
...
...
@@ -2793,18 +3112,28 @@ if __name__ == "__main__":
joblist.append(Job(function=dist_atoms, args=(f,)))
'''
#dist_atoms_hire_RNA(os.listdir(path_to_3D_data + "rna_only")[0])
#concatenate('/results/distances/', os.listdir(runDir+'/results/distances/'), 'dist_atoms.csv')
ld
=
os
.
listdir
(
path_to_3D_data
+
'datapoints'
)[:
1000
]
if
'4zdo_1_E'
in
ld
:
ld
.
remove
(
'4zdo_1_E'
)
#cas particuliers
if
'4zdp_1_E'
in
ld
:
ld
.
remove
(
'4zdp_1_E'
)
dictionnaire
=
dictio
(
ld
)
for
cle
in
dictionnaire
.
keys
():
joblist
.
append
(
Job
(
function
=
parse
,
args
=
(
cle
,)))
exit
()
#exit()
'''
f_prec=os.listdir(path_to_3D_data + "rna_only")[0]
for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
#joblist.append(Job(function=dist_atoms, args=(f,)))
#joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,)))
joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,)))
joblist.append(Job(function=angles_plans_hire_RNA, args=(f,)))
'''
p
=
Pool
(
initializer
=
init_worker
,
initargs
=
(
tqdm
.
get_lock
(),),
processes
=
nworkers
)
pbar
=
tqdm
(
total
=
len
(
joblist
),
desc
=
"Stat jobs"
,
position
=
0
,
unit
=
"job"
,
leave
=
True
)
...
...
@@ -2836,7 +3165,8 @@ if __name__ == "__main__":
print
()
print
()
concatenate
(
'/results/basepairs/'
,
'basepairs.csv'
)
graph_basepairs
()
# finish the work after the parallel portions
'''
per_chain_stats()
...
...
Please
register
or
login
to post a comment