Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
RNANet
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2020-12-17 09:46:42 +0100
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
177d6e94e09d6f7a4198e00d5c89413b74b3096c
177d6e94
1 parent
a8141d69
script to measure angles + details
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
180 additions
and
3 deletions
.gitignore
RNAnet.py
known_issues.txt
known_issues_reasons.txt
scripts/measure.py
.gitignore
View file @
177d6e9
...
...
@@ -4,6 +4,7 @@ log_of_the_run.sh
# results
results/*
archive/*
logs/*
# temporary results files
...
...
RNAnet.py
View file @
177d6e9
...
...
@@ -1206,7 +1206,7 @@ class Pipeline:
self
.
known_issues
=
[
x
[:
-
1
]
for
x
in
issues
.
readlines
()
if
not
'-'
in
x
]
if
self
.
USE_KNOWN_ISSUES
:
print
(
"
\t
> Ignoring known issues:"
)
print
(
" "
.
join
(
self
.
known_issues
"
)
print
(
" "
.
join
(
self
.
known_issues
)
)
if
self
.
HOMOLOGY
:
# Ask Rfam if some are mapped to Rfam families
...
...
@@ -1263,8 +1263,7 @@ class Pipeline:
conn
.
close
()
if
self
.
SELECT_ONLY
is
not
None
:
self.update
=
[
c
for
c
in
self.update
if
c.chain_label
==
self.SELECT_ONLY
]
self
.
update
=
[
c
for
c
in
self
.
update
if
c
.
chain_label
==
self
.
SELECT_ONLY
]
self
.
n_chains
=
len
(
self
.
update
)
print
(
str
(
self
.
n_chains
)
+
" RNA chains of interest."
)
...
...
known_issues.txt
View file @
177d6e9
...
...
@@ -1611,3 +1611,15 @@
7jz0_1_F
7k00_1_5
7k00_1_B
1qzb_1_B_1-73
1qza_1_B_1-73
5zzm_1_M_3-118
5zzm_1_N_1-2904
3dg2_1_B_1-2904
3dg0_1_B_1-2904
3dg4_1_B_1-2904
3dg5_1_B_1-2904
3dg2_1_A_1-1542
3dg0_1_A_1-1542
3dg4_1_A_1-1542
3dg5_1_A_1-1542
...
...
known_issues_reasons.txt
View file @
177d6e9
...
...
@@ -4837,3 +4837,39 @@ Sequence is too short. (< 5 resolved nts)
5hjz_1_C
Sequence is too short. (< 5 resolved nts)
1qzb_1_B_1-73
DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B_1-73.
1qza_1_B_1-73
DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B_1-73.
5zzm_1_M_3-118
DSSR warning 5zzm.json: no nucleotides found. Ignoring 5zzm_1_M_3-118.
5zzm_1_N_1-2904
DSSR warning 5zzm.json: no nucleotides found. Ignoring 5zzm_1_N_1-2904.
3dg2_1_B_1-2904
DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_B_1-2904.
3dg0_1_B_1-2904
DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_B_1-2904.
3dg4_1_B_1-2904
DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_B_1-2904.
3dg5_1_B_1-2904
DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B_1-2904.
3dg2_1_A_1-1542
DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A_1-1542.
3dg0_1_A_1-1542
DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A_1-1542.
3dg4_1_A_1-1542
DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A_1-1542.
3dg5_1_A_1-1542
DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A_1-1542.
...
...
scripts/measure.py
0 → 100755
View file @
177d6e9
#!/usr/bin/python3.8
# usage : pass as an argument a folder containing .cif files of RNA chains, like those produced by RNANet:
# usage : ./measure_bonds_and_angles.py ~/Data/RNA/3D/rna_only
# OR
# usage : ./measure_bonds_and_angles.py ~/Data/RNA/3D/rna_mapped_to_Rfam
from
Bio.PDB
import
MMCIFParser
from
Bio.PDB.vectors
import
Vector
,
calc_angle
from
sys
import
argv
from
tqdm
import
tqdm
import
multiprocessing
as
mp
import
matplotlib.pyplot
as
plt
import
numpy
as
np
import
os
,
signal
def
measure_in_chain
(
f
):
mmcif_parser
=
MMCIFParser
()
s
=
mmcif_parser
.
get_structure
(
'null'
,
os
.
path
.
abspath
(
path_to_3D_data
+
f
))
chain
=
next
(
s
[
0
]
.
get_chains
())
# Assume only one chain per .cif file
c_to_p
=
[]
p_to_c
=
[]
c_p_c
=
[]
p_c_p
=
[]
last_p
=
None
last_c
=
None
nres
=
0
for
res
in
chain
:
nres
+=
1
# Get the new c1' and p atoms
atom_c1p
=
[
atom
.
get_coord
()
for
atom
in
res
if
"C1'"
in
atom
.
get_fullname
()
]
atom_p
=
[
atom
.
get_coord
()
for
atom
in
res
if
atom
.
get_name
()
==
"P"
]
if
len
(
atom_c1p
)
+
len
(
atom_p
)
!=
2
:
last_c
=
None
last_p
=
None
continue
atom_c1p
=
Vector
(
atom_c1p
[
0
])
atom_p
=
Vector
(
atom_p
[
0
])
if
last_c
is
not
None
:
# There was a previous residue
# Get the C1'(i-1) -> P distance
c_to_p
.
append
((
last_c
-
atom_p
)
.
norm
())
# the C1'(i-1) -> P bond of the theta angle
# Get the C1'(i-1)-P(i)-C1'(i) flat angle
c_p_c
.
append
(
calc_angle
(
last_c
,
atom_p
,
atom_c1p
))
# Get the P(i-1)-C1'(i-1)-P(i) flat angle
p_c_p
.
append
(
calc_angle
(
last_p
,
last_c
,
atom_p
))
p_to_c
.
append
((
atom_c1p
-
atom_p
)
.
norm
())
# the P -> C1' bond of the eta angle
last_c
=
atom_c1p
last_p
=
atom_p
c_to_p
=
np
.
array
(
c_to_p
,
dtype
=
np
.
float16
)
p_to_c
=
np
.
array
(
p_to_c
,
dtype
=
np
.
float16
)
c_p_c
=
np
.
array
(
c_p_c
,
dtype
=
np
.
float16
)
p_c_p
=
np
.
array
(
p_c_p
,
dtype
=
np
.
float16
)
c_to_p
=
c_to_p
[
~
np
.
isnan
(
c_to_p
)]
p_to_c
=
p_to_c
[
~
np
.
isnan
(
p_to_c
)]
c_p_c
=
c_p_c
[
~
np
.
isnan
(
c_p_c
)]
p_c_p
=
p_c_p
[
~
np
.
isnan
(
p_c_p
)]
return
(
c_to_p
,
p_to_c
,
c_p_c
,
p_c_p
)
def
init_worker
(
tqdm_lock
=
None
):
signal
.
signal
(
signal
.
SIGINT
,
signal
.
SIG_IGN
)
if
tqdm_lock
is
not
None
:
tqdm
.
set_lock
(
tqdm_lock
)
def
measure_all_dist_angles
():
path_to_3D_data
=
argv
[
1
]
if
path_to_3D_data
[
-
1
]
!=
'/'
:
path_to_3D_data
+=
'/'
r_cp
=
np
.
array
([],
dtype
=
np
.
float16
)
r_pc
=
np
.
array
([],
dtype
=
np
.
float16
)
flat_angles_cpc
=
np
.
array
([],
dtype
=
np
.
float16
)
flat_angles_pcp
=
np
.
array
([],
dtype
=
np
.
float16
)
p
=
mp
.
Pool
(
initializer
=
init_worker
,
initargs
=
(
tqdm
.
get_lock
(),),
processes
=
os
.
cpu_count
())
pbar
=
tqdm
(
total
=
len
(
os
.
listdir
(
path_to_3D_data
)),
desc
=
"Scanning RNA chains"
,
position
=
0
,
leave
=
True
)
try
:
nchains
=
0
for
_
,
r
in
enumerate
(
p
.
imap_unordered
(
measure_in_chain
,
os
.
listdir
(
path_to_3D_data
))):
pbar
.
update
(
1
)
nchains
+=
1
r_cp
=
np
.
hstack
([
r_cp
,
r
[
0
]])
r_pc
=
np
.
hstack
([
r_pc
,
r
[
1
]])
flat_angles_cpc
=
np
.
hstack
([
flat_angles_cpc
,
r
[
2
]])
flat_angles_pcp
=
np
.
hstack
([
flat_angles_pcp
,
r
[
3
]])
p
.
close
()
p
.
join
()
pbar
.
close
()
np
.
savez
(
"measures.npz"
,
c_p
=
r_cp
,
p_c
=
r_pc
,
c_p_c
=
flat_angles_pcp
,
p_c_p
=
flat_angles_pcp
)
except
KeyboardInterrupt
:
print
(
"Caught Ctrl-C, quitting"
)
p
.
terminate
()
p
.
join
()
pbar
.
close
()
except
Exception
as
e
:
print
(
e
)
p
.
terminate
()
p
.
join
()
pbar
.
close
()
np
.
savez
(
"measures_incomplete.npz"
,
c_p
=
r_cp
,
p_c
=
r_pc
,
c_p_c
=
flat_angles_pcp
,
p_c_p
=
flat_angles_pcp
)
if
__name__
==
"__main__"
:
# Do the computations and save/reload the data
# measure_all_dist_angles()
d
=
np
.
load
(
"measures.npz"
)
c_p
=
d
[
"c_p"
]
p_c
=
d
[
"p_c"
]
p_c_p
=
d
[
"p_c_p"
]
c_p_c
=
d
[
"c_p_c"
]
# Plot stuff
plt
.
figure
(
figsize
=
(
6
,
4
),
dpi
=
300
)
plt
.
hist
(
c_p
)
plt
.
savefig
(
"lengths.png"
)
plt
.
close
()
# print(f"Final values: P->C1' is \033[32m{avg[0]/10:.3f} ± {avg[1]/10:.3f} nm\033[0m, "
# f"C1'->P is \033[32m{avg[2]/10:.3f} ± {avg[3]/10:.3f} nm\033[0m, "
# f"angles C-P-C \033[32m{avg[4]:.2f} ± {avg[5]:.2f}\033[0m and P-C-P \033[32m{avg[6]:.2f} ± {avg[7]:.2f}\033[0m")
Please
register
or
login
to post a comment