Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
RNANet
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Aglaé TABOT
2021-06-15 14:19:11 +0200
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
69915be71bb690b5a1d5bd5e0dc52a39b04bc82f
69915be7
1 parent
1218e086
reorganization and addition of options
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
22 deletions
statistics.py
statistics.py
View file @
69915be
...
...
@@ -2978,8 +2978,9 @@ if __name__ == "__main__":
DELETE_OLD_DATA
=
False
DO_WADLEY_ANALYSIS
=
False
DO_AVG_DISTANCE_MATRIX
=
False
DO_HIRE_RNA_MEASURES
=
False
try
:
opts
,
_
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
"r:h"
,
[
"help"
,
"from-scratch"
,
"wadley"
,
"distance-matrices"
,
"resolution="
,
"3d-folder="
,
"seq-folder="
])
opts
,
_
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
"r:h"
,
[
"help"
,
"from-scratch"
,
"wadley"
,
"distance-matrices"
,
"resolution="
,
"3d-folder="
,
"seq-folder="
,
"hire-rna="
])
except
getopt
.
GetoptError
as
err
:
print
(
err
)
sys
.
exit
(
2
)
...
...
@@ -3000,6 +3001,7 @@ if __name__ == "__main__":
print
(
"--from-scratch
\t\t\t
Do not use precomputed results from past runs, recompute everything"
)
print
(
"--distance-matrices
\t\t
Compute average distance between nucleotide pairs for each family."
)
print
(
"--wadley
\t\t\t
Reproduce Wadley & al 2007 clustering of pseudotorsions."
)
print
(
"--hire-rna
\t\t\t
Compute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data."
)
sys
.
exit
()
elif
opt
==
'--version'
:
...
...
@@ -3023,10 +3025,12 @@ if __name__ == "__main__":
DO_AVG_DISTANCE_MATRIX
=
True
elif
opt
==
'--wadley'
:
DO_WADLEY_ANALYSIS
=
True
elif
opt
==
'--hire-rna'
:
DO_HIRE_RNA_MEASURES
=
True
# Load mappings. famlist will contain only families with structures at this resolution threshold.
'''
print
(
"Loading mappings list..."
)
with
sqlite3
.
connect
(
runDir
+
"/results/RNANet.db"
)
as
conn
:
conn
.
execute
(
'pragma journal_mode=wal'
)
...
...
@@ -3050,7 +3054,7 @@ if __name__ == "__main__":
print
(
f
"Found {len(famlist)} families with chains of resolution {res_thr}A or better."
)
if
len
(
ignored
):
print
(
f
"Idty matrices: Ignoring {len(ignored)} families with only one chain:"
,
" "
.
join
(
ignored
)
+
'
\n
'
)
'''
if
DELETE_OLD_DATA
:
for
f
in
famlist
:
subprocess
.
run
([
"rm"
,
"-f"
,
runDir
+
f
"/data/{f}.npy"
,
runDir
+
f
"/data/{f}_pairs.csv"
,
runDir
+
f
"/data/{f}_counts.csv"
])
...
...
@@ -3068,7 +3072,7 @@ if __name__ == "__main__":
# Define the tasks
joblist
=
[]
'''
if
n_unmapped_chains
and
DO_WADLEY_ANALYSIS
:
joblist
.
append
(
Job
(
function
=
reproduce_wadley_results
,
args
=
(
1
,
False
,
(
1
,
4
),
res_thr
)))
joblist
.
append
(
Job
(
function
=
reproduce_wadley_results
,
args
=
(
4
,
False
,
(
1
,
4
),
res_thr
)))
...
...
@@ -3090,15 +3094,20 @@ if __name__ == "__main__":
joblist
.
append
(
Job
(
function
=
parallel_stats_pairs
,
args
=
(
f
,)))
# updates the database
if
f
not
in
ignored
:
joblist
.
append
(
Job
(
function
=
to_id_matrix
,
args
=
(
f
,)))
# updates the database
'''
'''
f_prec
=
os
.
listdir
(
path_to_3D_data
+
"rna_only"
)[
0
]
#exit()
for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
for
f
in
os
.
listdir
(
path_to_3D_data
+
"rna_only"
)[:
100
]:
joblist
.
append
(
Job
(
function
=
dist_atoms
,
args
=
(
f
,)))
if
DO_HIRE_RNA_MEASURES
:
f_prec
=
os
.
listdir
(
path_to_3D_data
+
"rna_only"
)[
0
]
for
f
in
os
.
listdir
(
path_to_3D_data
+
"rna_only"
)[:
100
]:
joblist
.
append
(
Job
(
function
=
dist_atoms_hire_RNA
,
args
=
(
f
,)))
joblist
.
append
(
Job
(
function
=
angles_torsion_hire_RNA
,
args
=
(
f
,)))
joblist
.
append
(
Job
(
function
=
angles_plans_hire_RNA
,
args
=
(
f
,)))
'''
Basepairs
'''
ld
=
os
.
listdir
(
path_to_3D_data
+
'datapoints'
)[:
1000
]
if
'4zdo_1_E'
in
ld
:
...
...
@@ -3109,19 +3118,9 @@ if __name__ == "__main__":
for
cle
in
dictionnaire
.
keys
():
joblist
.
append
(
Job
(
function
=
parse
,
args
=
(
cle
,)))
'''
#exit()
'''
f_prec=os.listdir(path_to_3D_data + "rna_only")[0]
for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
joblist.append(Job(function=dist_atoms, args=(f,)))
joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,)))
joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,)))
joblist.append(Job(function=angles_plans_hire_RNA, args=(f,)))
'''
#exit()
p
=
Pool
(
initializer
=
init_worker
,
initargs
=
(
tqdm
.
get_lock
(),),
processes
=
nworkers
)
pbar
=
tqdm
(
total
=
len
(
joblist
),
desc
=
"Stat jobs"
,
position
=
0
,
unit
=
"job"
,
leave
=
True
)
...
...
@@ -3153,8 +3152,8 @@ if __name__ == "__main__":
print
()
print
()
# finish the work after the parallel portions
'''
per_chain_stats()
seq_idty()
...
...
@@ -3162,6 +3161,10 @@ if __name__ == "__main__":
if n_unmapped_chains:
general_stats()
'''
if
DO_WADLEY_ANALYSIS
:
graph_eta_theta
()
concatenate
(
'/results/all-atoms/distances/'
,
'dist_atoms.csv'
)
graph_dist_atoms
()
concatenate
(
'/results/HiRE-RNA/distances/'
,
'dist_atoms_hire_RNA.csv'
)
...
...
@@ -3173,7 +3176,7 @@ if __name__ == "__main__":
graph_plans_h_RNA
()
graph_angles_torsion
()
graph_eta_theta
()
concatenate
(
'/results/basepairs/'
,
'basepairs.csv'
)
graph_basepairs
()
...
...
Please
register
or
login
to post a comment