Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
RNANet
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2020-03-17 18:42:48 +0000
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
c9882ee58e479243ac57b04a220c612b1a29f493
c9882ee5
1 parent
8800ab7b
Mapping inference from BGSU lists
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
72 additions
and
15 deletions
RNAnet.py
results/clusters_rot180.png
statistics.py
RNAnet.py
View file @
c9882ee
This diff is collapsed. Click to expand it.
results/clusters_rot180.png
View file @
c9882ee
448 KB
|
W:
|
H:
399 KB
|
W:
|
H:
2-up
Swipe
Onion skin
statistics.py
View file @
c9882ee
...
...
@@ -2,12 +2,15 @@
import
os
import
numpy
as
np
import
pandas
as
pd
import
threading
as
th
import
scipy.stats
as
st
import
matplotlib.pyplot
as
plt
import
matplotlib.patches
as
ptch
from
mpl_toolkits.mplot3d
import
axes3d
from
matplotlib
import
cm
from
tqdm
import
tqdm
from
multiprocessing
import
Pool
from
RNAnet
import
read_cpu_number
if
os
.
path
.
isdir
(
"/home/ubuntu/"
):
# this is the IFB-core cloud
...
...
@@ -26,27 +29,35 @@ else:
print
(
"I don't know that machine... I'm shy, maybe you should introduce yourself ?"
)
exit
(
1
)
if
__name__
==
"__main__"
:
#TODO: compute nt frequencies, chain lengths
def
load_rna_frome_file
(
path_to_textfile
):
return
pd
.
read_csv
(
path_to_textfile
,
sep
=
','
,
header
=
0
,
engine
=
"c"
,
index_col
=
0
)
print
(
"loading CSV files..."
)
rna_points
=
[]
def
reproduce_wadley_results
(
dfs
,
show
=
True
):
all_etas
=
[]
all_thetas
=
[]
for
csvfile
in
tqdm
(
os
.
listdir
(
path_to_3D_data
+
"pseudotorsions"
)):
df
=
pd
.
read_csv
(
path_to_3D_data
+
"pseudotorsions/"
+
csvfile
)
.
drop
(
'Unnamed: 0'
,
axis
=
1
)
all_forms
=
[]
c
=
0
for
df
in
dfs
:
all_etas
+=
list
(
df
[
'eta'
]
.
values
)
all_thetas
+=
list
(
df
[
'theta'
]
.
values
)
rna_points
.
append
(
df
)
all_forms
+=
list
(
df
[
'form'
]
.
values
)
if
(
len
([
x
for
x
in
df
[
'eta'
]
.
values
if
x
<
0
or
x
>
7
])
or
len
([
x
for
x
in
df
[
'theta'
]
.
values
if
x
<
0
or
x
>
7
])):
c
+=
1
print
(
c
,
"points on"
,
len
(
dfs
),
"have non-radian angles !"
)
print
(
"combining etas and thetas..."
)
# increase all the angles by 180°
alldata
=
[
((
e
+
360
)
%
360
-
180
,
(
t
+
360
)
%
360
-
180
)
for
e
,
t
in
zip
(
all_etas
,
all_thetas
)
# # increase all the angles by 180°
# alldata = [ ((e+360)%360-180, (t+360)%360-180)
# for e, t in zip(all_etas, all_thetas)
# if ('nan' not in str((e,t)))
# and not(e<-150 and t<-110) and not (e>160 and t<-110) ]
alldata
=
[
(
e
,
t
)
for
e
,
t
,
f
in
zip
(
all_etas
,
all_thetas
,
all_forms
)
if
(
'nan'
not
in
str
((
e
,
t
)))
and
not
(
e
<-
150
and
t
<-
110
)
and
not
(
e
>
160
and
t
<-
110
)
]
print
(
len
(
alldata
),
"couples of nts found."
)
and
f
==
'.'
]
print
(
len
(
alldata
),
"couples of n
on-helical n
ts found."
)
x
=
np
.
array
([
p
[
0
]
for
p
in
alldata
])
y
=
np
.
array
([
p
[
1
]
for
p
in
alldata
])
...
...
@@ -71,7 +82,7 @@ if __name__ == "__main__":
plt
.
contourf
(
xx
,
yy
,
z
,
cmap
=
cm
.
BuPu
,
alpha
=
0.5
)
ax
.
set_xlabel
(
"$
\\
eta'=C_1'^{i-1}-P^i-C_1'^i-P^{i+1}$"
)
ax
.
set_ylabel
(
"$
\\
theta'=P^i-C_1'^i-P^{i+1}-C_1'^{i+1}$"
)
ax
.
add_patch
(
ptch
.
Rectangle
((
-
20
,
0
),
50
,
70
,
linewidth
=
1
,
edgecolor
=
'r'
,
facecolor
=
'#ff000080'
))
#
ax.add_patch(ptch.Rectangle((-20,0),50,70, linewidth=1, edgecolor='r', facecolor='#ff000080'))
ax
=
fig
.
add_subplot
(
132
,
projection
=
'3d'
)
ax
.
plot_surface
(
xx
,
yy
,
z_inc
,
cmap
=
cm
.
coolwarm
,
linewidth
=
0
,
antialiased
=
True
)
...
...
@@ -86,4 +97,50 @@ if __name__ == "__main__":
ax
.
set_xlabel
(
"$
\\
eta'=C_1'^{i-1}-P^i-C_1'^i-P^{i+1}$"
)
ax
.
set_ylabel
(
"$
\\
theta'=P^i-C_1'^i-P^{i+1}-C_1'^{i+1}$"
)
plt
.
savefig
(
"results/clusters_rot180.png"
)
plt
.
show
()
if
show
:
plt
.
show
()
def
stats_len
(
dfs
):
lengths
=
[]
full_lengths
=
[]
for
r
in
dfs
:
nt_codes
=
r
[
'nt_code'
]
.
values
.
tolist
()
lengths
.
append
(
len
(
nt_codes
))
full_lengths
.
append
(
len
([
c
for
c
in
nt_codes
if
c
!=
'-'
]))
if
__name__
==
"__main__"
:
#TODO: compute nt frequencies, chain lengths
#################################################################
# LOAD ALL FILES
#################################################################
print
(
"Loading mappings list..."
)
mappings_list
=
pd
.
read_csv
(
path_to_seq_data
+
"realigned/mappings_list.csv"
,
sep
=
','
,
index_col
=
0
)
.
to_dict
()
print
(
"Loading datapoints from file..."
)
filelist
=
[
path_to_3D_data
+
"/datapoints/"
+
f
for
f
in
os
.
listdir
(
path_to_3D_data
+
"/datapoints"
)
if
".log"
not
in
f
and
".gz"
not
in
f
]
rna_points
=
[]
p
=
Pool
(
initializer
=
tqdm
.
set_lock
,
initargs
=
(
tqdm
.
get_lock
(),),
processes
=
read_cpu_number
())
pbar
=
tqdm
(
total
=
len
(
filelist
),
desc
=
"RNA files"
,
position
=
0
,
leave
=
True
)
for
i
,
rna
in
enumerate
(
p
.
imap_unordered
(
load_rna_frome_file
,
filelist
)):
rna_points
.
append
(
rna
)
pbar
.
update
(
1
)
pbar
.
close
()
p
.
close
()
p
.
join
()
npoints
=
len
(
rna_points
)
print
(
npoints
,
"RNA files loaded."
)
#################################################################
# Define threads for the tasks
#################################################################
wadley_thr
=
th
.
Thread
(
target
=
reproduce_wadley_results
,
args
=
[
rna_points
])
wadley_thr
.
start
()
wadley_thr
.
join
()
\ No newline at end of file
...
...
Please
register
or
login
to post a comment