Showing
4 changed files
with
91 additions
and
0 deletions
This diff is collapsed. Click to expand it.
results/clusters_rot180.png
0 → 100644
![](/lbecquey/RNANetLegacy/raw/091f15280041779878ee81be160a9757324d6b67/results/clusters_rot180.png)
448 KB
statistics.py
0 → 100755
1 | +#!/usr/bin/python3.8 | ||
2 | +import os | ||
3 | +import numpy as np | ||
4 | +import pandas as pd | ||
5 | +import scipy.stats as st | ||
6 | +import matplotlib.pyplot as plt | ||
7 | +import matplotlib.patches as ptch | ||
8 | +from mpl_toolkits.mplot3d import axes3d | ||
9 | +from matplotlib import cm | ||
10 | +from tqdm import tqdm | ||
11 | + | ||
12 | + | ||
# Machines this script knows about. Each entry is
# (directory whose presence identifies the machine, 3D data folder, sequence data folder).
# Entries are probed in order and the first existing directory wins.
_known_machines = (
    # the IFB-core cloud
    ("/home/ubuntu/", "/mnt/Data/RNA/3D/", "/mnt/Data/RNA/sequences/"),
    # personal workstation
    ("/home/persalteas", "/home/persalteas/Data/RNA/3D/", "/home/persalteas/Data/RNA/sequences/"),
    # the IBISC server
    ("/home/lbecquey", "/home/lbecquey/Data/RNA/3D/", "/home/lbecquey/Data/RNA/sequences/"),
    # the office PC
    ("/nhome/siniac/lbecquey", "/nhome/siniac/lbecquey/Data/RNA/3D/", "/nhome/siniac/lbecquey/Data/RNA/sequences/"),
)

_machine = next((entry for entry in _known_machines if os.path.isdir(entry[0])), None)
if _machine is None:
    # None of the marker directories exists: refuse to run on an unknown host.
    print("I don't know that machine... I'm shy, maybe you should introduce yourself ?")
    exit(1)

path_to_3D_data, path_to_seq_data = _machine[1:]
28 | + | ||
def rotate_and_filter(etas, thetas):
    """Drop unusable (eta, theta) couples and rotate the remaining ones by 180 degrees.

    A couple is discarded when either angle is NaN, or when it lies in the
    region ``theta < -110 and (eta < -150 or eta > 160)`` (tested on the
    angles as given, i.e. before rotation). Each remaining angle ``a`` is
    mapped to ``(a + 360) % 360 - 180``, which lies in [-180, 180).

    Args:
        etas: sequence of eta angles, in degrees.
        thetas: sequence of theta angles, in degrees, same length as ``etas``.

    Returns:
        A tuple ``(x, y)`` of 1-D float arrays holding the rotated eta and
        theta values of the couples that were kept, in input order.
    """
    eta = np.asarray(etas, dtype=float)
    theta = np.asarray(thetas, dtype=float)

    # Comparisons against NaN are simply False; silence the warning some
    # numpy versions emit for them, the NaNs are masked out explicitly.
    with np.errstate(invalid="ignore"):
        missing = np.isnan(eta) | np.isnan(theta)
        in_removed_zone = (theta < -110) & ((eta < -150) | (eta > 160))
    keep = ~(missing | in_removed_zone)

    return (eta[keep] + 360) % 360 - 180, (theta[keep] + 360) % 360 - 180


if __name__ == "__main__":

    #TODO: compute nt frequencies, chain lengths

    print("loading CSV files...")
    torsion_dir = os.path.join(path_to_3D_data, "pseudotorsions")
    eta_chunks = []
    theta_chunks = []
    for csvfile in tqdm(os.listdir(torsion_dir)):
        # Only the two angle columns are needed; reading just those also
        # avoids depending on the presence of a saved index column.
        df = pd.read_csv(os.path.join(torsion_dir, csvfile), usecols=["eta", "theta"])
        eta_chunks.append(df["eta"].values)
        theta_chunks.append(df["theta"].values)

    print("combining etas and thetas...")
    all_etas = np.concatenate(eta_chunks) if eta_chunks else np.empty(0)
    all_thetas = np.concatenate(theta_chunks) if theta_chunks else np.empty(0)
    x, y = rotate_and_filter(all_etas, all_thetas)
    print(len(x), "couples of nts found.")
    if len(x) == 0:
        # Nothing to estimate a density from: stop with a clear message
        # instead of failing later on min() of an empty array.
        raise SystemExit("No usable (eta, theta) couples found in " + torsion_dir)

    # Gaussian kernel density estimate evaluated on a 100x100 grid
    # spanning the observed range of both angles.
    xmin, xmax = x.min(), x.max()
    ymin, ymax = y.min(), y.max()
    xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    positions = np.vstack([xx.ravel(), yy.ravel()])
    values = np.vstack([x, y])
    kernel = st.gaussian_kde(values)
    f = np.reshape(kernel(positions).T, xx.shape)

    # Create an empty figure: each panel is added explicitly below, so no
    # pre-built 2D axes are left lying underneath the 3D ones.
    fig = plt.figure(figsize=(18, 6))

    # Panel 1: scatter of the points with the density contours on top.
    ax = fig.add_subplot(131)
    ax.axhline(y=0, alpha=0.5, color='black')
    ax.axvline(x=0, alpha=0.5, color='black')
    ax.scatter(x, y, s=1, alpha=0.1)
    ax.contourf(xx, yy, f, cmap=cm.BuPu, alpha=0.5)
    ax.set_xlabel("$\\eta'=C_1'^{i-1}-P^i-C_1'^i-P^{i+1}$")
    ax.set_ylabel("$\\theta'=P^i-C_1'^i-P^{i+1}-C_1'^{i+1}$")
    # Red rectangle: where the filtered-out region lands after the rotation.
    ax.add_patch(ptch.Rectangle((-20,0),50,70, linewidth=1, edgecolor='r', facecolor='#ff000080'))

    # Panel 2: the density estimate as a 3D surface.
    ax = fig.add_subplot(132, projection='3d')
    ax.plot_surface(xx, yy, f, cmap=cm.coolwarm, linewidth=0, antialiased=False)
    ax.set_title("\"Wadley plot\"\n$\\eta'$, $\\theta'$ pseudotorsions in 3D RNA structures\n(Massive peak removed in the red zone, = double helices)")
    ax.set_xlabel("$\\eta'=C_1'^{i-1}-P^i-C_1'^i-P^{i+1}$")
    ax.set_ylabel("$\\theta'=P^i-C_1'^i-P^{i+1}-C_1'^{i+1}$")

    # Panel 3: raw 2D histogram (300x300 bins) drawn as 3D bars.
    ax = fig.add_subplot(133, projection='3d')
    hist, xedges, yedges = np.histogram2d(x, y, bins=300, range=[[xmin, xmax], [ymin, ymax]])
    xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
    ax.bar3d(xpos.ravel(), ypos.ravel(), 0, 0.5, 0.5, hist.ravel(), zsort='average')
    ax.set_xlabel("$\\eta'=C_1'^{i-1}-P^i-C_1'^i-P^{i+1}$")
    ax.set_ylabel("$\\theta'=P^i-C_1'^i-P^{i+1}-C_1'^{i+1}$")

    # savefig does not create missing folders.
    os.makedirs("results", exist_ok=True)
    plt.savefig("results/clusters_rot180.png")
    plt.show()
-
Please register or login to post a comment