Louis BECQUEY

aesthetics, comments, and statistics on the produced data

1 +# execution outputs:
2 +nohup.out
3 +jobstats.csv
4 +log_of_the_run.sh
5 +
1 # Byte-compiled / optimized / DLL files 6 # Byte-compiled / optimized / DLL files
2 __pycache__/ 7 __pycache__/
3 *.py[cod] 8 *.py[cod]
......
This diff is collapsed. Click to expand it.
1 +#!/usr/bin/python3.8
2 +import os
3 +import numpy as np
4 +import pandas as pd
5 +import scipy.stats as st
6 +import matplotlib.pyplot as plt
7 +import matplotlib.patches as ptch
8 +from mpl_toolkits.mplot3d import axes3d
9 +from matplotlib import cm
10 +from tqdm import tqdm
11 +
12 +
# Locate the RNA data folders by probing for a directory that marks each of the
# known machines. Both paths keep their trailing slash: they are used as
# prefixes in plain string concatenation.
if os.path.isdir("/home/ubuntu/"): # this is the IFB-core cloud
    path_to_3D_data = "/mnt/Data/RNA/3D/"
    path_to_seq_data = "/mnt/Data/RNA/sequences/"
elif os.path.isdir("/home/persalteas"): # this is my personal workstation
    path_to_3D_data = "/home/persalteas/Data/RNA/3D/"
    path_to_seq_data = "/home/persalteas/Data/RNA/sequences/"
elif os.path.isdir("/home/lbecquey"): # this is the IBISC server
    path_to_3D_data = "/home/lbecquey/Data/RNA/3D/"
    path_to_seq_data = "/home/lbecquey/Data/RNA/sequences/"
elif os.path.isdir("/nhome/siniac/lbecquey"): # this is the office PC
    path_to_3D_data = "/nhome/siniac/lbecquey/Data/RNA/3D/"
    path_to_seq_data = "/nhome/siniac/lbecquey/Data/RNA/sequences/"
else:
    print("I don't know that machine... I'm shy, maybe you should introduce yourself ?")
    # exit() is injected by the site module for interactive sessions and is
    # not defined under `python -S` or in frozen builds; raising SystemExit
    # yields the same exit status without relying on it.
    raise SystemExit(1)
28 +
if __name__ == "__main__":
    # Script entry point: load every pseudotorsion CSV, filter and rotate the
    # (eta, theta) pairs, estimate their 2D density, and draw three views of it
    # (scatter + contours, KDE surface, 3D histogram) saved to results/.

    #TODO: compute nt frequencies, chain lengths

    print("loading CSV files...")
    pseudotorsions_dir = path_to_3D_data + "pseudotorsions/"
    rna_points = []
    for csvfile in tqdm(os.listdir(pseudotorsions_dir)):
        # 'Unnamed: 0' is the name pandas gives to a header-less column
        # (presumably the index that was written along with the data).
        df = pd.read_csv(pseudotorsions_dir + csvfile).drop('Unnamed: 0', axis=1)
        rna_points.append(df)
    if not rna_points:
        # Fail with an explicit message rather than an opaque numpy error below.
        raise SystemExit("No pseudotorsion CSV file found in " + pseudotorsions_dir)
    all_etas = np.concatenate([df['eta'].to_numpy(dtype=float) for df in rna_points])
    all_thetas = np.concatenate([df['theta'].to_numpy(dtype=float) for df in rna_points])

    print("combining etas and thetas...")
    # Drop the pairs where an angle is missing, and the pairs whose raw angles
    # fall in the zone that the plot title reports as removed.
    missing = np.isnan(all_etas) | np.isnan(all_thetas)
    in_removed_zone = (all_thetas < -110) & ((all_etas < -150) | (all_etas > 160))
    keep = ~(missing | in_removed_zone)
    # increase all the angles by 180° (modulo 360)
    x = (all_etas[keep] + 360) % 360 - 180
    y = (all_thetas[keep] + 360) % 360 - 180
    print(len(x), "couples of nts found.")
    if x.size == 0:
        raise SystemExit("No usable (eta, theta) couple left after filtering.")

    # Gaussian kernel density estimate, evaluated on a 100 x 100 grid that
    # spans the observed range of both angles.
    xmin, xmax = x.min(), x.max()
    ymin, ymax = y.min(), y.max()
    xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    positions = np.vstack([xx.ravel(), yy.ravel()])
    values = np.vstack([x, y])
    kernel = st.gaussian_kde(values)
    f = np.reshape(kernel(positions).T, xx.shape)

    # A bare figure: each panel is added explicitly below. plt.subplots() would
    # pre-create three 2D axes that the add_subplot() calls then draw over.
    fig = plt.figure(figsize=(18, 6))

    # Panel 1: raw points, density contours, and the removed zone in red.
    ax = fig.add_subplot(131)
    ax.axhline(y=0, alpha=0.5, color='black')
    ax.axvline(x=0, alpha=0.5, color='black')
    ax.scatter(x, y, s=1, alpha=0.1)
    ax.contourf(xx, yy, f, cmap=cm.BuPu, alpha=0.5)
    ax.set_xlabel("$\\eta'=C_1'^{i-1}-P^i-C_1'^i-P^{i+1}$")
    ax.set_ylabel("$\\theta'=P^i-C_1'^i-P^{i+1}-C_1'^{i+1}$")
    ax.add_patch(ptch.Rectangle((-20,0),50,70, linewidth=1, edgecolor='r', facecolor='#ff000080'))

    # Panel 2: the estimated density as a 3D surface.
    ax = fig.add_subplot(132, projection='3d')
    ax.plot_surface(xx, yy, f, cmap=cm.coolwarm, linewidth=0, antialiased=False)
    ax.set_title("\"Wadley plot\"\n$\\eta'$, $\\theta'$ pseudotorsions in 3D RNA structures\n(Massive peak removed in the red zone, = double helices)")
    ax.set_xlabel("$\\eta'=C_1'^{i-1}-P^i-C_1'^i-P^{i+1}$")
    ax.set_ylabel("$\\theta'=P^i-C_1'^i-P^{i+1}-C_1'^{i+1}$")

    # Panel 3: histogram : raw counts on a 300 x 300 grid, drawn as 3D bars.
    ax = fig.add_subplot(133, projection='3d')
    hist, xedges, yedges = np.histogram2d(x, y, bins=300, range=[[xmin, xmax], [ymin, ymax]])
    xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
    ax.bar3d(xpos.ravel(), ypos.ravel(), 0, 0.5, 0.5, hist.ravel(), zsort='average')
    ax.set_xlabel("$\\eta'=C_1'^{i-1}-P^i-C_1'^i-P^{i+1}$")
    ax.set_ylabel("$\\theta'=P^i-C_1'^i-P^{i+1}-C_1'^{i+1}$")

    # savefig() does not create missing folders.
    os.makedirs("results", exist_ok=True)
    plt.savefig("results/clusters_rot180.png")
    plt.show()