Louis BECQUEY

removed useless files


Former-commit-id: 93faa57d
File mode changed
1 -f665319aeeca9246a8617b209cdf02646f547518
...\ No newline at end of file ...\ No newline at end of file
This diff could not be displayed because it is too large.
1 -#!/usr/bin/python3.8
2 -
3 -# usage : pass as an argument a folder containing .cif files of RNA chains, like those produced by RNANet:
4 -# usage : ./measure_bonds_and_angles.py ~/Data/RNA/3D/rna_only
5 -# OR
6 -# usage : ./measure_bonds_and_angles.py ~/Data/RNA/3D/rna_mapped_to_Rfam
7 -
8 -from Bio.PDB import MMCIFParser
9 -from Bio.PDB.vectors import Vector, calc_angle
10 -from sys import argv
11 -from tqdm import tqdm
12 -import multiprocessing as mp
13 -import matplotlib.pyplot as plt
14 -import numpy as np
15 -import os, signal
16 -
17 -def measure_in_chain(f):
18 - mmcif_parser = MMCIFParser()
19 - s = mmcif_parser.get_structure('null', os.path.abspath(path_to_3D_data + f))
20 - chain = next(s[0].get_chains()) # Assume only one chain per .cif file
21 -
22 - c_to_p = []
23 - p_to_c = []
24 - c_p_c = []
25 - p_c_p = []
26 - last_p = None
27 - last_c = None
28 - nres = 0
29 - for res in chain:
30 - nres += 1
31 -
32 - # Get the new c1' and p atoms
33 - atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
34 - atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"]
35 - if len(atom_c1p) + len(atom_p) != 2:
36 - last_c = None
37 - last_p = None
38 - continue
39 - atom_c1p = Vector(atom_c1p[0])
40 - atom_p = Vector(atom_p[0])
41 -
42 - if last_c is not None: # There was a previous residue
43 - # Get the C1'(i-1) -> P distance
44 - c_to_p.append((last_c - atom_p).norm()) # the C1'(i-1) -> P bond of the theta angle
45 -
46 - # Get the C1'(i-1)-P(i)-C1'(i) flat angle
47 - c_p_c.append(calc_angle(last_c, atom_p, atom_c1p))
48 -
49 - # Get the P(i-1)-C1'(i-1)-P(i) flat angle
50 - p_c_p.append(calc_angle(last_p, last_c, atom_p))
51 -
52 - p_to_c.append((atom_c1p - atom_p).norm()) # the P -> C1' bond of the eta angle
53 - last_c = atom_c1p
54 - last_p = atom_p
55 -
56 - c_to_p = np.array(c_to_p, dtype=np.float16)
57 - p_to_c = np.array(p_to_c, dtype=np.float16)
58 - c_p_c = np.array(c_p_c, dtype=np.float16)
59 - p_c_p = np.array(p_c_p, dtype=np.float16)
60 - c_to_p = c_to_p[~np.isnan(c_to_p)]
61 - p_to_c = p_to_c[~np.isnan(p_to_c)]
62 - c_p_c = c_p_c[~np.isnan(c_p_c)]
63 - p_c_p = p_c_p[~np.isnan(p_c_p)]
64 -
65 - return (c_to_p, p_to_c, c_p_c, p_c_p)
66 -
67 -def init_worker(tqdm_lock=None):
68 - signal.signal(signal.SIGINT, signal.SIG_IGN)
69 - if tqdm_lock is not None:
70 - tqdm.set_lock(tqdm_lock)
71 -
72 -def measure_all_dist_angles():
73 - path_to_3D_data = argv[1]
74 -
75 - if path_to_3D_data[-1] != '/':
76 - path_to_3D_data += '/'
77 -
78 - r_cp = np.array([], dtype=np.float16)
79 - r_pc = np.array([], dtype=np.float16)
80 - flat_angles_cpc = np.array([], dtype=np.float16)
81 - flat_angles_pcp = np.array([], dtype=np.float16)
82 - p = mp.Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=os.cpu_count())
83 - pbar = tqdm(total=len(os.listdir(path_to_3D_data)), desc="Scanning RNA chains", position=0, leave=True)
84 - try:
85 - nchains = 0
86 - for _, r in enumerate(p.imap_unordered(measure_in_chain, os.listdir(path_to_3D_data))):
87 - pbar.update(1)
88 - nchains += 1
89 - r_cp = np.hstack([r_cp, r[0]])
90 - r_pc = np.hstack([r_pc, r[1]])
91 - flat_angles_cpc = np.hstack([flat_angles_cpc, r[2]])
92 - flat_angles_pcp = np.hstack([flat_angles_pcp, r[3]])
93 - p.close()
94 - p.join()
95 - pbar.close()
96 - np.savez("measures.npz", c_p=r_cp, p_c=r_pc, c_p_c=flat_angles_pcp, p_c_p=flat_angles_pcp)
97 - except KeyboardInterrupt:
98 - print("Caught Ctrl-C, quitting")
99 - p.terminate()
100 - p.join()
101 - pbar.close()
102 - except Exception as e:
103 - print(e)
104 - p.terminate()
105 - p.join()
106 - pbar.close()
107 - np.savez("measures_incomplete.npz", c_p=r_cp, p_c=r_pc, c_p_c=flat_angles_pcp, p_c_p=flat_angles_pcp)
108 -
109 -
110 -if __name__ == "__main__":
111 - # Do the computations and save/reload the data
112 -
113 - # measure_all_dist_angles()
114 -
115 - d = np.load("measures.npz")
116 - c_p = d["c_p"]
117 - p_c = d["p_c"]
118 - p_c_p = d["p_c_p"]
119 - c_p_c = d["c_p_c"]
120 -
121 - # Plot stuff
122 - plt.figure(figsize=(6,4), dpi=300)
123 - plt.hist(c_p)
124 - plt.savefig("lengths.png")
125 - plt.close()
126 -
127 - # print(f"Final values: P->C1' is \033[32m{avg[0]/10:.3f} ± {avg[1]/10:.3f} nm\033[0m, "
128 - # f"C1'->P is \033[32m{avg[2]/10:.3f} ± {avg[3]/10:.3f} nm\033[0m, "
129 - # f"angles C-P-C \033[32m{avg[4]:.2f} ± {avg[5]:.2f}\033[0m and P-C-P \033[32m{avg[6]:.2f} ± {avg[7]:.2f}\033[0m")