Showing
4 changed files
with
0 additions
and
130 deletions
RNANet_1.db
deleted
100644 → 0
File mode changed
angles torsion.REMOVED.git-id
deleted
100644 → 0
1 | -f665319aeeca9246a8617b209cdf02646f547518 | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
errors.txt
deleted
100644 → 0
This diff could not be displayed because it is too large.
scripts/measure.py
deleted
100755 → 0
1 | -#!/usr/bin/python3.8 | ||
2 | - | ||
3 | -# usage : pass as an argument a folder containing .cif files of RNA chains, like those produced by RNANet: | ||
4 | -# usage : ./measure_bonds_and_angles.py ~/Data/RNA/3D/rna_only | ||
5 | -# OR | ||
6 | -# usage : ./measure_bonds_and_angles.py ~/Data/RNA/3D/rna_mapped_to_Rfam | ||
7 | - | ||
8 | -from Bio.PDB import MMCIFParser | ||
9 | -from Bio.PDB.vectors import Vector, calc_angle | ||
10 | -from sys import argv | ||
11 | -from tqdm import tqdm | ||
12 | -import multiprocessing as mp | ||
13 | -import matplotlib.pyplot as plt | ||
14 | -import numpy as np | ||
15 | -import os, signal | ||
16 | - | ||
17 | -def measure_in_chain(f): | ||
18 | - mmcif_parser = MMCIFParser() | ||
19 | - s = mmcif_parser.get_structure('null', os.path.abspath(path_to_3D_data + f)) | ||
20 | - chain = next(s[0].get_chains()) # Assume only one chain per .cif file | ||
21 | - | ||
22 | - c_to_p = [] | ||
23 | - p_to_c = [] | ||
24 | - c_p_c = [] | ||
25 | - p_c_p = [] | ||
26 | - last_p = None | ||
27 | - last_c = None | ||
28 | - nres = 0 | ||
29 | - for res in chain: | ||
30 | - nres += 1 | ||
31 | - | ||
32 | - # Get the new c1' and p atoms | ||
33 | - atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | ||
34 | - atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"] | ||
35 | - if len(atom_c1p) + len(atom_p) != 2: | ||
36 | - last_c = None | ||
37 | - last_p = None | ||
38 | - continue | ||
39 | - atom_c1p = Vector(atom_c1p[0]) | ||
40 | - atom_p = Vector(atom_p[0]) | ||
41 | - | ||
42 | - if last_c is not None: # There was a previous residue | ||
43 | - # Get the C1'(i-1) -> P distance | ||
44 | - c_to_p.append((last_c - atom_p).norm()) # the C1'(i-1) -> P bond of the theta angle | ||
45 | - | ||
46 | - # Get the C1'(i-1)-P(i)-C1'(i) flat angle | ||
47 | - c_p_c.append(calc_angle(last_c, atom_p, atom_c1p)) | ||
48 | - | ||
49 | - # Get the P(i-1)-C1'(i-1)-P(i) flat angle | ||
50 | - p_c_p.append(calc_angle(last_p, last_c, atom_p)) | ||
51 | - | ||
52 | - p_to_c.append((atom_c1p - atom_p).norm()) # the P -> C1' bond of the eta angle | ||
53 | - last_c = atom_c1p | ||
54 | - last_p = atom_p | ||
55 | - | ||
56 | - c_to_p = np.array(c_to_p, dtype=np.float16) | ||
57 | - p_to_c = np.array(p_to_c, dtype=np.float16) | ||
58 | - c_p_c = np.array(c_p_c, dtype=np.float16) | ||
59 | - p_c_p = np.array(p_c_p, dtype=np.float16) | ||
60 | - c_to_p = c_to_p[~np.isnan(c_to_p)] | ||
61 | - p_to_c = p_to_c[~np.isnan(p_to_c)] | ||
62 | - c_p_c = c_p_c[~np.isnan(c_p_c)] | ||
63 | - p_c_p = p_c_p[~np.isnan(p_c_p)] | ||
64 | - | ||
65 | - return (c_to_p, p_to_c, c_p_c, p_c_p) | ||
66 | - | ||
67 | -def init_worker(tqdm_lock=None): | ||
68 | - signal.signal(signal.SIGINT, signal.SIG_IGN) | ||
69 | - if tqdm_lock is not None: | ||
70 | - tqdm.set_lock(tqdm_lock) | ||
71 | - | ||
72 | -def measure_all_dist_angles(): | ||
73 | - path_to_3D_data = argv[1] | ||
74 | - | ||
75 | - if path_to_3D_data[-1] != '/': | ||
76 | - path_to_3D_data += '/' | ||
77 | - | ||
78 | - r_cp = np.array([], dtype=np.float16) | ||
79 | - r_pc = np.array([], dtype=np.float16) | ||
80 | - flat_angles_cpc = np.array([], dtype=np.float16) | ||
81 | - flat_angles_pcp = np.array([], dtype=np.float16) | ||
82 | - p = mp.Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=os.cpu_count()) | ||
83 | - pbar = tqdm(total=len(os.listdir(path_to_3D_data)), desc="Scanning RNA chains", position=0, leave=True) | ||
84 | - try: | ||
85 | - nchains = 0 | ||
86 | - for _, r in enumerate(p.imap_unordered(measure_in_chain, os.listdir(path_to_3D_data))): | ||
87 | - pbar.update(1) | ||
88 | - nchains += 1 | ||
89 | - r_cp = np.hstack([r_cp, r[0]]) | ||
90 | - r_pc = np.hstack([r_pc, r[1]]) | ||
91 | - flat_angles_cpc = np.hstack([flat_angles_cpc, r[2]]) | ||
92 | - flat_angles_pcp = np.hstack([flat_angles_pcp, r[3]]) | ||
93 | - p.close() | ||
94 | - p.join() | ||
95 | - pbar.close() | ||
96 | - np.savez("measures.npz", c_p=r_cp, p_c=r_pc, c_p_c=flat_angles_pcp, p_c_p=flat_angles_pcp) | ||
97 | - except KeyboardInterrupt: | ||
98 | - print("Caught Ctrl-C, quitting") | ||
99 | - p.terminate() | ||
100 | - p.join() | ||
101 | - pbar.close() | ||
102 | - except Exception as e: | ||
103 | - print(e) | ||
104 | - p.terminate() | ||
105 | - p.join() | ||
106 | - pbar.close() | ||
107 | - np.savez("measures_incomplete.npz", c_p=r_cp, p_c=r_pc, c_p_c=flat_angles_pcp, p_c_p=flat_angles_pcp) | ||
108 | - | ||
109 | - | ||
110 | -if __name__ == "__main__": | ||
111 | - # Do the computations and save/reload the data | ||
112 | - | ||
113 | - # measure_all_dist_angles() | ||
114 | - | ||
115 | - d = np.load("measures.npz") | ||
116 | - c_p = d["c_p"] | ||
117 | - p_c = d["p_c"] | ||
118 | - p_c_p = d["p_c_p"] | ||
119 | - c_p_c = d["c_p_c"] | ||
120 | - | ||
121 | - # Plot stuff | ||
122 | - plt.figure(figsize=(6,4), dpi=300) | ||
123 | - plt.hist(c_p) | ||
124 | - plt.savefig("lengths.png") | ||
125 | - plt.close() | ||
126 | - | ||
127 | - # print(f"Final values: P->C1' is \033[32m{avg[0]/10:.3f} ± {avg[1]/10:.3f} nm\033[0m, " | ||
128 | - # f"C1'->P is \033[32m{avg[2]/10:.3f} ± {avg[3]/10:.3f} nm\033[0m, " | ||
129 | - # f"angles C-P-C \033[32m{avg[4]:.2f} ± {avg[5]:.2f}\033[0m and P-C-P \033[32m{avg[6]:.2f} ± {avg[7]:.2f}\033[0m") |
-
Please register or login to post a comment