Showing
5 changed files
with
180 additions
and
3 deletions
... | @@ -1206,7 +1206,7 @@ class Pipeline: | ... | @@ -1206,7 +1206,7 @@ class Pipeline: |
1206 | self.known_issues = [ x[:-1] for x in issues.readlines() if not '-' in x ] | 1206 | self.known_issues = [ x[:-1] for x in issues.readlines() if not '-' in x ] |
1207 | if self.USE_KNOWN_ISSUES: | 1207 | if self.USE_KNOWN_ISSUES: |
1208 | print("\t> Ignoring known issues:") | 1208 | print("\t> Ignoring known issues:") |
1209 | - print(" ".join(self.known_issues") | 1209 | + print(" ".join(self.known_issues)) |
1210 | 1210 | ||
1211 | if self.HOMOLOGY: | 1211 | if self.HOMOLOGY: |
1212 | # Ask Rfam if some are mapped to Rfam families | 1212 | # Ask Rfam if some are mapped to Rfam families |
... | @@ -1263,8 +1263,7 @@ class Pipeline: | ... | @@ -1263,8 +1263,7 @@ class Pipeline: |
1263 | conn.close() | 1263 | conn.close() |
1264 | 1264 | ||
1265 | if self.SELECT_ONLY is not None: | 1265 | if self.SELECT_ONLY is not None: |
1266 | - self.update = [ | 1266 | + self.update = [ c for c in self.update if c.chain_label == self.SELECT_ONLY ] |
1267 | - c for c in self.update if c.chain_label == self.SELECT_ONLY] | ||
1268 | 1267 | ||
1269 | self.n_chains = len(self.update) | 1268 | self.n_chains = len(self.update) |
1270 | print(str(self.n_chains) + " RNA chains of interest.") | 1269 | print(str(self.n_chains) + " RNA chains of interest.") | ... | ... |
... | @@ -1611,3 +1611,15 @@ | ... | @@ -1611,3 +1611,15 @@ |
1611 | 7jz0_1_F | 1611 | 7jz0_1_F |
1612 | 7k00_1_5 | 1612 | 7k00_1_5 |
1613 | 7k00_1_B | 1613 | 7k00_1_B |
1614 | +1qzb_1_B_1-73 | ||
1615 | +1qza_1_B_1-73 | ||
1616 | +5zzm_1_M_3-118 | ||
1617 | +5zzm_1_N_1-2904 | ||
1618 | +3dg2_1_B_1-2904 | ||
1619 | +3dg0_1_B_1-2904 | ||
1620 | +3dg4_1_B_1-2904 | ||
1621 | +3dg5_1_B_1-2904 | ||
1622 | +3dg2_1_A_1-1542 | ||
1623 | +3dg0_1_A_1-1542 | ||
1624 | +3dg4_1_A_1-1542 | ||
1625 | +3dg5_1_A_1-1542 | ... | ... |
... | @@ -4837,3 +4837,39 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4837,3 +4837,39 @@ Sequence is too short. (< 5 resolved nts) |
4837 | 5hjz_1_C | 4837 | 5hjz_1_C |
4838 | Sequence is too short. (< 5 resolved nts) | 4838 | Sequence is too short. (< 5 resolved nts) |
4839 | 4839 | ||
4840 | +1qzb_1_B_1-73 | ||
4841 | +DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B_1-73. | ||
4842 | + | ||
4843 | +1qza_1_B_1-73 | ||
4844 | +DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B_1-73. | ||
4845 | + | ||
4846 | +5zzm_1_M_3-118 | ||
4847 | +DSSR warning 5zzm.json: no nucleotides found. Ignoring 5zzm_1_M_3-118. | ||
4848 | + | ||
4849 | +5zzm_1_N_1-2904 | ||
4850 | +DSSR warning 5zzm.json: no nucleotides found. Ignoring 5zzm_1_N_1-2904. | ||
4851 | + | ||
4852 | +3dg2_1_B_1-2904 | ||
4853 | +DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_B_1-2904. | ||
4854 | + | ||
4855 | +3dg0_1_B_1-2904 | ||
4856 | +DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_B_1-2904. | ||
4857 | + | ||
4858 | +3dg4_1_B_1-2904 | ||
4859 | +DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_B_1-2904. | ||
4860 | + | ||
4861 | +3dg5_1_B_1-2904 | ||
4862 | +DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B_1-2904. | ||
4863 | + | ||
4864 | +3dg2_1_A_1-1542 | ||
4865 | +DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A_1-1542. | ||
4866 | + | ||
4867 | +3dg0_1_A_1-1542 | ||
4868 | +DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A_1-1542. | ||
4869 | + | ||
4870 | +3dg4_1_A_1-1542 | ||
4871 | +DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A_1-1542. | ||
4872 | + | ||
4873 | +3dg5_1_A_1-1542 | ||
4874 | +DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A_1-1542. | ||
4875 | + | ... | ... |
scripts/measure.py
0 → 100755
1 | +#!/usr/bin/python3.8 | ||
2 | + | ||
3 | +# usage : pass as an argument a folder containing .cif files of RNA chains, like those produced by RNANet: | ||
4 | +# usage : ./measure.py ~/Data/RNA/3D/rna_only | ||
5 | +# OR | ||
6 | +# usage : ./measure.py ~/Data/RNA/3D/rna_mapped_to_Rfam | ||
7 | + | ||
8 | +from Bio.PDB import MMCIFParser | ||
9 | +from Bio.PDB.vectors import Vector, calc_angle | ||
10 | +from sys import argv | ||
11 | +from tqdm import tqdm | ||
12 | +import multiprocessing as mp | ||
13 | +import matplotlib.pyplot as plt | ||
14 | +import numpy as np | ||
15 | +import os, signal | ||
16 | + | ||
def measure_in_chain(f, data_dir=None):
    """Measure C1'/P pseudo-bond lengths and flat angles along one RNA chain.

    Args:
        f: name of a .cif file (relative to ``data_dir``) holding one chain.
        data_dir: folder containing ``f``. Defaults to ``argv[1]``, the same
            command-line argument the driver function reads, so the function
            also works inside pool workers that only receive the file name.

    Returns:
        A tuple ``(c_to_p, p_to_c, c_p_c, p_c_p)`` of 1-D float16 arrays with
        NaN values removed:
          * c_to_p: C1'(i-1) -> P(i) distances,
          * p_to_c: P(i) -> C1'(i) distances,
          * c_p_c:  C1'(i-1)-P(i)-C1'(i) angles, as returned by calc_angle,
          * p_c_p:  P(i-1)-C1'(i-1)-P(i) angles, as returned by calc_angle.
        Distances are in the coordinate units of the file (presumably
        angstroms -- TODO confirm).
    """
    if data_dir is None:
        data_dir = argv[1]

    mmcif_parser = MMCIFParser()
    s = mmcif_parser.get_structure('null', os.path.abspath(os.path.join(data_dir, f)))
    chain = next(s[0].get_chains())  # Assume only one chain per .cif file

    c_to_p = []
    p_to_c = []
    c_p_c = []
    p_c_p = []
    last_p = None
    last_c = None
    for res in chain:
        # Get the new C1' and P atoms
        coords_c1p = [atom.get_coord() for atom in res if "C1'" in atom.get_fullname()]
        coords_p = [atom.get_coord() for atom in res if atom.get_name() == "P"]

        # Require exactly one atom of each kind. Checking only the sum of the
        # two counts would let (2, 0) or (0, 2) through and crash on [0] below.
        if len(coords_c1p) != 1 or len(coords_p) != 1:
            # Break the chain of consecutive residues: the next residue must
            # not be measured against atoms from before the gap.
            last_c = None
            last_p = None
            continue
        atom_c1p = Vector(coords_c1p[0])
        atom_p = Vector(coords_p[0])

        if last_c is not None:  # There was a previous, complete residue
            # The C1'(i-1) -> P bond of the theta angle
            c_to_p.append((last_c - atom_p).norm())

            # The C1'(i-1)-P(i)-C1'(i) flat angle
            c_p_c.append(calc_angle(last_c, atom_p, atom_c1p))

            # The P(i-1)-C1'(i-1)-P(i) flat angle
            p_c_p.append(calc_angle(last_p, last_c, atom_p))

        # The P -> C1' bond of the eta angle
        p_to_c.append((atom_c1p - atom_p).norm())
        last_c = atom_c1p
        last_p = atom_p

    # Convert to compact float16 arrays and drop any NaN measurement.
    cleaned = []
    for values in (c_to_p, p_to_c, c_p_c, p_c_p):
        arr = np.array(values, dtype=np.float16)
        cleaned.append(arr[~np.isnan(arr)])

    return tuple(cleaned)
66 | + | ||
def init_worker(tqdm_lock=None):
    """Initializer run in each pool worker process.

    Makes the worker ignore SIGINT, then installs ``tqdm_lock`` as tqdm's
    lock when one is given.
    """
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    if tqdm_lock is None:
        return
    tqdm.set_lock(tqdm_lock)
71 | + | ||
def measure_all_dist_angles():
    """Measure every chain file of the folder given in ``argv[1]`` in parallel.

    Each file of the folder is passed to ``measure_in_chain`` through a
    process pool. The results are concatenated and written to
    ``measures.npz`` under the keys ``c_p``, ``p_c``, ``c_p_c`` and ``p_c_p``.
    If a worker fails, what was gathered so far is written to
    ``measures_incomplete.npz`` instead. On Ctrl-C nothing is saved.
    """
    # Published as a module global so that worker processes can read it.
    # NOTE(review): workers only see this value when they are forked after
    # the assignment (the default start method on Linux) -- confirm if the
    # script must run with the "spawn" start method.
    global path_to_3D_data
    path_to_3D_data = argv[1]
    if not path_to_3D_data.endswith('/'):
        path_to_3D_data += '/'

    files = os.listdir(path_to_3D_data)  # listed once, used for both total and work

    # One list of per-chain arrays per measure, in the order returned by
    # measure_in_chain: C1'->P, P->C1', C-P-C angles, P-C-P angles.
    # Stacking once at the end avoids re-copying the whole array per chain.
    parts = ([], [], [], [])

    def stacked(i):
        """Return the i-th measure as a single float16 array."""
        if not parts[i]:
            return np.array([], dtype=np.float16)
        return np.hstack(parts[i]).astype(np.float16, copy=False)

    def save(filename):
        """Write the four measures gathered so far to ``filename``."""
        np.savez(filename, c_p=stacked(0), p_c=stacked(1),
                 c_p_c=stacked(2), p_c_p=stacked(3))

    p = mp.Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=os.cpu_count())
    pbar = tqdm(total=len(files), desc="Scanning RNA chains", position=0, leave=True)
    try:
        for r in p.imap_unordered(measure_in_chain, files):
            pbar.update(1)
            for acc, values in zip(parts, r):
                acc.append(values)
        p.close()
        p.join()
        save("measures.npz")
    except KeyboardInterrupt:
        print("Caught Ctrl-C, quitting")
        p.terminate()
        p.join()
    except Exception as e:
        print(e)
        p.terminate()
        p.join()
        save("measures_incomplete.npz")
    finally:
        pbar.close()
108 | + | ||
109 | + | ||
if __name__ == "__main__":
    # Do the computations (only when no saved results exist yet), then
    # reload the data from disk.
    if not os.path.exists("measures.npz"):
        if len(argv) < 2:
            raise SystemExit(f"usage: {argv[0]} <folder containing .cif files of RNA chains>")
        measure_all_dist_angles()

    # np.load returns a lazily-read archive: read the arrays inside the
    # with-block so the underlying file is closed afterwards.
    with np.load("measures.npz") as d:
        c_p = d["c_p"]
        p_c = d["p_c"]
        p_c_p = d["p_c_p"]
        c_p_c = d["c_p_c"]

    # Plot stuff
    plt.figure(figsize=(6,4), dpi=300)
    plt.hist(c_p)
    plt.savefig("lengths.png")
    plt.close()

    # TODO: compute the averages/deviations (`avg`) and report them:
    # print(f"Final values: P->C1' is \033[32m{avg[0]/10:.3f} ± {avg[1]/10:.3f} nm\033[0m, "
    #       f"C1'->P is \033[32m{avg[2]/10:.3f} ± {avg[3]/10:.3f} nm\033[0m, "
    #       f"angles C-P-C \033[32m{avg[4]:.2f} ± {avg[5]:.2f}\033[0m and P-C-P \033[32m{avg[6]:.2f} ± {avg[7]:.2f}\033[0m")
-
Please register or login to post a comment