last modification for renumbering (issues with OP2)
Former-commit-id: 8c0c0b61
Showing
3 changed files
with
94 additions
and
8 deletions
... | @@ -321,8 +321,8 @@ class Chain: | ... | @@ -321,8 +321,8 @@ class Chain: |
321 | self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif" | 321 | self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif" |
322 | else: | 322 | else: |
323 | status = f"Extract {self.pdb_id}-{self.pdb_chain_id}" | 323 | status = f"Extract {self.pdb_id}-{self.pdb_chain_id}" |
324 | - self.file = path_to_3D_data+"renumbered_rna_only/"+self.chain_label+".cif" | 324 | + self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif" |
325 | - #self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif" | 325 | + |
326 | 326 | ||
327 | # Check if file exists, if yes, abort (do not recompute) | 327 | # Check if file exists, if yes, abort (do not recompute) |
328 | if os.path.exists(self.file): | 328 | if os.path.exists(self.file): |
... | @@ -405,7 +405,7 @@ class Chain: | ... | @@ -405,7 +405,7 @@ class Chain: |
405 | nt=nums.at[i, "nt_name"] | 405 | nt=nums.at[i, "nt_name"] |
406 | 406 | ||
407 | # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G) | 407 | # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G) |
408 | - if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' and resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : | 408 | + if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' or resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : |
409 | res=chain[(' ', resseq, icode_res)] | 409 | res=chain[(' ', resseq, icode_res)] |
410 | else : #modified nucleotides (e.g. chain 5l4o_1_A) | 410 | else : #modified nucleotides (e.g. chain 5l4o_1_A) |
411 | het='H_' + nt | 411 | het='H_' + nt |
... | @@ -1521,14 +1521,10 @@ class Pipeline: | ... | @@ -1521,14 +1521,10 @@ class Pipeline: |
1521 | if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"): | 1521 | if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"): |
1522 | # for the portions mapped to Rfam | 1522 | # for the portions mapped to Rfam |
1523 | os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam") | 1523 | os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam") |
1524 | - ''' | 1524 | + |
1525 | if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"): | 1525 | if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"): |
1526 | # extract chains of pure RNA | 1526 | # extract chains of pure RNA |
1527 | os.makedirs(path_to_3D_data + "rna_only") | 1527 | os.makedirs(path_to_3D_data + "rna_only") |
1528 | - ''' | ||
1529 | - if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "renumbered_rna_only"): | ||
1530 | - # extract chains of pure RNA | ||
1531 | - os.makedirs(path_to_3D_data + "renumbered_rna_only") | ||
1532 | 1528 | ||
1533 | # define and run jobs | 1529 | # define and run jobs |
1534 | joblist = [] | 1530 | joblist = [] | ... | ... |
scripts/convert_hrna_jsons.py
0 → 100644
1 | +#/usr/bin/python3 | ||
2 | +import json | ||
3 | +import os | ||
4 | +import numpy as np | ||
5 | + | ||
6 | +runDir = os.getcwd() | ||
7 | + | ||
8 | +def get_best(i): | ||
9 | + weights = [ float(x.strip("[]")) for x in i["weights"] ] | ||
10 | + means = [ float(x.strip("[]")) for x in i["means"] ] | ||
11 | + s = sorted(zip(weights, means), reverse=True) | ||
12 | + return s[0][1] | ||
13 | + | ||
14 | +def get_k(lw, bp): | ||
15 | + if lw == "cWW": | ||
16 | + if bp in ["GC", "CG"]: | ||
17 | + return 3.9 | ||
18 | + if bp in ["AU", "UA"]: | ||
19 | + return 3.3 | ||
20 | + if bp in ["GU", "UG"]: | ||
21 | + return 3.15 | ||
22 | + return 2.4 | ||
23 | + if lw == "tWW": | ||
24 | + return 2.4 | ||
25 | + return 0.8 | ||
26 | + | ||
27 | +if __name__ == "__main__": | ||
28 | + print("processing HRNA jsons...") | ||
29 | + | ||
30 | + lws = [] | ||
31 | + for c in "ct": | ||
32 | + for nt1 in "WHS": | ||
33 | + for nt2 in "WHS": | ||
34 | + lws.append(c+nt1+nt2) | ||
35 | + | ||
36 | + bps = [] | ||
37 | + for nt1 in "ACGU": | ||
38 | + for nt2 in "ACGU": | ||
39 | + bps.append(nt1+nt2) | ||
40 | + | ||
41 | + fullresults = dict() | ||
42 | + fullresults["A"] = dict() | ||
43 | + fullresults["C"] = dict() | ||
44 | + fullresults["G"] = dict() | ||
45 | + fullresults["U"] = dict() | ||
46 | + counts = dict() | ||
47 | + for lw in lws: | ||
48 | + counts[lw] = 0 | ||
49 | + for bp in bps: | ||
50 | + fullresults[bp[0]][bp[1]] = [] | ||
51 | + | ||
52 | + # open json file | ||
53 | + with open(runDir + f"/results/geometry/json/hirerna_{bp}_basepairs.json", "rb") as f: | ||
54 | + data = json.load(f) | ||
55 | + | ||
56 | + # consider each BP type | ||
57 | + for lw in lws: | ||
58 | + this = dict() | ||
59 | + | ||
60 | + # gather params | ||
61 | + distance = 0 | ||
62 | + a1 = 0 | ||
63 | + a2 = 0 | ||
64 | + for i in data: | ||
65 | + if i["measure"] == f"Distance between {lw} {bp} tips": | ||
66 | + distance = np.round(get_best(i), 2) | ||
67 | + if i["measure"] == f"{lw}_{bp}_alpha_1": | ||
68 | + a1 = np.round(np.pi/180.0*get_best(i), 2) | ||
69 | + if i["measure"] == f"{lw}_{bp}_alpha_2": | ||
70 | + a2 = np.round(np.pi/180.0*get_best(i), 2) | ||
71 | + | ||
72 | + if distance == 0 and a1 == 0 and a2 == 0: | ||
73 | + # not found | ||
74 | + continue | ||
75 | + | ||
76 | + counts[lw] += 1 | ||
77 | + | ||
78 | + # create entry | ||
79 | + this["rho"] = distance | ||
80 | + this["a1"] = a1 | ||
81 | + this["a2"] = a2 | ||
82 | + this["k"] = get_k(lw, bp) | ||
83 | + this["canonical"] = 1.0 if lw=="cWW" and bp in ["GC", "CG", "GU", "UG", "AU", "UA"] else 0.0 | ||
84 | + this["LW"] = lw | ||
85 | + | ||
86 | + # store entry | ||
87 | + fullresults[bp[0]][bp[1]].append(this) | ||
88 | + | ||
89 | + with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f: | ||
90 | + json.dump(fullresults, f, indent=4) |
This diff is collapsed. Click to expand it.
-
Please register or login to post a comment