Aglaé TABOT

last modification for renumbering (issues with OP2)


Former-commit-id: 8c0c0b61
...@@ -321,8 +321,8 @@ class Chain: ...@@ -321,8 +321,8 @@ class Chain:
321 self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif" 321 self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif"
322 else: 322 else:
323 status = f"Extract {self.pdb_id}-{self.pdb_chain_id}" 323 status = f"Extract {self.pdb_id}-{self.pdb_chain_id}"
324 - self.file = path_to_3D_data+"renumbered_rna_only/"+self.chain_label+".cif" 324 + self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif"
325 - #self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif" 325 +
326 326
327 # Check if file exists, if yes, abort (do not recompute) 327 # Check if file exists, if yes, abort (do not recompute)
328 if os.path.exists(self.file): 328 if os.path.exists(self.file):
...@@ -405,7 +405,7 @@ class Chain: ...@@ -405,7 +405,7 @@ class Chain:
405 nt=nums.at[i, "nt_name"] 405 nt=nums.at[i, "nt_name"]
406 406
407 # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G) 407 # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G)
408 - if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' and resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : 408 + if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' or resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' :
409 res=chain[(' ', resseq, icode_res)] 409 res=chain[(' ', resseq, icode_res)]
410 else : #modified nucleotides (e.g. chain 5l4o_1_A) 410 else : #modified nucleotides (e.g. chain 5l4o_1_A)
411 het='H_' + nt 411 het='H_' + nt
...@@ -1521,14 +1521,10 @@ class Pipeline: ...@@ -1521,14 +1521,10 @@ class Pipeline:
1521 if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"): 1521 if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"):
1522 # for the portions mapped to Rfam 1522 # for the portions mapped to Rfam
1523 os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam") 1523 os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam")
1524 - ''' 1524 +
1525 if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"): 1525 if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"):
1526 # extract chains of pure RNA 1526 # extract chains of pure RNA
1527 os.makedirs(path_to_3D_data + "rna_only") 1527 os.makedirs(path_to_3D_data + "rna_only")
1528 - '''
1529 - if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "renumbered_rna_only"):
1530 - # extract chains of pure RNA
1531 - os.makedirs(path_to_3D_data + "renumbered_rna_only")
1532 1528
1533 # define and run jobs 1529 # define and run jobs
1534 joblist = [] 1530 joblist = []
......
1 +#/usr/bin/python3
2 +import json
3 +import os
4 +import numpy as np
5 +
6 +runDir = os.getcwd()
7 +
8 +def get_best(i):
9 + weights = [ float(x.strip("[]")) for x in i["weights"] ]
10 + means = [ float(x.strip("[]")) for x in i["means"] ]
11 + s = sorted(zip(weights, means), reverse=True)
12 + return s[0][1]
13 +
14 +def get_k(lw, bp):
15 + if lw == "cWW":
16 + if bp in ["GC", "CG"]:
17 + return 3.9
18 + if bp in ["AU", "UA"]:
19 + return 3.3
20 + if bp in ["GU", "UG"]:
21 + return 3.15
22 + return 2.4
23 + if lw == "tWW":
24 + return 2.4
25 + return 0.8
26 +
27 +if __name__ == "__main__":
28 + print("processing HRNA jsons...")
29 +
30 + lws = []
31 + for c in "ct":
32 + for nt1 in "WHS":
33 + for nt2 in "WHS":
34 + lws.append(c+nt1+nt2)
35 +
36 + bps = []
37 + for nt1 in "ACGU":
38 + for nt2 in "ACGU":
39 + bps.append(nt1+nt2)
40 +
41 + fullresults = dict()
42 + fullresults["A"] = dict()
43 + fullresults["C"] = dict()
44 + fullresults["G"] = dict()
45 + fullresults["U"] = dict()
46 + counts = dict()
47 + for lw in lws:
48 + counts[lw] = 0
49 + for bp in bps:
50 + fullresults[bp[0]][bp[1]] = []
51 +
52 + # open json file
53 + with open(runDir + f"/results/geometry/json/hirerna_{bp}_basepairs.json", "rb") as f:
54 + data = json.load(f)
55 +
56 + # consider each BP type
57 + for lw in lws:
58 + this = dict()
59 +
60 + # gather params
61 + distance = 0
62 + a1 = 0
63 + a2 = 0
64 + for i in data:
65 + if i["measure"] == f"Distance between {lw} {bp} tips":
66 + distance = np.round(get_best(i), 2)
67 + if i["measure"] == f"{lw}_{bp}_alpha_1":
68 + a1 = np.round(np.pi/180.0*get_best(i), 2)
69 + if i["measure"] == f"{lw}_{bp}_alpha_2":
70 + a2 = np.round(np.pi/180.0*get_best(i), 2)
71 +
72 + if distance == 0 and a1 == 0 and a2 == 0:
73 + # not found
74 + continue
75 +
76 + counts[lw] += 1
77 +
78 + # create entry
79 + this["rho"] = distance
80 + this["a1"] = a1
81 + this["a2"] = a2
82 + this["k"] = get_k(lw, bp)
83 + this["canonical"] = 1.0 if lw=="cWW" and bp in ["GC", "CG", "GU", "UG", "AU", "UA"] else 0.0
84 + this["LW"] = lw
85 +
86 + # store entry
87 + fullresults[bp[0]][bp[1]].append(this)
88 +
89 + with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f:
90 + json.dump(fullresults, f, indent=4)
This diff is collapsed. Click to expand it.