convert_hrna_jsons.py
2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#/usr/bin/python3
import json
import os
import numpy as np
runDir = os.getcwd()
def get_best(i):
weights = [ float(x.strip("[]")) for x in i["weights"] ]
means = [ float(x.strip("[]")) for x in i["means"] ]
s = sorted(zip(weights, means), reverse=True)
return s[0][1]
def get_k(lw, bp):
if lw == "cWW":
if bp in ["GC", "CG"]:
return 3.9
if bp in ["AU", "UA"]:
return 3.3
if bp in ["GU", "UG"]:
return 3.15
return 2.4
if lw == "tWW":
return 2.4
return 0.8
if __name__ == "__main__":
print("processing HRNA jsons...")
lws = []
for c in "ct":
for nt1 in "WHS":
for nt2 in "WHS":
lws.append(c+nt1+nt2)
bps = []
for nt1 in "ACGU":
for nt2 in "ACGU":
bps.append(nt1+nt2)
fullresults = dict()
fullresults["A"] = dict()
fullresults["C"] = dict()
fullresults["G"] = dict()
fullresults["U"] = dict()
counts = dict()
for lw in lws:
counts[lw] = 0
for bp in bps:
fullresults[bp[0]][bp[1]] = []
# open json file
with open(runDir + f"/results/geometry/json/hirerna_{bp}_basepairs.json", "rb") as f:
data = json.load(f)
# consider each BP type
for lw in lws:
this = dict()
# gather params
distance = 0
a1 = 0
a2 = 0
for i in data:
if i["measure"] == f"Distance between {lw} {bp} tips":
distance = np.round(get_best(i), 2)
if i["measure"] == f"{lw}_{bp}_alpha_1":
a1 = np.round(np.pi/180.0*get_best(i), 2)
if i["measure"] == f"{lw}_{bp}_alpha_2":
a2 = np.round(np.pi/180.0*get_best(i), 2)
if distance == 0 and a1 == 0 and a2 == 0:
# not found
continue
counts[lw] += 1
# create entry
this["rho"] = distance
this["a1"] = a1
this["a2"] = a2
this["k"] = get_k(lw, bp)
this["canonical"] = 1.0 if lw=="cWW" and bp in ["GC", "CG", "GU", "UG", "AU", "UA"] else 0.0
this["LW"] = lw
# store entry
fullresults[bp[0]][bp[1]].append(this)
with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f:
json.dump(fullresults, f, indent=4)