Louis BECQUEY

new script to prepare MOARNA basepair parameters

1 +#/usr/bin/python3
2 +import json
3 +import os
4 +import numpy as np
5 +
6 +runDir = os.getcwd()
7 +
8 +def get_best(i):
9 + weights = [ float(x.strip("[]")) for x in i["weights"] ]
10 + means = [ float(x.strip("[]")) for x in i["means"] ]
11 + s = sorted(zip(weights, means), reverse=True)
12 + return s[0][1]
13 +
14 +def get_k(lw, bp):
15 + if lw == "cWW":
16 + if bp in ["GC", "CG"]:
17 + return 3.9
18 + if bp in ["AU", "UA"]:
19 + return 3.3
20 + if bp in ["GU", "UG"]:
21 + return 3.15
22 + return 2.4
23 + if lw == "tWW":
24 + return 2.4
25 + return 0.8
26 +
27 +if __name__ == "__main__":
28 + print("processing HRNA jsons...")
29 +
30 + lws = []
31 + for c in "ct":
32 + for nt1 in "WHS":
33 + for nt2 in "WHS":
34 + lws.append(c+nt1+nt2)
35 +
36 + bps = []
37 + for nt1 in "ACGU":
38 + for nt2 in "ACGU":
39 + bps.append(nt1+nt2)
40 +
41 + fullresults = dict()
42 + fullresults["A"] = dict()
43 + fullresults["C"] = dict()
44 + fullresults["G"] = dict()
45 + fullresults["U"] = dict()
46 + counts = dict()
47 + for lw in lws:
48 + counts[lw] = 0
49 + for bp in bps:
50 + fullresults[bp[0]][bp[1]] = []
51 +
52 + # open json file
53 + with open(runDir + f"/results/geometry/json/hirerna_{bp}_basepairs.json", "rb") as f:
54 + data = json.load(f)
55 +
56 + # consider each BP type
57 + for lw in lws:
58 + this = dict()
59 +
60 + # gather params
61 + distance = 0
62 + a1 = 0
63 + a2 = 0
64 + for i in data:
65 + if i["measure"] == f"Distance between {lw} {bp} tips":
66 + distance = np.round(get_best(i), 2)
67 + if i["measure"] == f"{lw}_{bp}_alpha_1":
68 + a1 = np.round(np.pi/180.0*get_best(i), 2)
69 + if i["measure"] == f"{lw}_{bp}_alpha_2":
70 + a2 = np.round(np.pi/180.0*get_best(i), 2)
71 +
72 + if distance == 0 and a1 == 0 and a2 == 0:
73 + # not found
74 + continue
75 +
76 + counts[lw] += 1
77 +
78 + # create entry
79 + this["rho"] = distance
80 + this["a1"] = a1
81 + this["a2"] = a2
82 + this["k"] = get_k(lw, bp)
83 + this["canonical"] = 1.0 if lw=="cWW" and bp in ["GC", "CG", "GU", "UG", "AU", "UA"] else 0.0
84 + this["LW"] = lw
85 +
86 + # store entry
87 + fullresults[bp[0]][bp[1]].append(this)
88 +
89 + with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f:
90 + json.dump(fullresults, f, indent=None)
...@@ -2026,9 +2026,9 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) ...@@ -2026,9 +2026,9 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True)
2026 mean = means[i] 2026 mean = means[i]
2027 sigma = np.sqrt(covariances[i]) 2027 sigma = np.sqrt(covariances[i])
2028 weight = weights[i] 2028 weight = weights[i]
2029 - summary_data["means"].append(str(mean)) 2029 + summary_data["means"].append("{:.2f}".format(float(str(mean).strip("[]"))))
2030 - summary_data["std"].append(str(sigma)) 2030 + summary_data["std"].append("{:.2f}".format(float(str(sigma).strip("[]"))))
2031 - summary_data["weights"].append(str(weight)) 2031 + summary_data["weights"].append("{:.2f}".format(str(weight).strip("[]")))
2032 2032
2033 # compute the right x and y data to plot 2033 # compute the right x and y data to plot
2034 y = weight*st.norm.pdf(x, mean, sigma) 2034 y = weight*st.norm.pdf(x, mean, sigma)
...@@ -2730,6 +2730,8 @@ def process_jobs(joblist): ...@@ -2730,6 +2730,8 @@ def process_jobs(joblist):
2730 print("Something went wrong") 2730 print("Something went wrong")
2731 2731
2732 if __name__ == "__main__": 2732 if __name__ == "__main__":
2733 + merge_jsons()
2734 + exit(0)
2733 os.makedirs(runDir + "/results/figures/", exist_ok=True) 2735 os.makedirs(runDir + "/results/figures/", exist_ok=True)
2734 2736
2735 # parse options 2737 # parse options
......