new script to prepare MOARNA basepair parameters

Former-commit-id: 2979e486

new script to prepare MOARNA basepair parameters
Former-commit-id: 2979e486
Louis BECQUEY
Commit e0fd0ceaa8564ddc0b64d072a7c56ff8efb5cfb7 e0fd0cea 1 parent 3a74c203
Showing 2 changed files with 95 additions and 3 deletions
scripts/convert_hrna_jsons.py
statistics.py
--- a/scripts/convert_hrna_jsons.py 0 → 100644
View file @e0fd0ce
+++ b/scripts/convert_hrna_jsons.py 0 → 100644
View file @e0fd0ce
+ #/usr/bin/python3
+ import json
+ import os
+ import numpy as np
+ 
+ runDir = os.getcwd()
+ 
+ def get_best(i):
+     weights = [ float(x.strip("[]")) for x in i["weights"] ]
+     means = [ float(x.strip("[]")) for x in i["means"] ]
+     s = sorted(zip(weights, means), reverse=True)
+     return s[0][1]
+ 
+ def get_k(lw, bp):
+     if lw == "cWW":
+         if bp in ["GC", "CG"]:
+             return 3.9
+         if bp in ["AU", "UA"]:
+             return 3.3
+         if bp in ["GU", "UG"]:
+             return 3.15
+         return 2.4
+     if lw == "tWW":
+         return 2.4
+     return 0.8
+ 
+ if __name__ == "__main__":
+     print("processing HRNA jsons...")
+ 
+     lws = []
+     for c in "ct":
+         for nt1 in "WHS":
+             for nt2 in "WHS":
+                 lws.append(c+nt1+nt2)
+ 
+     bps = []
+     for nt1 in "ACGU":
+         for nt2 in "ACGU":
+             bps.append(nt1+nt2)
+     
+     fullresults = dict()
+     fullresults["A"] = dict()
+     fullresults["C"] = dict()
+     fullresults["G"] = dict()
+     fullresults["U"] = dict()
+     counts = dict()
+     for lw in lws:
+         counts[lw] = 0
+     for bp in bps:
+         fullresults[bp[0]][bp[1]] = []
+ 
+         # open json file
+         with open(runDir + f"/results/geometry/json/hirerna_{bp}_basepairs.json", "rb") as f:
+             data = json.load(f)
+         
+         # consider each BP type
+         for lw in lws:
+             this = dict()
+ 
+             # gather params
+             distance = 0
+             a1 = 0
+             a2 = 0
+             for i in data:
+                 if i["measure"] == f"Distance between {lw} {bp} tips":
+                     distance = np.round(get_best(i), 2)
+                 if i["measure"] == f"{lw}_{bp}_alpha_1":
+                     a1 = np.round(np.pi/180.0*get_best(i), 2)
+                 if i["measure"] == f"{lw}_{bp}_alpha_2":
+                     a2 = np.round(np.pi/180.0*get_best(i), 2)
+ 
+             if distance == 0 and a1 == 0 and a2 == 0:
+                 # not found
+                 continue
+             
+             counts[lw] += 1
+ 
+             # create entry
+             this["rho"] = distance
+             this["a1"] = a1
+             this["a2"] = a2
+             this["k"] = get_k(lw, bp)
+             this["canonical"] = 1.0 if lw=="cWW" and bp in ["GC", "CG", "GU", "UG", "AU", "UA"] else 0.0
+             this["LW"] = lw
+ 
+             # store entry
+             fullresults[bp[0]][bp[1]].append(this)
+ 
+     with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f:
+         json.dump(fullresults, f, indent=None)
--- a/statistics.py
View file @e0fd0ce
+++ b/statistics.py
View file @e0fd0ce
@@ -2026,9 +2026,9 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) 
         mean = means[i]
         sigma = np.sqrt(covariances[i])
         weight = weights[i]
-         summary_data["means"].append(str(mean))
-         summary_data["std"].append(str(sigma))
-         summary_data["weights"].append(str(weight))
+         summary_data["means"].append("{:.2f}".format(float(str(mean).strip("[]"))))
+         summary_data["std"].append("{:.2f}".format(float(str(sigma).strip("[]"))))
+         summary_data["weights"].append("{:.2f}".format(str(weight).strip("[]")))
 
         # compute the right x and y data to plot
         y = weight*st.norm.pdf(x, mean, sigma)
@@ -2730,6 +2730,8 @@ def process_jobs(joblist):
         print("Something went wrong")
 
 if __name__ == "__main__":
+     merge_jsons()
+     exit(0)
     os.makedirs(runDir + "/results/figures/", exist_ok=True)
 
     # parse options