Louis BECQUEY

working basepair angles measures

...@@ -87,4 +87,4 @@ if __name__ == "__main__": ...@@ -87,4 +87,4 @@ if __name__ == "__main__":
87 fullresults[bp[0]][bp[1]].append(this) 87 fullresults[bp[0]][bp[1]].append(this)
88 88
89 with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f: 89 with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f:
90 - json.dump(fullresults, f, indent=None) 90 + json.dump(fullresults, f, indent=4)
......
...@@ -279,6 +279,7 @@ def stats_len(): ...@@ -279,6 +279,7 @@ def stats_len():
279 ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5)) 279 ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5))
280 280
281 # Save the figure 281 # Save the figure
282 +
282 fig.savefig(runDir + f"/results/figures/lengths_{res_thr}A.png") 283 fig.savefig(runDir + f"/results/figures/lengths_{res_thr}A.png")
283 idxQueue.put(thr_idx) # replace the thread index in the queue 284 idxQueue.put(thr_idx) # replace the thread index in the queue
284 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") 285 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
...@@ -1457,12 +1458,15 @@ def basepair_measures(res, pair): ...@@ -1457,12 +1458,15 @@ def basepair_measures(res, pair):
1457 # Measure tau, the dihedral 1458 # Measure tau, the dihedral
1458 u = (res_12**rho).normalized() 1459 u = (res_12**rho).normalized()
1459 v = (rho**pair_12).normalized() 1460 v = (rho**pair_12).normalized()
1460 - w1 = n1**u
1461 - w2 = v**n2
1462 cosTau1 = n1*u 1461 cosTau1 = n1*u
1463 - cosTau2 = v*n2 1462 + cosTau2 = v*n2
1463 +
1464 + # cosTau is enough to compute alpha, but we can't distinguish
1465 + # yet betwwen tau and -tau. If the full computation if required, then:
1464 tau1 = np.arccos(cosTau1)*(180/np.pi) 1466 tau1 = np.arccos(cosTau1)*(180/np.pi)
1465 tau2 = np.arccos(cosTau2)*(180/np.pi) 1467 tau2 = np.arccos(cosTau2)*(180/np.pi)
1468 + w1 = u**n1
1469 + w2 = v**n2
1466 if res_12*w1 < 0: 1470 if res_12*w1 < 0:
1467 tau1 = -tau1 1471 tau1 = -tau1
1468 if pair_12*w2 < 0: 1472 if pair_12*w2 < 0:
...@@ -1471,11 +1475,12 @@ def basepair_measures(res, pair): ...@@ -1471,11 +1475,12 @@ def basepair_measures(res, pair):
1471 # And finally, the a1 and a2 angles between res_12 and p1 / pair_12 and p2 1475 # And finally, the a1 and a2 angles between res_12 and p1 / pair_12 and p2
1472 with warnings.catch_warnings(): 1476 with warnings.catch_warnings():
1473 warnings.simplefilter('ignore', RuntimeWarning) 1477 warnings.simplefilter('ignore', RuntimeWarning)
1474 - a1 = res_12.angle(p1)*(180/np.pi) 1478 + a1 = (-res_12).angle(p1)*(180/np.pi)
1475 - a2 = pair_12.angle(p2)*(180/np.pi) 1479 + a2 = (-pair_12).angle(p2)*(180/np.pi)
1476 - if cosTau1 < 0: 1480 + if cosTau1 > 0:
1481 + # CosTau > 0 (Tau < 90 or Tau > 270) implies that alpha > 180.
1477 a1 = -a1 1482 a1 = -a1
1478 - if cosTau2 < 0: 1483 + if cosTau2 > 0:
1479 a2 = -a2 1484 a2 = -a2
1480 1485
1481 return [dist, b, c, d1, d2, a1, a2, tau1, tau2] 1486 return [dist, b, c, d1, d2, a1, a2, tau1, tau2]
...@@ -1518,8 +1523,8 @@ def measures_wadley(name, s, thr_idx): ...@@ -1518,8 +1523,8 @@ def measures_wadley(name, s, thr_idx):
1518 """ 1523 """
1519 1524
1520 # do not recompute something already computed 1525 # do not recompute something already computed
1521 - if (path.isfile(runDir + '/results/geometry/Pyle/angles/flat_angles_pyle ' + name + '.csv') and 1526 + if (path.isfile(runDir + '/results/geometry/Pyle/angles/flat_angles_pyle_' + name + '.csv') and
1522 - path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")): 1527 + path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv")):
1523 return 1528 return
1524 1529
1525 liste_dist = [] 1530 liste_dist = []
...@@ -1558,9 +1563,9 @@ def measures_wadley(name, s, thr_idx): ...@@ -1558,9 +1563,9 @@ def measures_wadley(name, s, thr_idx):
1558 liste_angl.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv]) 1563 liste_angl.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv])
1559 1564
1560 df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"]) 1565 df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"])
1561 - df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv") 1566 + df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv")
1562 df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"]) 1567 df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"])
1563 - df.to_csv(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle "+name+".csv") 1568 + df.to_csv(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle_"+name+".csv")
1564 1569
1565 @trace_unhandled_exceptions 1570 @trace_unhandled_exceptions
1566 def measures_aa(name, s, thr_idx): 1571 def measures_aa(name, s, thr_idx):
...@@ -1841,7 +1846,7 @@ def measures_hrna_basepairs(name, s, thr_idx): ...@@ -1841,7 +1846,7 @@ def measures_hrna_basepairs(name, s, thr_idx):
1841 chain = next(s[0].get_chains()) 1846 chain = next(s[0].get_chains())
1842 1847
1843 # do not recompute something already computed 1848 # do not recompute something already computed
1844 - if path.isfile(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs "+name+".csv"): 1849 + if path.isfile(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs_"+name+".csv"):
1845 return 1850 return
1846 1851
1847 df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name)) 1852 df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name))
...@@ -1850,7 +1855,7 @@ def measures_hrna_basepairs(name, s, thr_idx): ...@@ -1850,7 +1855,7 @@ def measures_hrna_basepairs(name, s, thr_idx):
1850 l = measures_hrna_basepairs_chain(name, chain, df, thr_idx) 1855 l = measures_hrna_basepairs_chain(name, chain, df, thr_idx)
1851 df_calc = pd.DataFrame(l, columns=["type_LW", "nt1_idx", "nt1_res", "nt2_idx", "nt2_res", "Distance", 1856 df_calc = pd.DataFrame(l, columns=["type_LW", "nt1_idx", "nt1_res", "nt2_idx", "nt2_res", "Distance",
1852 "211_angle", "112_angle", "dB1", "dB2", "alpha1", "alpha2", "3211_torsion", "1123_torsion"]) 1857 "211_angle", "112_angle", "dB1", "dB2", "alpha1", "alpha2", "3211_torsion", "1123_torsion"])
1853 - df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs ' + name + '.csv', float_format="%.3f") 1858 + df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs_' + name + '.csv', float_format="%.3f")
1854 1859
1855 @trace_unhandled_exceptions 1860 @trace_unhandled_exceptions
1856 def measures_hrna_basepairs_chain(name, chain, df, thr_idx): 1861 def measures_hrna_basepairs_chain(name, chain, df, thr_idx):
...@@ -2028,7 +2033,7 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) ...@@ -2028,7 +2033,7 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True)
2028 weight = weights[i] 2033 weight = weights[i]
2029 summary_data["means"].append("{:.2f}".format(float(str(mean).strip("[]")))) 2034 summary_data["means"].append("{:.2f}".format(float(str(mean).strip("[]"))))
2030 summary_data["std"].append("{:.2f}".format(float(str(sigma).strip("[]")))) 2035 summary_data["std"].append("{:.2f}".format(float(str(sigma).strip("[]"))))
2031 - summary_data["weights"].append("{:.2f}".format(str(weight).strip("[]"))) 2036 + summary_data["weights"].append("{:.2f}".format(float(str(weight).strip("[]"))))
2032 2037
2033 # compute the right x and y data to plot 2038 # compute the right x and y data to plot
2034 y = weight*st.norm.pdf(x, mean, sigma) 2039 y = weight*st.norm.pdf(x, mean, sigma)
...@@ -2335,11 +2340,11 @@ def gmm_wadley(): ...@@ -2335,11 +2340,11 @@ def gmm_wadley():
2335 2340
2336 GMM_histo(p_c1p, "P-C1'") 2341 GMM_histo(p_c1p, "P-C1'")
2337 GMM_histo(c1p_p, "C1'-P") 2342 GMM_histo(c1p_p, "C1'-P")
2338 - GMM_histo(p_c1p, "P-C4'") 2343 + GMM_histo(p_c4p, "P-C4'")
2339 - GMM_histo(c1p_p, "C4'-P") 2344 + GMM_histo(c4p_p, "C4'-P")
2340 2345
2341 - GMM_histo(p_c1p, "P-C4'", toric=False, hist=False, col='gold') 2346 + GMM_histo(p_c4p, "P-C4'", toric=False, hist=False, col='gold')
2342 - GMM_histo(c1p_p, "C4'-P", toric=False, hist=False, col='indigo') 2347 + GMM_histo(c4p_p, "C4'-P", toric=False, hist=False, col='indigo')
2343 GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, col='firebrick') 2348 GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, col='firebrick')
2344 GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, col='seagreen') 2349 GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, col='seagreen')
2345 plt.xlabel("Distance (Angströms)") 2350 plt.xlabel("Distance (Angströms)")
...@@ -2730,8 +2735,6 @@ def process_jobs(joblist): ...@@ -2730,8 +2735,6 @@ def process_jobs(joblist):
2730 print("Something went wrong") 2735 print("Something went wrong")
2731 2736
2732 if __name__ == "__main__": 2737 if __name__ == "__main__":
2733 - merge_jsons()
2734 - exit(0)
2735 os.makedirs(runDir + "/results/figures/", exist_ok=True) 2738 os.makedirs(runDir + "/results/figures/", exist_ok=True)
2736 2739
2737 # parse options 2740 # parse options
...@@ -2915,7 +2918,7 @@ if __name__ == "__main__": ...@@ -2915,7 +2918,7 @@ if __name__ == "__main__":
2915 # general_stats() 2918 # general_stats()
2916 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) 2919 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True)
2917 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) 2920 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True)
2918 - joblist = [] 2921 + # joblist = []
2919 # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) 2922 # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')))
2920 # if DO_HIRE_RNA_MEASURES: 2923 # if DO_HIRE_RNA_MEASURES:
2921 # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) 2924 # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
...@@ -2926,11 +2929,11 @@ if __name__ == "__main__": ...@@ -2926,11 +2929,11 @@ if __name__ == "__main__":
2926 # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) 2929 # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv')))
2927 # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv'))) 2930 # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv')))
2928 # process_jobs(joblist) 2931 # process_jobs(joblist)
2929 - # joblist = [] 2932 + joblist = []
2930 - joblist.append(Job(function=gmm_aa_dists, args=())) 2933 + # joblist.append(Job(function=gmm_aa_dists, args=()))
2931 - joblist.append(Job(function=gmm_aa_torsions, args=())) 2934 + # joblist.append(Job(function=gmm_aa_torsions, args=()))
2932 if DO_HIRE_RNA_MEASURES: 2935 if DO_HIRE_RNA_MEASURES:
2933 - joblist.append(Job(function=gmm_hrna, args=())) 2936 + # joblist.append(Job(function=gmm_hrna, args=()))
2934 joblist.append(Job(function=gmm_hrna_basepairs, args=())) 2937 joblist.append(Job(function=gmm_hrna_basepairs, args=()))
2935 if DO_WADLEY_ANALYSIS: 2938 if DO_WADLEY_ANALYSIS:
2936 joblist.append(Job(function=gmm_wadley, args=())) 2939 joblist.append(Job(function=gmm_wadley, args=()))
......