Aglaé TABOT

last modification for renumbering (issues with OP2)

...@@ -321,8 +321,8 @@ class Chain: ...@@ -321,8 +321,8 @@ class Chain:
321 self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif" 321 self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif"
322 else: 322 else:
323 status = f"Extract {self.pdb_id}-{self.pdb_chain_id}" 323 status = f"Extract {self.pdb_id}-{self.pdb_chain_id}"
324 - self.file = path_to_3D_data+"renumbered_rna_only/"+self.chain_label+".cif" 324 + self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif"
325 - #self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif" 325 +
326 326
327 # Check if file exists, if yes, abort (do not recompute) 327 # Check if file exists, if yes, abort (do not recompute)
328 if os.path.exists(self.file): 328 if os.path.exists(self.file):
...@@ -405,7 +405,7 @@ class Chain: ...@@ -405,7 +405,7 @@ class Chain:
405 nt=nums.at[i, "nt_name"] 405 nt=nums.at[i, "nt_name"]
406 406
407 # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G) 407 # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G)
408 - if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' and resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : 408 + if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' or resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' :
409 res=chain[(' ', resseq, icode_res)] 409 res=chain[(' ', resseq, icode_res)]
410 else : #modified nucleotides (e.g. chain 5l4o_1_A) 410 else : #modified nucleotides (e.g. chain 5l4o_1_A)
411 het='H_' + nt 411 het='H_' + nt
...@@ -1521,14 +1521,10 @@ class Pipeline: ...@@ -1521,14 +1521,10 @@ class Pipeline:
1521 if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"): 1521 if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"):
1522 # for the portions mapped to Rfam 1522 # for the portions mapped to Rfam
1523 os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam") 1523 os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam")
1524 - ''' 1524 +
1525 if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"): 1525 if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"):
1526 # extract chains of pure RNA 1526 # extract chains of pure RNA
1527 os.makedirs(path_to_3D_data + "rna_only") 1527 os.makedirs(path_to_3D_data + "rna_only")
1528 - '''
1529 - if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "renumbered_rna_only"):
1530 - # extract chains of pure RNA
1531 - os.makedirs(path_to_3D_data + "renumbered_rna_only")
1532 1528
1533 # define and run jobs 1529 # define and run jobs
1534 joblist = [] 1530 joblist = []
......
1 +#!/usr/bin/python3
2 +import json
3 +import os
4 +import numpy as np
5 +
6 +runDir = os.getcwd()
7 +
8 +def get_best(i):
9 + weights = [ float(x.strip("[]")) for x in i["weights"] ]
10 + means = [ float(x.strip("[]")) for x in i["means"] ]
11 + s = sorted(zip(weights, means), reverse=True)
12 + return s[0][1]
13 +
14 +def get_k(lw, bp):
15 + if lw == "cWW":
16 + if bp in ["GC", "CG"]:
17 + return 3.9
18 + if bp in ["AU", "UA"]:
19 + return 3.3
20 + if bp in ["GU", "UG"]:
21 + return 3.15
22 + return 2.4
23 + if lw == "tWW":
24 + return 2.4
25 + return 0.8
26 +
27 +if __name__ == "__main__":
28 + print("processing HRNA jsons...")
29 +
30 + lws = []
31 + for c in "ct":
32 + for nt1 in "WHS":
33 + for nt2 in "WHS":
34 + lws.append(c+nt1+nt2)
35 +
36 + bps = []
37 + for nt1 in "ACGU":
38 + for nt2 in "ACGU":
39 + bps.append(nt1+nt2)
40 +
41 + fullresults = dict()
42 + fullresults["A"] = dict()
43 + fullresults["C"] = dict()
44 + fullresults["G"] = dict()
45 + fullresults["U"] = dict()
46 + counts = dict()
47 + for lw in lws:
48 + counts[lw] = 0
49 + for bp in bps:
50 + fullresults[bp[0]][bp[1]] = []
51 +
52 + # open json file
53 + with open(runDir + f"/results/geometry/json/hirerna_{bp}_basepairs.json", "rb") as f:
54 + data = json.load(f)
55 +
56 + # consider each BP type
57 + for lw in lws:
58 + this = dict()
59 +
60 + # gather params
61 + distance = 0
62 + a1 = 0
63 + a2 = 0
64 + for i in data:
65 + if i["measure"] == f"Distance between {lw} {bp} tips":
66 + distance = np.round(get_best(i), 2)
67 + if i["measure"] == f"{lw}_{bp}_alpha_1":
68 + a1 = np.round(np.pi/180.0*get_best(i), 2)
69 + if i["measure"] == f"{lw}_{bp}_alpha_2":
70 + a2 = np.round(np.pi/180.0*get_best(i), 2)
71 +
72 + if distance == 0 and a1 == 0 and a2 == 0:
73 + # not found
74 + continue
75 +
76 + counts[lw] += 1
77 +
78 + # create entry
79 + this["rho"] = distance
80 + this["a1"] = a1
81 + this["a2"] = a2
82 + this["k"] = get_k(lw, bp)
83 + this["canonical"] = 1.0 if lw=="cWW" and bp in ["GC", "CG", "GU", "UG", "AU", "UA"] else 0.0
84 + this["LW"] = lw
85 +
86 + # store entry
87 + fullresults[bp[0]][bp[1]].append(this)
88 +
89 + with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f:
90 + json.dump(fullresults, f, indent=4)
...@@ -19,6 +19,7 @@ import matplotlib.patches as mpatches ...@@ -19,6 +19,7 @@ import matplotlib.patches as mpatches
19 import scipy.cluster.hierarchy as sch 19 import scipy.cluster.hierarchy as sch
20 import sklearn 20 import sklearn
21 import json 21 import json
22 +import glob
22 import pickle 23 import pickle
23 import Bio 24 import Bio
24 from scipy.spatial.distance import squareform 25 from scipy.spatial.distance import squareform
...@@ -278,6 +279,7 @@ def stats_len(): ...@@ -278,6 +279,7 @@ def stats_len():
278 ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5)) 279 ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5))
279 280
280 # Save the figure 281 # Save the figure
282 +
281 fig.savefig(runDir + f"/results/figures/lengths_{res_thr}A.png") 283 fig.savefig(runDir + f"/results/figures/lengths_{res_thr}A.png")
282 idxQueue.put(thr_idx) # replace the thread index in the queue 284 idxQueue.put(thr_idx) # replace the thread index in the queue
283 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") 285 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
...@@ -1377,72 +1379,111 @@ def pos_b2(res): ...@@ -1377,72 +1379,111 @@ def pos_b2(res):
1377 else: 1379 else:
1378 return [] 1380 return []
1379 1381
1380 -def basepair_apex_distance(res, pair): 1382 +@trace_unhandled_exceptions
1381 - """ 1383 +def basepair_measures(res, pair):
1382 - measure of the distance between the tips of the paired nucleotides (B1 / B1 or B1 / B2 or B2 / B2)
1383 - """
1384 - dist=[]
1385 - d=0
1386 - if res.get_resname()=='A' or res.get_resname()=='G' :# different cases if 1 aromatic cycle or 2
1387 - atom_res=pos_b2(res)
1388 - if pair.get_resname()=='A' or pair.get_resname()=='G' :
1389 - atom_pair=pos_b2(pair)
1390 - if pair.get_resname()=='C' or pair.get_resname()=='U' :
1391 - atom_pair=pos_b1(pair)
1392 -
1393 - if res.get_resname()=='C' or res.get_resname()=='U' :
1394 - atom_res=pos_b1(res)
1395 - if pair.get_resname()=='A' or pair.get_resname()=='G' :
1396 - atom_pair=pos_b2(pair)
1397 - if pair.get_resname()=='C' or pair.get_resname()=='U' :
1398 - atom_pair=pos_b1(pair)
1399 -
1400 - dist = get_euclidian_distance(atom_res, atom_pair)
1401 -
1402 - return dist
1403 -
1404 -def basepair_flat_angle(res, pair):
1405 """ 1384 """
1406 - measurement of the plane angles formed by the vectors C1->B1 of the paired nucleotides 1385 + measurement of the flat angles describing a basepair in the HiRE-RNA model
1407 """ 1386 """
1408 if res.get_resname()=='C' or res.get_resname()=='U' : 1387 if res.get_resname()=='C' or res.get_resname()=='U' :
1409 atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] 1388 atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ]
1410 atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] 1389 atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
1411 atom_b1_res = pos_b1(res) 1390 atom_b1_res = pos_b1(res)
1412 - a1_res = Vector(atom_c4_res[0]) 1391 + if not len(atom_c4_res) or not len(atom_c1p_res) or not len(atom_b1_res):
1392 + return
1393 + a3_res = Vector(atom_c4_res[0])
1413 a2_res = Vector(atom_c1p_res[0]) 1394 a2_res = Vector(atom_c1p_res[0])
1414 - a3_res = Vector(atom_b1_res[0]) 1395 + a1_res = Vector(atom_b1_res[0])
1415 if res.get_resname()=='A' or res.get_resname()=='G' : 1396 if res.get_resname()=='A' or res.get_resname()=='G' :
1416 atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] 1397 atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
1417 atom_b1_res = pos_b1(res) 1398 atom_b1_res = pos_b1(res)
1418 atom_b2_res = pos_b2(res) 1399 atom_b2_res = pos_b2(res)
1419 - a1_res = Vector(atom_c1p_res[0]) 1400 + if not len(atom_c1p_res) or not len(atom_b1_res) or not len(atom_b2_res):
1401 + return
1402 + a3_res = Vector(atom_c1p_res[0])
1420 a2_res = Vector(atom_b1_res[0]) 1403 a2_res = Vector(atom_b1_res[0])
1421 - a3_res = Vector(atom_b2_res[0]) 1404 + a1_res = Vector(atom_b2_res[0])
1422 1405
1423 if pair.get_resname()=='C' or pair.get_resname()=='U' : 1406 if pair.get_resname()=='C' or pair.get_resname()=='U' :
1424 atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] 1407 atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ]
1425 atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] 1408 atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
1426 atom_b1_pair = pos_b1(pair) 1409 atom_b1_pair = pos_b1(pair)
1427 - a1_pair = Vector(atom_c4_pair[0]) 1410 + if not len(atom_c4_pair) or not len(atom_c1p_pair) or not len(atom_b1_pair):
1411 + return
1412 + a3_pair = Vector(atom_c4_pair[0])
1428 a2_pair = Vector(atom_c1p_pair[0]) 1413 a2_pair = Vector(atom_c1p_pair[0])
1429 - a3_pair = Vector(atom_b1_pair) 1414 + a1_pair = Vector(atom_b1_pair[0])
1430 if pair.get_resname()=='A' or pair.get_resname()=='G' : 1415 if pair.get_resname()=='A' or pair.get_resname()=='G' :
1431 atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] 1416 atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
1432 atom_b1_pair = pos_b1(pair) 1417 atom_b1_pair = pos_b1(pair)
1433 atom_b2_pair = pos_b2(pair) 1418 atom_b2_pair = pos_b2(pair)
1434 - a1_pair = Vector(atom_c1p_pair[0]) 1419 + if not len(atom_c1p_pair) or not len(atom_b1_pair) or not len(atom_b2_pair): # C1', B1 or B2 could not be located in the paired nucleotide, skip measures.
1420 + return
1421 + a3_pair = Vector(atom_c1p_pair[0])
1435 a2_pair = Vector(atom_b1_pair[0]) 1422 a2_pair = Vector(atom_b1_pair[0])
1436 - a3_pair = Vector(atom_b2_pair[0]) 1423 + a1_pair = Vector(atom_b2_pair[0])
1437 1424
1438 - # we calculate the 4 plane angles including these vectors 1425 + # Bond vectors
1426 + res_32 = a3_res - a2_res
1427 + res_12 = a1_res - a2_res
1428 + pair_32 = a3_pair - a2_pair
1429 + pair_12 = a1_pair - a2_pair
1430 + rho = a1_res - a1_pair # from pair to res
1431 +
1432 + # dist
1433 + dist = rho.norm()
1434 +
1435 + # we calculate the 2 plane angles
1436 + with warnings.catch_warnings():
1437 + warnings.simplefilter('ignore', RuntimeWarning)
1438 + b = res_12.angle(rho)*(180/np.pi) # equal to the previous implementation
1439 + c = pair_12.angle(-rho)*(180/np.pi) #
1440 + # a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) # not required
1441 + # b = calc_angle(a2_res, a1_res, a1_pair)*(180/np.pi)
1442 + # c = calc_angle(a1_res, a1_pair, a2_pair)*(180/np.pi)
1443 + # d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) # not required
1444 +
1445 + # Compute plane vectors
1446 + n1 = (res_32**res_12).normalized() # ** between vectors, is the cross product
1447 + n2 = (pair_32**pair_12).normalized()
1448 +
1449 + # Distances between base tip and the other base's plane (orthogonal projection)
1450 + # if angle(rho, n) > pi/2 the distance is negative (signed following n)
1451 + d1 = rho*n1 # projection of rho on axis n1
1452 + d2 = rho*n2
1453 +
1454 + # Now the projection of rho in the planes. It's just a sum of the triangles' two other edges.
1455 + p1 = (-rho+n1**d1).normalized() # between vector and scalar, ** is the multiplication by a scalar
1456 + p2 = (rho-n2**d2).normalized()
1457 +
1458 + # Measure tau, the dihedral
1459 + u = (res_12**rho).normalized()
1460 + v = (rho**pair_12).normalized()
1461 + cosTau1 = n1*u
1462 + cosTau2 = v*n2
1439 1463
1440 - a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) 1464 + # cosTau is enough to compute alpha, but we can't distinguish
1441 - b = calc_angle(a2_res, a3_res, a3_pair)*(180/np.pi) 1465 + # yet between tau and -tau. If the full computation is required, then:
1442 - c = calc_angle(a3_res, a3_pair, a2_pair)*(180/np.pi) 1466 + tau1 = np.arccos(cosTau1)*(180/np.pi)
1443 - d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) 1467 + tau2 = np.arccos(cosTau2)*(180/np.pi)
1444 - angles = [a, b, c, d] 1468 + w1 = u**n1
1445 - return angles 1469 + w2 = v**n2
1470 + if res_12*w1 < 0:
1471 + tau1 = -tau1
1472 + if pair_12*w2 < 0:
1473 + tau2 = -tau2
1474 +
1475 + # And finally, the a1 and a2 angles between res_12 and p1 / pair_12 and p2
1476 + with warnings.catch_warnings():
1477 + warnings.simplefilter('ignore', RuntimeWarning)
1478 + a1 = (-res_12).angle(p1)*(180/np.pi)
1479 + a2 = (-pair_12).angle(p2)*(180/np.pi)
1480 + if cosTau1 > 0:
1481 + # CosTau > 0 (Tau < 90 or Tau > 270) implies that alpha > 180.
1482 + a1 = -a1
1483 + if cosTau2 > 0:
1484 + a2 = -a2
1485 +
1486 + return [dist, b, c, d1, d2, a1, a2, tau1, tau2]
1446 1487
1447 @trace_unhandled_exceptions 1488 @trace_unhandled_exceptions
1448 def measure_from_structure(f): 1489 def measure_from_structure(f):
...@@ -1482,8 +1523,8 @@ def measures_wadley(name, s, thr_idx): ...@@ -1482,8 +1523,8 @@ def measures_wadley(name, s, thr_idx):
1482 """ 1523 """
1483 1524
1484 # do not recompute something already computed 1525 # do not recompute something already computed
1485 - if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley ' + name + '.csv') and 1526 + if (path.isfile(runDir + '/results/geometry/Pyle/angles/flat_angles_pyle_' + name + '.csv') and
1486 - path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")): 1527 + path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv")):
1487 return 1528 return
1488 1529
1489 liste_dist = [] 1530 liste_dist = []
...@@ -1522,9 +1563,9 @@ def measures_wadley(name, s, thr_idx): ...@@ -1522,9 +1563,9 @@ def measures_wadley(name, s, thr_idx):
1522 liste_angl.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv]) 1563 liste_angl.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv])
1523 1564
1524 df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"]) 1565 df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"])
1525 - df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv") 1566 + df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv")
1526 df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"]) 1567 df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"])
1527 - df.to_csv(runDir + "/results/geometry/Pyle/angles/angles_plans_wadley "+name+".csv") 1568 + df.to_csv(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle_"+name+".csv")
1528 1569
1529 @trace_unhandled_exceptions 1570 @trace_unhandled_exceptions
1530 def measures_aa(name, s, thr_idx): 1571 def measures_aa(name, s, thr_idx):
...@@ -1533,7 +1574,7 @@ def measures_aa(name, s, thr_idx): ...@@ -1533,7 +1574,7 @@ def measures_aa(name, s, thr_idx):
1533 """ 1574 """
1534 1575
1535 # do not recompute something already computed 1576 # do not recompute something already computed
1536 - if path.isfile(runDir+"/results/geometry/all-atoms/distances/dist_atoms "+name+".csv"): 1577 + if path.isfile(runDir+"/results/geometry/all-atoms/distances/dist_atoms_"+name+".csv"):
1537 return 1578 return
1538 1579
1539 last_o3p = [] # o3 'of the previous nucleotide linked to the P of the current nucleotide 1580 last_o3p = [] # o3 'of the previous nucleotide linked to the P of the current nucleotide
...@@ -1685,7 +1726,7 @@ def measures_aa(name, s, thr_idx): ...@@ -1685,7 +1726,7 @@ def measures_aa(name, s, thr_idx):
1685 df=pd.concat([df_comm, df_pur, df_pyr], axis = 1) 1726 df=pd.concat([df_comm, df_pur, df_pyr], axis = 1)
1686 pbar.close() 1727 pbar.close()
1687 1728
1688 - df.to_csv(runDir + "/results/geometry/all-atoms/distances/dist_atoms " + name + ".csv") 1729 + df.to_csv(runDir + "/results/geometry/all-atoms/distances/dist_atoms_" + name + ".csv")
1689 1730
1690 @trace_unhandled_exceptions 1731 @trace_unhandled_exceptions
1691 def measures_hrna(name, s, thr_idx): 1732 def measures_hrna(name, s, thr_idx):
...@@ -1805,94 +1846,92 @@ def measures_hrna_basepairs(name, s, thr_idx): ...@@ -1805,94 +1846,92 @@ def measures_hrna_basepairs(name, s, thr_idx):
1805 chain = next(s[0].get_chains()) 1846 chain = next(s[0].get_chains())
1806 1847
1807 # do not recompute something already computed 1848 # do not recompute something already computed
1808 - if path.isfile(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs "+name+".csv"): 1849 + if path.isfile(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs_"+name+".csv"):
1809 return 1850 return
1810 1851
1811 df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name)) 1852 df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name))
1812 1853
1813 - if df['index_chain'][0]==1:#ignore files with numbering errors 1854 + if df['index_chain'][0] == 1: # ignore files with numbering errors : TODO : remove when we get DSSR Pro, there should not be numbering errors anymore
1814 - l = measures_hrna_basepairs_chain(chain, df, thr_idx) 1855 + l = measures_hrna_basepairs_chain(name, chain, df, thr_idx)
1815 - 1856 + df_calc = pd.DataFrame(l, columns=["type_LW", "nt1_idx", "nt1_res", "nt2_idx", "nt2_res", "Distance",
1816 - df_calc=pd.DataFrame(l, columns=["Chaine", "type LW", "Resseq", "Num paired", "Distance", "C4'-C1'-B1", "C1'-B1-B1pair", "B1-B1pair-C1'pair", "B1pair-C1'pair-C4'pair"]) 1857 + "211_angle", "112_angle", "dB1", "dB2", "alpha1", "alpha2", "3211_torsion", "1123_torsion"])
1817 - df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs '+name+'.csv') 1858 + df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs_' + name + '.csv', float_format="%.3f")
1818 -
1819 1859
1820 @trace_unhandled_exceptions 1860 @trace_unhandled_exceptions
1821 -def measures_hrna_basepairs_chain(chain, df, thr_idx): 1861 +def measures_hrna_basepairs_chain(name, chain, df, thr_idx):
1822 """ 1862 """
1823 Cleanup of the dataset 1863 Cleanup of the dataset
1824 measurements of distances and angles between paired nucleotides in the chain 1864 measurements of distances and angles between paired nucleotides in the chain
1825 """ 1865 """
1826 1866
1827 - liste_dist=[] 1867 + results = []
1828 warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) 1868 warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
1829 1869
1830 pairs = df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep 1870 pairs = df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep
1831 - for i in range(pairs.shape[0]): #we remove the lines where no pairing (NaN in paired) 1871 + for i in range(pairs.shape[0]): # we remove the lines where no pairing (NaN in paired)
1832 - index_with_nan=pairs.index[pairs.iloc[:,2].isnull()] 1872 + index_with_nan = pairs.index[pairs.iloc[:,2].isnull()]
1833 pairs.drop(index_with_nan, 0, inplace=True) 1873 pairs.drop(index_with_nan, 0, inplace=True)
1834 1874
1835 - paired_int=[] 1875 + paired_int = []
1836 - for i in pairs.index:# convert values from paired to integers or lists of integers 1876 + for i in pairs.index: # convert values from paired to integers or lists of integers
1837 - paired=pairs.at[i, 'paired'] 1877 + paired = pairs.at[i, 'paired']
1838 if type(paired) is np.int64 or type(paired) is np.float64: 1878 if type(paired) is np.int64 or type(paired) is np.float64:
1839 paired_int.append(int(paired)) 1879 paired_int.append(int(paired))
1840 else : #strings 1880 else : #strings
1841 - if len(paired)<3 : #a single pairing 1881 + if len(paired) < 3: # a single pairing
1842 paired_int.append(int(paired)) 1882 paired_int.append(int(paired))
1843 - else : #several pairings 1883 + else : # several pairings
1844 - paired=paired.split(',') 1884 + paired = paired.split(',')
1845 - l=[int(i) for i in paired] 1885 + l = [ int(i) for i in paired ]
1846 paired_int.append(l) 1886 paired_int.append(l)
1847 1887
1848 - pair_type_LW_bis=[] 1888 + pair_type_LW_bis = []
1849 for j in pairs.index: 1889 for j in pairs.index:
1850 pair_type_LW = pairs.at[j, 'pair_type_LW'] 1890 pair_type_LW = pairs.at[j, 'pair_type_LW']
1851 - if len(pair_type_LW)<4 : #a single pairing 1891 + if len(pair_type_LW) < 4 : # a single pairing
1852 pair_type_LW_bis.append(pair_type_LW) 1892 pair_type_LW_bis.append(pair_type_LW)
1853 - else : #several pairings 1893 + else : # several pairings
1854 - pair_type_LW=pair_type_LW.split(',') 1894 + pair_type_LW = pair_type_LW.split(',')
1855 - l=[i for i in pair_type_LW] 1895 + l = [ i for i in pair_type_LW ]
1856 pair_type_LW_bis.append(pair_type_LW) 1896 pair_type_LW_bis.append(pair_type_LW)
1857 1897
1858 - #addition of these new columns 1898 + # addition of these new columns
1859 pairs.insert(4, "paired_int", paired_int, True) 1899 pairs.insert(4, "paired_int", paired_int, True)
1860 pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True) 1900 pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True)
1861 1901
1862 - indexNames=pairs[pairs['paired_int'] == 0].index 1902 + indexNames = pairs[pairs['paired_int'] == 0].index
1863 - pairs.drop(indexNames, inplace=True)#deletion of lines with a 0 in paired_int (matching to another RNA chain) 1903 + pairs.drop(indexNames, inplace=True) # deletion of lines with a 0 in paired_int (matching to another RNA chain)
1864 - 1904 +
1865 - for i in tqdm(pairs.index, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {chain} measures_hrna_basepairs_chain", unit="res", leave=False): 1905 + for i in tqdm(pairs.index, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna_basepairs_chain", unit="res", leave=False):
1866 - """ 1906 + # calculations for each row of the pairs dataset
1867 - calculations for each row of the pairs dataset 1907 + index = pairs.at[i, 'index_chain']
1868 - """ 1908 + res1 = chain[(' ', index, ' ')].get_resname()
1869 - index=pairs.at[i, 'index_chain'] 1909 + if res1 not in ['A','C','G','U']:
1870 - type_LW=pairs.at[i, 'pair_type_LW_bis'] #pairing type 1910 + continue
1871 - num_paired=pairs.at[i, 'paired_int'] #number (index_chain) of the paired nucleotide 1911 + type_LW = pairs.at[i, 'pair_type_LW_bis'] # pairing type
1912 + num_paired = pairs.at[i, 'paired_int'] # number (index_chain) of the paired nucleotide
1872 1913
1873 if type(num_paired) is int or type(num_paired) is np.int64: 1914 if type(num_paired) is int or type(num_paired) is np.int64:
1874 - try : 1915 + res2 = chain[(' ', num_paired, ' ')].get_resname()
1875 - d = basepair_apex_distance(chain[(' ',index, ' ')], chain[(' ', num_paired, ' ')]) 1916 + if res2 not in ["A","C","G","U"]:
1876 - angle = basepair_flat_angle(chain[(' ', index, ' ')], chain[(' ', num_paired, ' ')]) 1917 + continue
1877 - if d != 0.0: 1918 + measures = basepair_measures(chain[(' ', index, ' ')], chain[(' ', num_paired, ' ')])
1878 - liste_dist.append([chain, type_LW, index, num_paired, d, angle[0], angle[1], angle[2], angle[3]]) 1919 + if measures is not None:
1879 - except : 1920 + results.append([type_LW, index, res1, num_paired, res2] + measures)
1880 - pass 1921 + else:
1881 - else : 1922 + for j in range(len(num_paired)): # if several pairings, process them one by one
1882 - for j in range(len(num_paired)): #if several pairings, process them one by one 1923 + if num_paired[j] != 0:
1883 - if num_paired[j] != 0 : 1924 + res2 = chain[(' ', num_paired[j], ' ')].get_resname()
1884 - try : 1925 + if res2 not in ["A","C","G","U"]:
1885 - d = basepair_apex_distance(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')]) 1926 + continue
1886 - angle = basepair_flat_angle(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')]) 1927 + measures = basepair_measures(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')])
1887 - if d != 0.0: 1928 + if measures is not None:
1888 - liste_dist.append([chain, type_LW[j], index, num_paired[j], d, angle[0], angle[1], angle[2], angle[3]]) 1929 + results.append([type_LW[j], index, res1, num_paired[j], res2] + measures)
1889 - except: 1930 +
1890 - pass 1931 + return results
1891 -
1892 - return(liste_dist)
1893 1932
1894 @trace_unhandled_exceptions 1933 @trace_unhandled_exceptions
1895 -def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=True) : 1934 +def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) :
1896 """ 1935 """
1897 Plot Gaussian-Mixture-Model (with or without histograms) 1936 Plot Gaussian-Mixture-Model (with or without histograms)
1898 """ 1937 """
...@@ -1906,8 +1945,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr ...@@ -1906,8 +1945,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr
1906 1945
1907 # chooses the number of components based on the maximum likelihood value (maxlogv) 1946 # chooses the number of components based on the maximum likelihood value (maxlogv)
1908 n_components_range = np.arange(8)+1 1947 n_components_range = np.arange(8)+1
1909 - aic = [] 1948 + # aic = []
1910 - bic = [] 1949 + # bic = []
1911 maxlogv=[] 1950 maxlogv=[]
1912 md = np.array(data).reshape(-1,1) 1951 md = np.array(data).reshape(-1,1)
1913 nb_components = 1 1952 nb_components = 1
...@@ -1915,8 +1954,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr ...@@ -1915,8 +1954,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr
1915 log_max = 0 1954 log_max = 0
1916 for n_comp in n_components_range: 1955 for n_comp in n_components_range:
1917 gmm = GaussianMixture(n_components=n_comp).fit(md) 1956 gmm = GaussianMixture(n_components=n_comp).fit(md)
1918 - aic.append(abs(gmm.aic(md))) 1957 + # aic.append(abs(gmm.aic(md)))
1919 - bic.append(abs(gmm.bic(md))) 1958 + # bic.append(abs(gmm.bic(md)))
1920 maxlogv.append(gmm.lower_bound_) 1959 maxlogv.append(gmm.lower_bound_)
1921 if gmm.lower_bound_== max(maxlogv) : # takes the maximum 1960 if gmm.lower_bound_== max(maxlogv) : # takes the maximum
1922 nb_components = n_comp 1961 nb_components = n_comp
...@@ -1962,10 +2001,10 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr ...@@ -1962,10 +2001,10 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr
1962 if hist: 2001 if hist:
1963 plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True) 2002 plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True)
1964 if toric: 2003 if toric:
1965 - plt.xlabel("Angle (Degré)") 2004 + plt.xlabel("Angle (Degrees)")
1966 else: 2005 else:
1967 - plt.xlabel("Distance (Angström)") 2006 + plt.xlabel("Distance (Angströms)")
1968 - plt.ylabel("Densité") 2007 + plt.ylabel("Density")
1969 2008
1970 # Prepare the GMM curve with some abscissa points 2009 # Prepare the GMM curve with some abscissa points
1971 if toric: 2010 if toric:
...@@ -1985,16 +2024,16 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr ...@@ -1985,16 +2024,16 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr
1985 summary_data["std"] = [] 2024 summary_data["std"] = []
1986 2025
1987 # plot 2026 # plot
1988 - courbes = [] 2027 + curves = []
1989 for i in range(nb_components): 2028 for i in range(nb_components):
1990 2029
1991 # store the parameters 2030 # store the parameters
1992 mean = means[i] 2031 mean = means[i]
1993 sigma = np.sqrt(covariances[i]) 2032 sigma = np.sqrt(covariances[i])
1994 weight = weights[i] 2033 weight = weights[i]
1995 - summary_data["means"].append(str(mean)) 2034 + summary_data["means"].append("{:.2f}".format(float(str(mean).strip("[]"))))
1996 - summary_data["std"].append(str(sigma)) 2035 + summary_data["std"].append("{:.2f}".format(float(str(sigma).strip("[]"))))
1997 - summary_data["weights"].append(str(weight)) 2036 + summary_data["weights"].append("{:.2f}".format(float(str(weight).strip("[]"))))
1998 2037
1999 # compute the right x and y data to plot 2038 # compute the right x and y data to plot
2000 y = weight*st.norm.pdf(x, mean, sigma) 2039 y = weight*st.norm.pdf(x, mean, sigma)
...@@ -2022,25 +2061,25 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr ...@@ -2022,25 +2061,25 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr
2022 plt.plot(newx, newy, c=colors[i]) 2061 plt.plot(newx, newy, c=colors[i])
2023 else: 2062 else:
2024 # store for later summation 2063 # store for later summation
2025 - courbes.append(np.array(newy)) 2064 + curves.append(np.array(newy))
2026 2065
2027 if hist: 2066 if hist:
2028 - plt.title("Histogramme " +name_data+ " avec GMM pour " +str(nb_components)+ " composantes (" + str(len(data_ori))+" valeurs)") 2067 + plt.title(f"Histogram of {name_data} with GMM of {nb_components} components (" + str(len(data_ori))+" values)")
2029 if save: 2068 if save:
2030 - plt.savefig("Histogramme " +name_data+ " avec GMM pour " +str(nb_components)+ " composantes (" + str(len(data_ori))+" valeurs).png") 2069 + plt.savefig(f"Histogram_{name_data}_{nb_components}_comps.png")
2031 plt.close() 2070 plt.close()
2032 else: 2071 else:
2033 # Plot their sum, do not save figure yet 2072 # Plot their sum, do not save figure yet
2034 try: 2073 try:
2035 - plt.plot(newx, sum(courbes), c=couleur, label=name_data) 2074 + plt.plot(newx, sum(curves), c=col, label=name_data)
2036 except TypeError: 2075 except TypeError:
2037 - print("N curves:", len(courbes)) 2076 + print("N curves:", len(curves))
2038 - for c in courbes: 2077 + for c in curves:
2039 print(c) 2078 print(c)
2040 plt.legend() 2079 plt.legend()
2041 2080
2042 # Save the json 2081 # Save the json
2043 - with open(runDir + "/results/geometry/json/" +name_data + " .json", 'w', encoding='utf-8') as f: 2082 + with open(runDir + "/results/geometry/json/" +name_data + ".json", 'w', encoding='utf-8') as f:
2044 json.dump(summary_data, f, indent=4) 2083 json.dump(summary_data, f, indent=4)
2045 2084
2046 @trace_unhandled_exceptions 2085 @trace_unhandled_exceptions
...@@ -2122,25 +2161,25 @@ def gmm_aa_dists(): ...@@ -2122,25 +2161,25 @@ def gmm_aa_dists():
2122 GMM_histo(c2p_o2p, "C2'-O2'") 2161 GMM_histo(c2p_o2p, "C2'-O2'")
2123 2162
2124 if len(op3_p) > 0 : 2163 if len(op3_p) > 0 :
2125 - GMM_histo(op3_p, "OP3-P", toric=False, hist=False, couleur= 'lightcoral') 2164 + GMM_histo(op3_p, "OP3-P", toric=False, hist=False, col= 'lightcoral')
2126 - GMM_histo(p_op1, "P-OP1", toric=False, hist=False, couleur='gold') 2165 + GMM_histo(p_op1, "P-OP1", toric=False, hist=False, col='gold')
2127 - GMM_histo(p_op2, "P-OP2", toric=False, hist=False, couleur='lightseagreen') 2166 + GMM_histo(p_op2, "P-OP2", toric=False, hist=False, col='lightseagreen')
2128 - GMM_histo(last_o3p_p, "O3'-P", toric=False, hist=False, couleur='saddlebrown') 2167 + GMM_histo(last_o3p_p, "O3'-P", toric=False, hist=False, col='saddlebrown')
2129 - GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, couleur='darkturquoise') 2168 + GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkturquoise')
2130 - GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, couleur='darkkhaki') 2169 + GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='darkkhaki')
2131 - GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, couleur='indigo') 2170 + GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='indigo')
2132 - GMM_histo(c4p_o4p, "C4'-O4'", toric=False, hist=False, couleur='maroon') 2171 + GMM_histo(c4p_o4p, "C4'-O4'", toric=False, hist=False, col='maroon')
2133 - GMM_histo(c4p_c3p, "C4'-C3'", toric=False, hist=False, couleur='burlywood') 2172 + GMM_histo(c4p_c3p, "C4'-C3'", toric=False, hist=False, col='burlywood')
2134 - GMM_histo(c3p_o3p, "C3'-O3'", toric=False, hist=False, couleur='steelblue') 2173 + GMM_histo(c3p_o3p, "C3'-O3'", toric=False, hist=False, col='steelblue')
2135 - GMM_histo(o4p_c1p, "O4'-C1'", toric=False, hist=False, couleur='tomato') 2174 + GMM_histo(o4p_c1p, "O4'-C1'", toric=False, hist=False, col='tomato')
2136 - GMM_histo(c1p_c2p, "C1'-C2'", toric=False, hist=False, couleur='darkolivegreen') 2175 + GMM_histo(c1p_c2p, "C1'-C2'", toric=False, hist=False, col='darkolivegreen')
2137 - GMM_histo(c2p_c3p, "C2'-C3'", toric=False, hist=False, couleur='orchid') 2176 + GMM_histo(c2p_c3p, "C2'-C3'", toric=False, hist=False, col='orchid')
2138 - GMM_histo(c2p_o2p, "C2'-O2'", toric=False, hist=False, couleur='deeppink') 2177 + GMM_histo(c2p_o2p, "C2'-O2'", toric=False, hist=False, col='deeppink')
2139 axes=plt.gca() 2178 axes=plt.gca()
2140 axes.set_ylim(0, 100) 2179 axes.set_ylim(0, 100)
2141 - plt.xlabel("Distance (Angström)") 2180 + plt.xlabel("Distance (Angströms)")
2142 - plt.title("GMM des distances entre atomes communs ") 2181 + plt.title("GMM of distances between common atoms ")
2143 - plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/commun/" + "GMM des distances entre atomes communs .png") 2182 + plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/commun/" + "GMM_distances_common_atoms.png")
2144 plt.close() 2183 plt.close()
2145 2184
2146 os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/purines/", exist_ok=True) 2185 os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/purines/", exist_ok=True)
...@@ -2161,25 +2200,25 @@ def gmm_aa_dists(): ...@@ -2161,25 +2200,25 @@ def gmm_aa_dists():
2161 GMM_histo(c4_n9, "C4-N9") 2200 GMM_histo(c4_n9, "C4-N9")
2162 GMM_histo(c4_c5, "C4-C5") 2201 GMM_histo(c4_c5, "C4-C5")
2163 2202
2164 - GMM_histo(c1p_n9, "C1'-N9", hist=False, couleur='lightcoral') 2203 + GMM_histo(c1p_n9, "C1'-N9", hist=False, col='lightcoral')
2165 - GMM_histo(n9_c8, "N9-C8", hist=False, couleur='gold') 2204 + GMM_histo(n9_c8, "N9-C8", hist=False, col='gold')
2166 - GMM_histo(c8_n7, "C8-N7", hist=False, couleur='lightseagreen') 2205 + GMM_histo(c8_n7, "C8-N7", hist=False, col='lightseagreen')
2167 - GMM_histo(n7_c5, "N7-C5", hist=False, couleur='saddlebrown') 2206 + GMM_histo(n7_c5, "N7-C5", hist=False, col='saddlebrown')
2168 - GMM_histo(c5_c6, "C5-C6", hist=False, couleur='darkturquoise') 2207 + GMM_histo(c5_c6, "C5-C6", hist=False, col='darkturquoise')
2169 - GMM_histo(c6_o6, "C6-O6", hist=False, couleur='darkkhaki') 2208 + GMM_histo(c6_o6, "C6-O6", hist=False, col='darkkhaki')
2170 - GMM_histo(c6_n6, "C6-N6", hist=False, couleur='indigo') 2209 + GMM_histo(c6_n6, "C6-N6", hist=False, col='indigo')
2171 - GMM_histo(c6_n1, "C6-N1", hist=False, couleur='maroon') 2210 + GMM_histo(c6_n1, "C6-N1", hist=False, col='maroon')
2172 - GMM_histo(n1_c2, "N1-C2", hist=False, couleur='burlywood') 2211 + GMM_histo(n1_c2, "N1-C2", hist=False, col='burlywood')
2173 - GMM_histo(c2_n2, "C2-N2", hist=False, couleur='steelblue') 2212 + GMM_histo(c2_n2, "C2-N2", hist=False, col='steelblue')
2174 - GMM_histo(c2_n3, "C2-N3", hist=False, couleur='tomato') 2213 + GMM_histo(c2_n3, "C2-N3", hist=False, col='tomato')
2175 - GMM_histo(n3_c4, "N3-C4", hist=False, couleur='darkolivegreen') 2214 + GMM_histo(n3_c4, "N3-C4", hist=False, col='darkolivegreen')
2176 - GMM_histo(c4_n9, "C4-N9", hist=False, couleur='orchid') 2215 + GMM_histo(c4_n9, "C4-N9", hist=False, col='orchid')
2177 - GMM_histo(c4_c5, "C4-C5", hist=False, couleur='deeppink') 2216 + GMM_histo(c4_c5, "C4-C5", hist=False, col='deeppink')
2178 axes=plt.gca() 2217 axes=plt.gca()
2179 axes.set_ylim(0, 100) 2218 axes.set_ylim(0, 100)
2180 - plt.xlabel("Distance (Angström)") 2219 + plt.xlabel("Distance (Angströms)")
2181 - plt.title("GMM des distances entre atomes des cycles purines", fontsize=10) 2220 + plt.title("GMM of distances between atoms of the purine cycles", fontsize=10)
2182 - plt.savefig(runDir+ "/results/figures/GMM/all-atoms/distances/purines/" + "GMM des distances entre atomes des cycles purines.png") 2221 + plt.savefig(runDir+ "/results/figures/GMM/all-atoms/distances/purines/" + "GMM_distances_purine_cycles.png")
2183 plt.close() 2222 plt.close()
2184 2223
2185 os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/", exist_ok=True) 2224 os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/", exist_ok=True)
...@@ -2197,22 +2236,22 @@ def gmm_aa_dists(): ...@@ -2197,22 +2236,22 @@ def gmm_aa_dists():
2197 GMM_histo(c4_n4, "C4-N4") 2236 GMM_histo(c4_n4, "C4-N4")
2198 GMM_histo(c4_o4, "C4-O4") 2237 GMM_histo(c4_o4, "C4-O4")
2199 2238
2200 - GMM_histo(c1p_n1, "C1'-N1", hist=False, couleur='lightcoral') 2239 + GMM_histo(c1p_n1, "C1'-N1", hist=False, col='lightcoral')
2201 - GMM_histo(n1_c6, "N1-C6", hist=False, couleur='gold') 2240 + GMM_histo(n1_c6, "N1-C6", hist=False, col='gold')
2202 - GMM_histo(c6_c5, "C6-C5", hist=False, couleur='lightseagreen') 2241 + GMM_histo(c6_c5, "C6-C5", hist=False, col='lightseagreen')
2203 - GMM_histo(c5_c4, "C5-C4", hist=False, couleur='deeppink') 2242 + GMM_histo(c5_c4, "C5-C4", hist=False, col='deeppink')
2204 - GMM_histo(c4_n3, "C4-N3", hist=False, couleur='red') 2243 + GMM_histo(c4_n3, "C4-N3", hist=False, col='red')
2205 - GMM_histo(n3_c2, "N3-C2", hist=False, couleur='lime') 2244 + GMM_histo(n3_c2, "N3-C2", hist=False, col='lime')
2206 - GMM_histo(c2_o2, "C2-O2", hist=False, couleur='indigo') 2245 + GMM_histo(c2_o2, "C2-O2", hist=False, col='indigo')
2207 - GMM_histo(c2_n1, "C2-N1", hist=False, couleur='maroon') 2246 + GMM_histo(c2_n1, "C2-N1", hist=False, col='maroon')
2208 - GMM_histo(c4_n4, "C4-N4", hist=False, couleur='burlywood') 2247 + GMM_histo(c4_n4, "C4-N4", hist=False, col='burlywood')
2209 - GMM_histo(c4_o4, "C4-O4", hist=False, couleur='steelblue') 2248 + GMM_histo(c4_o4, "C4-O4", hist=False, col='steelblue')
2210 axes=plt.gca() 2249 axes=plt.gca()
2211 #axes.set_xlim(1, 2) 2250 #axes.set_xlim(1, 2)
2212 axes.set_ylim(0, 100) 2251 axes.set_ylim(0, 100)
2213 - plt.xlabel("Distance (Angström)") 2252 + plt.xlabel("Distance (Angströms")
2214 - plt.title("GMM des distances entre atomes des cycles pyrimidines", fontsize=10) 2253 + plt.title("GMM of distances between atoms of the pyrimidine cycles", fontsize=10)
2215 - plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/pyrimidines/" + "GMM des distances entre atomes des cycles pyrimidines.png") 2254 + plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/pyrimidines/" + "GMM_distances_pyrimidine_cycles.png")
2216 plt.close() 2255 plt.close()
2217 2256
2218 os.chdir(runDir) 2257 os.chdir(runDir)
...@@ -2268,16 +2307,16 @@ def gmm_aa_torsions(): ...@@ -2268,16 +2307,16 @@ def gmm_aa_torsions():
2268 GMM_histo(zeta, "Zeta", toric=True) 2307 GMM_histo(zeta, "Zeta", toric=True)
2269 GMM_histo(chi, "Xhi", toric=True) 2308 GMM_histo(chi, "Xhi", toric=True)
2270 2309
2271 - GMM_histo(alpha, "Alpha", toric=True, hist=False, couleur='red') 2310 + GMM_histo(alpha, "Alpha", toric=True, hist=False, col='red')
2272 - GMM_histo(beta, "Beta", toric=True, hist=False, couleur='firebrick') 2311 + GMM_histo(beta, "Beta", toric=True, hist=False, col='firebrick')
2273 - GMM_histo(gamma, "Gamma", toric=True, hist=False, couleur='limegreen') 2312 + GMM_histo(gamma, "Gamma", toric=True, hist=False, col='limegreen')
2274 - GMM_histo(delta, "Delta", toric=True, hist=False, couleur='darkslateblue') 2313 + GMM_histo(delta, "Delta", toric=True, hist=False, col='darkslateblue')
2275 - GMM_histo(epsilon, "Epsilon", toric=True, hist=False, couleur='goldenrod') 2314 + GMM_histo(epsilon, "Epsilon", toric=True, hist=False, col='goldenrod')
2276 - GMM_histo(zeta, "Zeta", toric=True, hist=False, couleur='teal') 2315 + GMM_histo(zeta, "Zeta", toric=True, hist=False, col='teal')
2277 - GMM_histo(chi, "Xhi", toric=True, hist=False, couleur='hotpink') 2316 + GMM_histo(chi, "Xhi", toric=True, hist=False, col='hotpink')
2278 - plt.xlabel("Angle(Degré)") 2317 + plt.xlabel("Angle (Degrees)")
2279 - plt.title("GMM des angles de torsion") 2318 + plt.title("GMM of torsion angles")
2280 - plt.savefig("GMM des angles de torsion.png") 2319 + plt.savefig("GMM_torsions.png")
2281 plt.close() 2320 plt.close()
2282 2321
2283 os.chdir(runDir) 2322 os.chdir(runDir)
...@@ -2301,20 +2340,20 @@ def gmm_wadley(): ...@@ -2301,20 +2340,20 @@ def gmm_wadley():
2301 2340
2302 GMM_histo(p_c1p, "P-C1'") 2341 GMM_histo(p_c1p, "P-C1'")
2303 GMM_histo(c1p_p, "C1'-P") 2342 GMM_histo(c1p_p, "C1'-P")
2304 - GMM_histo(p_c1p, "P-C4'") 2343 + GMM_histo(p_c4p, "P-C4'")
2305 - GMM_histo(c1p_p, "C4'-P") 2344 + GMM_histo(c4p_p, "C4'-P")
2306 - 2345 +
2307 - GMM_histo(p_c1p, "P-C4'", toric=False, hist=False, couleur='gold') 2346 + GMM_histo(p_c4p, "P-C4'", toric=False, hist=False, col='gold')
2308 - GMM_histo(c1p_p, "C4'-P", toric=False, hist=False, couleur='indigo') 2347 + GMM_histo(c4p_p, "C4'-P", toric=False, hist=False, col='indigo')
2309 - GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, couleur='firebrick') 2348 + GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, col='firebrick')
2310 - GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, couleur='seagreen') 2349 + GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, col='seagreen')
2311 - plt.xlabel("Distance(Angström)") 2350 + plt.xlabel("Distance (Angströms)")
2312 - plt.title("GMM des distances (Pyle model)") 2351 + plt.title("GMM of distances (Pyle model)")
2313 - plt.savefig("GMM des distances (Pyle model).png") 2352 + plt.savefig("GMM_distances_pyle_model.png")
2314 plt.close() 2353 plt.close()
2315 2354
2316 # Flat Angles 2355 # Flat Angles
2317 - df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/angles_plans_wadley.csv")) 2356 + df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle.csv"))
2318 2357
2319 p_c1p_psuiv = list(df["P-C1'-P°"][~ np.isnan(df["P-C1'-P°"])]) 2358 p_c1p_psuiv = list(df["P-C1'-P°"][~ np.isnan(df["P-C1'-P°"])])
2320 c1p_psuiv_c1psuiv = list(df["C1'-P°-C1'°"][~ np.isnan(df["C1'-P°-C1'°"])]) 2359 c1p_psuiv_c1psuiv = list(df["C1'-P°-C1'°"][~ np.isnan(df["C1'-P°-C1'°"])])
...@@ -2326,11 +2365,11 @@ def gmm_wadley(): ...@@ -2326,11 +2365,11 @@ def gmm_wadley():
2326 GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True) 2365 GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True)
2327 GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True) 2366 GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True)
2328 2367
2329 - GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True, hist=False, couleur='firebrick') 2368 + GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True, hist=False, col='firebrick')
2330 - GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True, hist=False, couleur='seagreen') 2369 + GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True, hist=False, col='seagreen')
2331 - plt.xlabel("Angle(Degré)") 2370 + plt.xlabel("Angle (Degrees)")
2332 - plt.title("GMM des angles plans (Pyle model)") 2371 + plt.title("GMM of flat angles (Pyle model)")
2333 - plt.savefig("GMM des angles plans (Pyle model).png") 2372 + plt.savefig("GMM_flat_angles_pyle_model.png")
2334 plt.close() 2373 plt.close()
2335 2374
2336 # Torsion angles 2375 # Torsion angles
...@@ -2367,15 +2406,15 @@ def gmm_wadley(): ...@@ -2367,15 +2406,15 @@ def gmm_wadley():
2367 GMM_histo(eta_base, "Eta''", toric=True) 2406 GMM_histo(eta_base, "Eta''", toric=True)
2368 GMM_histo(theta_base, "Theta''", toric=True) 2407 GMM_histo(theta_base, "Theta''", toric=True)
2369 2408
2370 - GMM_histo(eta, "Eta", toric=True, hist=False, couleur='mediumaquamarine') 2409 + GMM_histo(eta, "Eta", toric=True, hist=False, col='mediumaquamarine')
2371 - GMM_histo(theta, "Theta", toric=True, hist=False, couleur='darkorchid') 2410 + GMM_histo(theta, "Theta", toric=True, hist=False, col='darkorchid')
2372 - GMM_histo(eta_prime, "Eta'", toric=True, hist=False, couleur='cyan') 2411 + GMM_histo(eta_prime, "Eta'", toric=True, hist=False, col='cyan')
2373 - GMM_histo(theta_prime, "Theta'", toric=True, hist=False, couleur='crimson') 2412 + GMM_histo(theta_prime, "Theta'", toric=True, hist=False, col='crimson')
2374 - GMM_histo(eta_base, "Eta''", toric=True, hist=False, couleur='royalblue') 2413 + GMM_histo(eta_base, "Eta''", toric=True, hist=False, col='royalblue')
2375 - GMM_histo(theta_base, "Theta''", toric=True, hist=False, couleur='palevioletred') 2414 + GMM_histo(theta_base, "Theta''", toric=True, hist=False, col='palevioletred')
2376 - plt.xlabel("Angle(Degré)") 2415 + plt.xlabel("Angle (Degrees)")
2377 - plt.title("GMM des angles de pseudotorsion") 2416 + plt.title("GMM of pseudo-torsion angles (Pyle Model)")
2378 - plt.savefig("GMM des angles de pseudotorsion.png") 2417 + plt.savefig("GMM_pseudotorsion_angles_pyle_model.png")
2379 plt.close() 2418 plt.close()
2380 2419
2381 os.chdir(runDir) 2420 os.chdir(runDir)
...@@ -2411,18 +2450,18 @@ def gmm_hrna(): ...@@ -2411,18 +2450,18 @@ def gmm_hrna():
2411 GMM_histo(p_o5p, "P-O5'") 2450 GMM_histo(p_o5p, "P-O5'")
2412 GMM_histo(last_c4p_p, "C4'-P") 2451 GMM_histo(last_c4p_p, "C4'-P")
2413 2452
2414 - GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, couleur='lightcoral') 2453 + GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='lightcoral')
2415 - GMM_histo(b1_b2, "B1-B2", toric=False, hist=False, couleur='limegreen') 2454 + GMM_histo(b1_b2, "B1-B2", toric=False, hist=False, col='limegreen')
2416 - GMM_histo(c1p_b1, "C1'-B1", toric=False, hist=False, couleur='tomato') 2455 + GMM_histo(c1p_b1, "C1'-B1", toric=False, hist=False, col='tomato')
2417 - GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, couleur='aquamarine') 2456 + GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='aquamarine')
2418 - GMM_histo(c4p_c1p, "C4'-C1'", toric=False, hist=False, couleur='goldenrod') 2457 + GMM_histo(c4p_c1p, "C4'-C1'", toric=False, hist=False, col='goldenrod')
2419 - GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, couleur='darkcyan') 2458 + GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkcyan')
2420 - GMM_histo(last_c4p_p, "C4'-P", toric=False, hist=False, couleur='deeppink') 2459 + GMM_histo(last_c4p_p, "C4'-P", toric=False, hist=False, col='deeppink')
2421 axes = plt.gca() 2460 axes = plt.gca()
2422 axes.set_ylim(0, 100) 2461 axes.set_ylim(0, 100)
2423 - plt.xlabel("Distance (Angström)") 2462 + plt.xlabel("Distance (Angströms)")
2424 - plt.title("GMM des distances entre atomes HiRE-RNA") 2463 + plt.title("GMM of distances between HiRE-RNA beads")
2425 - plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM des distances entre atomes HiRE-RNA.png") 2464 + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM_distances_HiRE_RNA.png")
2426 plt.close() 2465 plt.close()
2427 2466
2428 # Angles 2467 # Angles
...@@ -2449,19 +2488,19 @@ def gmm_hrna(): ...@@ -2449,19 +2488,19 @@ def gmm_hrna():
2449 GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True) 2488 GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True)
2450 GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True) 2489 GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True)
2451 2490
2452 - GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True, hist=False, couleur='lightcoral') 2491 + GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True, hist=False, col='lightcoral')
2453 - GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True, hist=False, couleur='limegreen') 2492 + GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True, hist=False, col='limegreen')
2454 - GMM_histo(lastc5p_lastc4p_p, "C5'-C4'-P", toric=True, hist=False, couleur='tomato') 2493 + GMM_histo(lastc5p_lastc4p_p, "C5'-C4'-P", toric=True, hist=False, col='tomato')
2455 - GMM_histo(p_o5p_c5p, "P-O5'-C5'", toric=True, hist=False, couleur='aquamarine') 2494 + GMM_histo(p_o5p_c5p, "P-O5'-C5'", toric=True, hist=False, col='aquamarine')
2456 - GMM_histo(o5p_c5p_c4p, "O5'-C5'-C4'", toric=True, hist=False, couleur='goldenrod') 2495 + GMM_histo(o5p_c5p_c4p, "O5'-C5'-C4'", toric=True, hist=False, col='goldenrod')
2457 - GMM_histo(c5p_c4p_c1p, "C5'-C4'-C1'", toric=True, hist=False, couleur='darkcyan') 2496 + GMM_histo(c5p_c4p_c1p, "C5'-C4'-C1'", toric=True, hist=False, col='darkcyan')
2458 - GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True, hist=False, couleur='deeppink') 2497 + GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True, hist=False, col='deeppink')
2459 - GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True, hist=False, couleur='indigo') 2498 + GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True, hist=False, col='indigo')
2460 axes = plt.gca() 2499 axes = plt.gca()
2461 axes.set_ylim(0, 100) 2500 axes.set_ylim(0, 100)
2462 - plt.xlabel("Angle (Degré)") 2501 + plt.xlabel("Angle (Degres)")
2463 - plt.title("GMM des angles entre atomes HiRE-RNA") 2502 + plt.title("GMM of angles between HiRE-RNA beads")
2464 - plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM des angles entre atomes HiRE-RNA.png") 2503 + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM_angles_HiRE_RNA.png")
2465 plt.close() 2504 plt.close()
2466 2505
2467 # Torsions 2506 # Torsions
...@@ -2488,24 +2527,24 @@ def gmm_hrna(): ...@@ -2488,24 +2527,24 @@ def gmm_hrna():
2488 GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True) 2527 GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True)
2489 GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True) 2528 GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True)
2490 2529
2491 - GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True, hist=False, couleur='darkred') 2530 + GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True, hist=False, col='darkred')
2492 - GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True, hist=False, couleur='chocolate') 2531 + GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True, hist=False, col='chocolate')
2493 - GMM_histo(c5_c4_c1_b1, "C5'-C4'-C1'-B1", toric=True, hist=False, couleur='mediumvioletred') 2532 + GMM_histo(c5_c4_c1_b1, "C5'-C4'-C1'-B1", toric=True, hist=False, col='mediumvioletred')
2494 - GMM_histo(c4_c1_b1_b2, "C4'-C1'-B1-B2", toric=True, hist=False, couleur='cadetblue') 2533 + GMM_histo(c4_c1_b1_b2, "C4'-C1'-B1-B2", toric=True, hist=False, col='cadetblue')
2495 - GMM_histo(o5_c5_c4_psuiv, "O5'-C5'-C4'-P°", toric=True, hist=False, couleur='darkkhaki') 2534 + GMM_histo(o5_c5_c4_psuiv, "O5'-C5'-C4'-P°", toric=True, hist=False, col='darkkhaki')
2496 - GMM_histo(c5_c4_psuiv_o5suiv, "C5'-C4'-P°-O5'°", toric=True, hist=False, couleur='springgreen') 2535 + GMM_histo(c5_c4_psuiv_o5suiv, "C5'-C4'-P°-O5'°", toric=True, hist=False, col='springgreen')
2497 - GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True, hist=False, couleur='indigo') 2536 + GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True, hist=False, col='indigo')
2498 - GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True, hist=False, couleur='gold') 2537 + GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True, hist=False, col='gold')
2499 - plt.xlabel("Angle(Degré)") 2538 + plt.xlabel("Angle (Degrees)")
2500 - plt.title("GMM des angles de torsion (hire-RNA)") 2539 + plt.title("GMM of torsion angles between HiRE-RNA beads")
2501 - plt.savefig("GMM des angles de torsion (hire-RNA).png") 2540 + plt.savefig("GMM_torsions_HiRE_RNA.png")
2502 plt.close() 2541 plt.close()
2503 2542
2504 os.chdir(runDir) 2543 os.chdir(runDir)
2505 setproctitle("GMM (HiRE-RNA) finished") 2544 setproctitle("GMM (HiRE-RNA) finished")
2506 2545
2507 @trace_unhandled_exceptions 2546 @trace_unhandled_exceptions
2508 -def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance): 2547 +def gmm_hrna_basepair_type(type_LW, ntpair, data):
2509 """ 2548 """
2510 function to plot the statistical figures you want 2549 function to plot the statistical figures you want
2511 By type of pairing: 2550 By type of pairing:
...@@ -2520,196 +2559,116 @@ def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance ...@@ -2520,196 +2559,116 @@ def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance
2520 plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5) 2559 plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5)
2521 2560
2522 plt.subplot(2, 1, 1) 2561 plt.subplot(2, 1, 1)
2523 - 2562 + GMM_histo(data["211_angle"], f"{type_LW}_{ntpair}_C1'-B1-B1pair", toric=True, hist=False, col='cyan' )
2524 - if len(angle_1) > 0 : 2563 + GMM_histo(data["112_angle"], f"{type_LW}_{ntpair}_B1-B1pair-C1'pair", toric=True, hist=False, col='magenta')
2525 - GMM_histo(angle_1, "C4'-C1'-B1", toric=True, hist=False, couleur='cyan' ) 2564 + GMM_histo(data["3211_torsion"], f"{type_LW}_{ntpair}_C4'-C1'-B1-B1pair", toric=True, hist=False, col='black' )
2526 - if len(angle_2) > 0 : 2565 + GMM_histo(data["1123_torsion"], f"{type_LW}_{ntpair}_B1-B1pair-C1'pair-C4'pair", toric=True, hist=False, col='maroon')
2527 - GMM_histo(angle_2, "C1'-B1-B1pair", toric=True, hist=False, couleur='magenta') 2566 + GMM_histo(data["alpha1"], f"{type_LW}_{ntpair}_alpha_1", toric=True, hist=False, col="yellow")
2528 - if len(angle_3) > 0 : 2567 + GMM_histo(data["alpha2"], f"{type_LW}_{ntpair}_alpha_2", toric=True, hist=False, col='olive')
2529 - GMM_histo(angle_3, "B1-B1pair-C1'pair", toric=True, hist=False, couleur="yellow") 2568 + plt.xlabel("Angle (degree)")
2530 - if len(angle_4) > 0 : 2569 + plt.title(f"GMM of plane angles for {type_LW} {ntpair} basepairs", fontsize=10)
2531 - GMM_histo(angle_4, "B1pair-C1'pair-C4'pair", toric=True, hist=False, couleur='olive')
2532 - plt.xlabel("Angle(degré)")
2533 - plt.title("GMM des angles plans pour les measure_hrna_basepairs " +type_LW , fontsize=10)
2534 2570
2535 plt.subplot(2, 1, 2) 2571 plt.subplot(2, 1, 2)
2536 - if len(distance)>0 : 2572 + GMM_histo(data["Distance"], f"Distance between {type_LW} {ntpair} tips", toric=False, hist=False, col="cyan")
2537 - GMM_histo(distance, "Distance pointes " + type_LW, save=False) 2573 + GMM_histo(data["dB1"], f"{type_LW} {ntpair} dB1", toric=False, hist=False, col="tomato")
2538 - 2574 + GMM_histo(data["dB2"], f"{type_LW} {ntpair} dB2", toric=False, hist=False, col="goldenrod")
2539 - plt.savefig("Mesures measure_hrna_basepairs " +type_LW+ ".png" ) 2575 + plt.xlabel("Distance (Angströms)")
2576 + plt.title(f"GMM of distances for {type_LW} {ntpair} basepairs", fontsize=10)
2577 +
2578 + plt.savefig(f"{type_LW}_{ntpair}_basepairs.png" )
2540 plt.close() 2579 plt.close()
2541 - setproctitle(f"GMM (HiRE-RNA {type_LW} basepairs) finished") 2580 + setproctitle(f"GMM (HiRE-RNA {type_LW} {ntpair} basepairs) finished")
2542 2581
2543 @trace_unhandled_exceptions 2582 @trace_unhandled_exceptions
2544 def gmm_hrna_basepairs(): 2583 def gmm_hrna_basepairs():
2545 2584
2546 setproctitle("GMM (HiRE-RNA basepairs)") 2585 setproctitle("GMM (HiRE-RNA basepairs)")
2547 2586
2548 - df=pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv")) 2587 + df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv"))
2549 - 2588 +
2550 - cWW=df[df['type LW']=='cWW'] 2589 + lw = ["cWW", "tWW", "cWH", "tWH", "cHW", "tHW", "cWS", "tWS", "cSW", "tSW", "cHH", "tHH", "cSH", "tSH", "cHS", "tHS", "cSS", "tSS"]
2551 - cWW_dist=list(cWW["Distance"])
2552 - cWW_angle_1=list(cWW["C4'-C1'-B1"])
2553 - cWW_angle_2=list(cWW["C1'-B1-B1pair"])
2554 - cWW_angle_3=list(cWW["B1-B1pair-C1'pair"])
2555 - cWW_angle_4=list(cWW["B1pair-C1'pair-C4'pair"])
2556 - tWW=df[df['type LW']=='tWW']
2557 - tWW_dist=list(tWW["Distance"])
2558 - tWW_angle_1=list(tWW["C4'-C1'-B1"])
2559 - tWW_angle_2=list(tWW["C1'-B1-B1pair"])
2560 - tWW_angle_3=list(tWW["B1-B1pair-C1'pair"])
2561 - tWW_angle_4=list(tWW["B1pair-C1'pair-C4'pair"])
2562 - cWH=df[df['type LW']=='cWH']
2563 - cWH_dist=list(cWH["Distance"])
2564 - cWH_angle_1=list(cWH["C4'-C1'-B1"])
2565 - cWH_angle_2=list(cWH["C1'-B1-B1pair"])
2566 - cWH_angle_3=list(cWH["B1-B1pair-C1'pair"])
2567 - cWH_angle_4=list(cWH["B1pair-C1'pair-C4'pair"])
2568 - tWH=df[df['type LW']=='tWH']
2569 - tWH_dist=list(tWH["Distance"])
2570 - tWH_angle_1=list(tWH["C4'-C1'-B1"])
2571 - tWH_angle_2=list(tWH["C1'-B1-B1pair"])
2572 - tWH_angle_3=list(tWH["B1-B1pair-C1'pair"])
2573 - tWH_angle_4=list(tWH["B1pair-C1'pair-C4'pair"])
2574 - cHW=df[df['type LW']=='cHW']
2575 - cHW_dist=list(cHW["Distance"])
2576 - cHW_angle_1=list(cHW["C4'-C1'-B1"])
2577 - cHW_angle_2=list(cHW["C1'-B1-B1pair"])
2578 - cHW_angle_3=list(cHW["B1-B1pair-C1'pair"])
2579 - cHW_angle_4=list(cHW["B1pair-C1'pair-C4'pair"])
2580 - tHW=df[df['type LW']=='tHW']
2581 - tHW_dist=list(tHW["Distance"])
2582 - tHW_angle_1=list(tHW["C4'-C1'-B1"])
2583 - tHW_angle_2=list(tHW["C1'-B1-B1pair"])
2584 - tHW_angle_3=list(tHW["B1-B1pair-C1'pair"])
2585 - tHW_angle_4=list(tHW["B1pair-C1'pair-C4'pair"])
2586 - cWS=df[df['type LW']=='cWS']
2587 - cWS_dist=list(cWS["Distance"])
2588 - cWS_angle_1=list(cWS["C4'-C1'-B1"])
2589 - cWS_angle_2=list(cWS["C1'-B1-B1pair"])
2590 - cWS_angle_3=list(cWS["B1-B1pair-C1'pair"])
2591 - cWS_angle_4=list(cWS["B1pair-C1'pair-C4'pair"])
2592 - tWS=df[df['type LW']=='tWS']
2593 - tWS_dist=list(tWS["Distance"])
2594 - tWS_angle_1=list(tWS["C4'-C1'-B1"])
2595 - tWS_angle_2=list(tWS["C1'-B1-B1pair"])
2596 - tWS_angle_3=list(tWS["B1-B1pair-C1'pair"])
2597 - tWS_angle_4=list(tWS["B1pair-C1'pair-C4'pair"])
2598 - cSW=df[df['type LW']=='cSW']
2599 - cSW_dist=list(cSW["Distance"])
2600 - cSW_angle_1=list(cSW["C4'-C1'-B1"])
2601 - cSW_angle_2=list(cSW["C1'-B1-B1pair"])
2602 - cSW_angle_3=list(cSW["B1-B1pair-C1'pair"])
2603 - cSW_angle_4=list(cSW["B1pair-C1'pair-C4'pair"])
2604 - tSW=df[df['type LW']=='tSW']
2605 - tSW_dist=list(tSW["Distance"])
2606 - tSW_angle_1=list(tSW["C4'-C1'-B1"])
2607 - tSW_angle_2=list(tSW["C1'-B1-B1pair"])
2608 - tSW_angle_3=list(tSW["B1-B1pair-C1'pair"])
2609 - tSW_angle_4=list(tSW["B1pair-C1'pair-C4'pair"])
2610 - cHH=df[df['type LW']=='cHH']
2611 - cHH_dist=list(cHH["Distance"])
2612 - cHH_angle_1=list(cHH["C4'-C1'-B1"])
2613 - cHH_angle_2=list(cHH["C1'-B1-B1pair"])
2614 - cHH_angle_3=list(cHH["B1-B1pair-C1'pair"])
2615 - cHH_angle_4=list(cHH["B1pair-C1'pair-C4'pair"])
2616 - tHH=df[df['type LW']=='tHH']
2617 - tHH_dist=list(tHH["Distance"])
2618 - tHH_angle_1=list(tHH["C4'-C1'-B1"])
2619 - tHH_angle_2=list(tHH["C1'-B1-B1pair"])
2620 - tHH_angle_3=list(tHH["B1-B1pair-C1'pair"])
2621 - tHH_angle_4=list(tHH["B1pair-C1'pair-C4'pair"])
2622 - cSH=df[df['type LW']=='cSH']
2623 - cSH_dist=list(cSH["Distance"])
2624 - cSH_angle_1=list(cSH["C4'-C1'-B1"])
2625 - cSH_angle_2=list(cSH["C1'-B1-B1pair"])
2626 - cSH_angle_3=list(cSH["B1-B1pair-C1'pair"])
2627 - cSH_angle_4=list(cSH["B1pair-C1'pair-C4'pair"])
2628 - tSH=df[df['type LW']=='tSH']
2629 - tSH_dist=list(tSH["Distance"])
2630 - tSH_angle_1=list(tSH["C4'-C1'-B1"])
2631 - tSH_angle_2=list(tSH["C1'-B1-B1pair"])
2632 - tSH_angle_3=list(tSH["B1-B1pair-C1'pair"])
2633 - tSH_angle_4=list(tSH["B1pair-C1'pair-C4'pair"])
2634 - cHS=df[df['type LW']=='cHS']
2635 - cHS_dist=list(cHS["Distance"])
2636 - cHS_angle_1=list(cHS["C4'-C1'-B1"])
2637 - cHS_angle_2=list(cHS["C1'-B1-B1pair"])
2638 - cHS_angle_3=list(cHS["B1-B1pair-C1'pair"])
2639 - cHS_angle_4=list(cHS["B1pair-C1'pair-C4'pair"])
2640 - tHS=df[df['type LW']=='tHS']
2641 - tHS_dist=list(tHS["Distance"])
2642 - tHS_angle_1=list(tHS["C4'-C1'-B1"])
2643 - tHS_angle_2=list(tHS["C1'-B1-B1pair"])
2644 - tHS_angle_3=list(tHS["B1-B1pair-C1'pair"])
2645 - tHS_angle_4=list(tHS["B1pair-C1'pair-C4'pair"])
2646 - cSS=df[df['type LW']=='cSS']
2647 - cSS_dist=list(cSS["Distance"])
2648 - cSS_angle_1=list(cSS["C4'-C1'-B1"])
2649 - cSS_angle_2=list(cSS["C1'-B1-B1pair"])
2650 - cSS_angle_3=list(cSS["B1-B1pair-C1'pair"])
2651 - cSS_angle_4=list(cSS["B1pair-C1'pair-C4'pair"])
2652 - tSS=df[df['type LW']=='tSS']
2653 - tSS_dist=list(tSS["Distance"])
2654 - tSS_angle_1=list(tSS["C4'-C1'-B1"])
2655 - tSS_angle_2=list(tSS["C1'-B1-B1pair"])
2656 - tSS_angle_3=list(tSS["B1-B1pair-C1'pair"])
2657 - tSS_angle_4=list(tSS["B1pair-C1'pair-C4'pair"])
2658 2590
2659 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) 2591 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True)
2660 os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/") 2592 os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/")
2661 2593
2662 - gmm_hrna_basepair_type('cWW', cWW_angle_1, cWW_angle_2, cWW_angle_3, cWW_angle_4, cWW_dist) 2594 + for lw_type in lw:
2663 - gmm_hrna_basepair_type('tWW', tWW_angle_1, tWW_angle_2, tWW_angle_3, tWW_angle_4, tWW_dist) 2595 + data = df[df['type_LW'] == lw_type ]
2664 - gmm_hrna_basepair_type('cWH', cWH_angle_1, cWH_angle_2, cWH_angle_3, cWH_angle_4, cWH_dist) 2596 + if len(data):
2665 - gmm_hrna_basepair_type('tWH', tWH_angle_1, tWH_angle_2, tWH_angle_3, tWH_angle_4, tWH_dist) 2597 + for b1 in ['A','C','G','U']:
2666 - gmm_hrna_basepair_type('cHW', cHW_angle_1, cHW_angle_2, cHW_angle_3, cHW_angle_4, cHW_dist) 2598 + for b2 in ['A','C','G','U']:
2667 - gmm_hrna_basepair_type('tHW', tHW_angle_1, tHW_angle_2, tHW_angle_3, tHW_angle_4, tHW_dist) 2599 + thisbases = data[(data.nt1_res == b1)&(data.nt2_res == b2)]
2668 - gmm_hrna_basepair_type('tWS', tWS_angle_1, tWS_angle_2, tWS_angle_3, tWS_angle_4, tWS_dist) 2600 + if len(thisbases):
2669 - gmm_hrna_basepair_type('cWS', cWS_angle_1, cWS_angle_2, cWS_angle_3, cWS_angle_4, cWS_dist) 2601 + gmm_hrna_basepair_type(lw_type, b1+b2, thisbases)
2670 - gmm_hrna_basepair_type('tSW', tSW_angle_1, tSW_angle_2, tSW_angle_3, tSW_angle_4, tSW_dist) 2602 +
2671 - gmm_hrna_basepair_type('cSW', cSW_angle_1, cSW_angle_2, cSW_angle_3, cSW_angle_4, cSW_dist) 2603 + # colors = ['lightcoral', "lightseagreen", "black", "goldenrod", "olive", "steelblue", "silver", "deeppink", "navy",
2672 - gmm_hrna_basepair_type('cHH', cHH_angle_1, cHH_angle_2, cHH_angle_3, cHH_angle_4, cHH_dist) 2604 + # "sienna", "maroon", "orange", "mediumaquamarine", "tomato", "indigo", "orchid", "tan", "lime"]
2673 - gmm_hrna_basepair_type('tHH', tHH_angle_1, tHH_angle_2, tHH_angle_3, tHH_angle_4, tHH_dist) 2605 + # for lw_type, col in zip(lw, colors):
2674 - gmm_hrna_basepair_type('cSH', cSH_angle_1, cSH_angle_2, cSH_angle_3, cSH_angle_4, cSH_dist) 2606 + # data = df[df['type LW'] == lw_type]
2675 - gmm_hrna_basepair_type('tSH', tSH_angle_1, tSH_angle_2, tSH_angle_3, tSH_angle_4, tSH_dist) 2607 + # GMM_histo(data.Distance, lw_type, toric=False, hist=False, col=col)
2676 - gmm_hrna_basepair_type('cHS', cHS_angle_1, cHS_angle_2, cHS_angle_3, cHS_angle_4, cHS_dist) 2608 + # plt.xlabel('Distance (Angströms)')
2677 - gmm_hrna_basepair_type('tHS', tHS_angle_1, tHS_angle_2, tHS_angle_3, tHS_angle_4, tHS_dist) 2609 + # plt.title("GMM of distances between base tips ("+str(nt)+ " values)", fontsize=8)
2678 - gmm_hrna_basepair_type('cSS', cSS_angle_1, cSS_angle_2, cSS_angle_3, cSS_angle_4, cSS_dist) 2610 + # plt.savefig("distances_between_tips.png")
2679 - gmm_hrna_basepair_type('tSS', tSS_angle_1, tSS_angle_2, tSS_angle_3, tSS_angle_4, tSS_dist) 2611 + # plt.close()
2680 -
2681 - nc=len(cWW)+len(cHH)+len(cSS)+len(cWH)+len(cHW)+len(cWS)+len(cSW)+len(cHS)+len(cSH)
2682 - GMM_histo(cWW_dist, "cWW", toric=False, hist=False, couleur='lightcoral')
2683 - GMM_histo(cHH_dist, "cHH", toric=False, hist=False, couleur='lightseagreen')
2684 - GMM_histo(cSS_dist, "cSS", toric=False, hist=False, couleur='black')
2685 - GMM_histo(cWH_dist, "cWH", toric=False, hist=False, couleur='goldenrod')
2686 - GMM_histo(cHW_dist, "cHW", toric=False, hist=False, couleur='olive')
2687 - GMM_histo(cWS_dist, "cWS", toric=False, hist=False, couleur='steelblue')
2688 - GMM_histo(cSW_dist, "cSW", toric=False, hist=False, couleur='silver')
2689 - GMM_histo(cHS_dist, "cHS", toric=False, hist=False, couleur='deeppink')
2690 - GMM_histo(cSH_dist, "cSH", toric=False, hist=False, couleur='navy')
2691 - plt.xlabel('Distance (Angström)')
2692 - plt.title("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs cis ("+str(nc)+ " valeurs)", fontsize=8)
2693 - plt.savefig("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs cis (" +str(nc)+ " valeurs).png")
2694 - plt.close()
2695 -
2696 - nt=len(tWW)+len(tHH)+len(tSS)+len(tWH)+len(tHW)+len(tWS)+len(tSW)+len(tHS)+len(tSH)
2697 - GMM_histo(tWW_dist, "tWW", toric=False, hist=False, couleur='sienna')
2698 - GMM_histo(tHH_dist, "tHH", toric=False, hist=False, couleur='maroon')
2699 - GMM_histo(tSS_dist, "tSS", toric=False, hist=False, couleur='orange')
2700 - GMM_histo(tWH_dist, "tWH", toric=False, hist=False, couleur='mediumaquamarine')
2701 - GMM_histo(tHW_dist, "tHW", toric=False, hist=False, couleur='tomato')
2702 - GMM_histo(tWS_dist, "tWS", toric=False, hist=False, couleur='indigo')
2703 - GMM_histo(tSW_dist, "tSW", toric=False, hist=False, couleur='orchid')
2704 - GMM_histo(tHS_dist, "tHS", toric=False, hist=False, couleur='tan')
2705 - GMM_histo(tSH_dist, "tSH", toric=False, hist=False, couleur='lime')
2706 - plt.xlabel('Distance (Angström)')
2707 - plt.title("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs trans ("+str(nt)+ " valeurs)", fontsize=8)
2708 - plt.savefig("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs trans (" +str(nt)+ " valeurs).png")
2709 - plt.close()
2710 2612
2711 os.chdir(runDir) 2613 os.chdir(runDir)
2712 setproctitle(f"GMM (HiRE-RNA basepairs) finished") 2614 setproctitle(f"GMM (HiRE-RNA basepairs) finished")
2615 +
def merge_jsons():
    """
    Merge the individual JSON files found in <runDir>/results/geometry/json/
    into a few summary files, then delete the individual files.

    Files written (all in <runDir>/results/geometry/json/):
        all_atom_distances.json, all_atom_torsions.json,
        hirerna_distances.json, hirerna_angles.json, hirerna_torsions.json,
        and one hirerna_<nt1><nt2>_basepairs.json per ordered pair of A, C, G, U.

    Takes no argument and returns nothing; relies on the module-level runDir
    and on concat_jsons() to do the actual merging.
    """

    json_dir = runDir + "/results/geometry/json/"

    def as_paths(names):
        # Turn measure names into the paths of their individual JSON files
        return [ json_dir + x + ".json" for x in names ]

    # All atom distances
    bonds = as_paths(["O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'", "C1'-N9",
                      "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5",
                      "C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"])
    concat_jsons(bonds, json_dir + "all_atom_distances.json")

    # All atom torsions
    torsions = as_paths(["Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Xhi", "Zeta"])
    concat_jsons(torsions, json_dir + "all_atom_torsions.json")

    # HiRE-RNA distances
    # NOTE(review): "P-O5'", "O5'-C5'" and "C5'-C4'" are also listed in the all-atom
    # bonds above, so the same three files end up in both merged outputs -- confirm
    # this is intended.
    hrnabonds = as_paths(["P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2", "C4'-P"])
    concat_jsons(hrnabonds, json_dir + "hirerna_distances.json")

    # HiRE-RNA angles
    hrnaangles = as_paths(["P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2", "C4'-P-O5'", "C5'-C4'-P", "C1'-C4'-P"])
    concat_jsons(hrnaangles, json_dir + "hirerna_angles.json")

    # HiRE-RNA torsions
    hrnators = as_paths(["P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "C4'-P°-O5'°-C5'°", "C5'-C4'-P°-O5'°", "C1'-C4'-P°-O5'°", "O5'-C5'-C4'-P°"])
    concat_jsons(hrnators, json_dir + "hirerna_torsions.json")

    # HiRE-RNA basepairs
    for nt1 in ['A', 'C', 'G', 'U']:
        for nt2 in ['A', 'C', 'G', 'U']:
            merged_name = f"hirerna_{nt1}{nt2}_basepairs.json"
            # The pattern also matches the merged file left by a previous run (it is
            # never deleted below): exclude it, or old results get nested into the new
            # file. Sorting makes the order of the merged entries reproducible.
            bps = sorted(f for f in glob.glob(json_dir + f"*{nt1}{nt2}*.json")
                         if os.path.basename(f) != merged_name)
            concat_jsons(bps, json_dir + merged_name)

    # Delete previous files: the named inputs, plus everything matching the
    # t*, c* and Distance* patterns (none of the merged outputs match those).
    stale = bonds + torsions + hrnabonds + hrnaangles + hrnators
    for pattern in ("t*.json", "c*.json", "Distance*.json"):
        stale += glob.glob(json_dir + pattern)
    for f in stale:
        try:
            os.remove(f)
        except FileNotFoundError:
            # never produced, or already removed (some names are listed twice)
            pass
2713 2672
2714 @trace_unhandled_exceptions 2673 @trace_unhandled_exceptions
2715 def concat_dataframes(fpath, outfilename): 2674 def concat_dataframes(fpath, outfilename):
...@@ -2735,6 +2694,23 @@ def concat_dataframes(fpath, outfilename): ...@@ -2735,6 +2694,23 @@ def concat_dataframes(fpath, outfilename):
2735 idxQueue.put(thr_idx) # replace the thread index in the queue 2694 idxQueue.put(thr_idx) # replace the thread index in the queue
2736 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") 2695 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
2737 2696
def concat_jsons(flist, outfilename):
    """
    Reads the JSON files computed by the geometry jobs and merges them into a
    single file.

    flist: iterable of paths to the JSON files to merge.
    outfilename: path of the JSON file to write (overwritten if it exists).

    The output is a JSON list holding the content of each input file, in the
    order of flist. Input files that do not exist are skipped instead of
    aborting the whole merge. An empty (or entirely missing) flist yields an
    empty list. Returns nothing.
    """

    result = []
    for f in flist:
        try:
            # binary mode: json.load() detects the encoding by itself
            with open(f, "rb") as infile:
                result.append(json.load(infile))
        except FileNotFoundError:
            # this measure was not produced for this run, nothing to merge
            continue

    # write the merged file
    with open(outfilename, 'w', encoding='utf-8') as outfile:
        json.dump(result, outfile, indent=4)
2713 +
2738 def process_jobs(joblist): 2714 def process_jobs(joblist):
2739 """ 2715 """
2740 Starts a Pool to run the Job() objects in joblist. 2716 Starts a Pool to run the Job() objects in joblist.
...@@ -2759,7 +2735,6 @@ def process_jobs(joblist): ...@@ -2759,7 +2735,6 @@ def process_jobs(joblist):
2759 print("Something went wrong") 2735 print("Something went wrong")
2760 2736
2761 if __name__ == "__main__": 2737 if __name__ == "__main__":
2762 -
2763 os.makedirs(runDir + "/results/figures/", exist_ok=True) 2738 os.makedirs(runDir + "/results/figures/", exist_ok=True)
2764 2739
2765 # parse options 2740 # parse options
...@@ -2897,29 +2872,29 @@ if __name__ == "__main__": ...@@ -2897,29 +2872,29 @@ if __name__ == "__main__":
2897 2872
2898 # Do general family statistics 2873 # Do general family statistics
2899 2874
2900 - joblist.append(Job(function=stats_len)) # Computes figures about chain lengths 2875 + # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths
2901 - joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) 2876 + # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families)
2902 - for f in famlist: 2877 + # for f in famlist:
2903 - joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) 2878 + # joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family)
2904 - if f not in ignored: 2879 + # if f not in ignored:
2905 - joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) 2880 + # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families)
2906 2881
2907 2882
2908 # Do geometric measures on all chains 2883 # Do geometric measures on all chains
2909 2884
2910 - if n_unmapped_chains: 2885 + # if n_unmapped_chains:
2911 - os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) 2886 + # os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True)
2912 - liste_struct=os.listdir(path_to_3D_data + "renumbered_rna_only") 2887 + # liste_struct = os.listdir(path_to_3D_data + "renumbered_rna_only")
2913 - f_prec = os.listdir(path_to_3D_data + "renumbered_rna_only")[0] 2888 + # if '4zdo_1_E.cif' in liste_struct:
2914 - if '4zdo_1_E.cif' in liste_struct: 2889 + # liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now
2915 - liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now 2890 + # if '4zdp_1_E.cif' in liste_struct:
2916 - if '4zdp_1_E.cif' in liste_struct: 2891 + # liste_struct.remove('4zdp_1_E.cif')
2917 - liste_struct.remove('4zdp_1_E.cif') 2892 + # for f in liste_struct:
2918 - for f in liste_struct: 2893 + # if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]):
2919 - joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances 2894 + # joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances
2920 2895
2921 2896
2922 - process_jobs(joblist) 2897 + # process_jobs(joblist)
2923 2898
2924 # Now process the memory-heavy tasks family by family 2899 # Now process the memory-heavy tasks family by family
2925 if DO_AVG_DISTANCE_MATRIX: 2900 if DO_AVG_DISTANCE_MATRIX:
...@@ -2935,33 +2910,34 @@ if __name__ == "__main__": ...@@ -2935,33 +2910,34 @@ if __name__ == "__main__":
2935 2910
2936 # finish the work after the parallel portions 2911 # finish the work after the parallel portions
2937 2912
2938 - per_chain_stats() # per chain base frequencies en basepair types 2913 + # per_chain_stats() # per chain base frequencies en basepair types
2939 - seq_idty() # identity matrices from pre-computed .npy matrices 2914 + # seq_idty() # identity matrices from pre-computed .npy matrices
2940 - stats_pairs() 2915 + # stats_pairs()
2941 2916
2942 if n_unmapped_chains: 2917 if n_unmapped_chains:
2943 - general_stats() 2918 + # general_stats()
2944 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) 2919 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True)
2945 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) 2920 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True)
2921 + # joblist = []
2922 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')))
2923 + # if DO_HIRE_RNA_MEASURES:
2924 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
2925 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv')))
2926 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv')))
2927 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv')))
2928 + # if DO_WADLEY_ANALYSIS:
2929 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv')))
2930 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv')))
2931 + # process_jobs(joblist)
2946 joblist = [] 2932 joblist = []
2947 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) 2933 + # joblist.append(Job(function=gmm_aa_dists, args=()))
2948 - if DO_HIRE_RNA_MEASURES: 2934 + # joblist.append(Job(function=gmm_aa_torsions, args=()))
2949 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
2950 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv')))
2951 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv')))
2952 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv')))
2953 - if DO_WADLEY_ANALYSIS:
2954 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv')))
2955 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv')))
2956 - process_jobs(joblist)
2957 - joblist = []
2958 - joblist.append(Job(function=gmm_aa_dists, args=()))
2959 - joblist.append(Job(function=gmm_aa_torsions, args=()))
2960 if DO_HIRE_RNA_MEASURES: 2935 if DO_HIRE_RNA_MEASURES:
2961 - joblist.append(Job(function=gmm_hrna, args=())) 2936 + # joblist.append(Job(function=gmm_hrna, args=()))
2962 joblist.append(Job(function=gmm_hrna_basepairs, args=())) 2937 joblist.append(Job(function=gmm_hrna_basepairs, args=()))
2963 if DO_WADLEY_ANALYSIS: 2938 if DO_WADLEY_ANALYSIS:
2964 joblist.append(Job(function=gmm_wadley, args=())) 2939 joblist.append(Job(function=gmm_wadley, args=()))
2965 if len(joblist): 2940 if len(joblist):
2966 process_jobs(joblist) 2941 process_jobs(joblist)
2942 + merge_jsons()
2967 2943
......