last modification for renumbering (issues with OP2)
Showing
3 changed files
with
514 additions
and
452 deletions
... | @@ -321,8 +321,8 @@ class Chain: | ... | @@ -321,8 +321,8 @@ class Chain: |
321 | self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif" | 321 | self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif" |
322 | else: | 322 | else: |
323 | status = f"Extract {self.pdb_id}-{self.pdb_chain_id}" | 323 | status = f"Extract {self.pdb_id}-{self.pdb_chain_id}" |
324 | - self.file = path_to_3D_data+"renumbered_rna_only/"+self.chain_label+".cif" | 324 | + self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif" |
325 | - #self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif" | 325 | + |
326 | 326 | ||
327 | # Check if file exists, if yes, abort (do not recompute) | 327 | # Check if file exists, if yes, abort (do not recompute) |
328 | if os.path.exists(self.file): | 328 | if os.path.exists(self.file): |
... | @@ -405,7 +405,7 @@ class Chain: | ... | @@ -405,7 +405,7 @@ class Chain: |
405 | nt=nums.at[i, "nt_name"] | 405 | nt=nums.at[i, "nt_name"] |
406 | 406 | ||
407 | # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G) | 407 | # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G) |
408 | - if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' and resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : | 408 | + if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' or resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : |
409 | res=chain[(' ', resseq, icode_res)] | 409 | res=chain[(' ', resseq, icode_res)] |
410 | else : #modified nucleotides (e.g. chain 5l4o_1_A) | 410 | else : #modified nucleotides (e.g. chain 5l4o_1_A) |
411 | het='H_' + nt | 411 | het='H_' + nt |
... | @@ -1521,14 +1521,10 @@ class Pipeline: | ... | @@ -1521,14 +1521,10 @@ class Pipeline: |
1521 | if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"): | 1521 | if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"): |
1522 | # for the portions mapped to Rfam | 1522 | # for the portions mapped to Rfam |
1523 | os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam") | 1523 | os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam") |
1524 | - ''' | 1524 | + |
1525 | if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"): | 1525 | if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"): |
1526 | # extract chains of pure RNA | 1526 | # extract chains of pure RNA |
1527 | os.makedirs(path_to_3D_data + "rna_only") | 1527 | os.makedirs(path_to_3D_data + "rna_only") |
1528 | - ''' | ||
1529 | - if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "renumbered_rna_only"): | ||
1530 | - # extract chains of pure RNA | ||
1531 | - os.makedirs(path_to_3D_data + "renumbered_rna_only") | ||
1532 | 1528 | ||
1533 | # define and run jobs | 1529 | # define and run jobs |
1534 | joblist = [] | 1530 | joblist = [] | ... | ... |
scripts/convert_hrna_jsons.py
0 → 100644
1 | +#!/usr/bin/python3 | ||
2 | +import json | ||
3 | +import os | ||
4 | +import numpy as np | ||
5 | + | ||
6 | +runDir = os.getcwd() | ||
7 | + | ||
8 | +def get_best(i): | ||
9 | + weights = [ float(x.strip("[]")) for x in i["weights"] ] | ||
10 | + means = [ float(x.strip("[]")) for x in i["means"] ] | ||
11 | + s = sorted(zip(weights, means), reverse=True) | ||
12 | + return s[0][1] | ||
13 | + | ||
14 | +def get_k(lw, bp): | ||
15 | + if lw == "cWW": | ||
16 | + if bp in ["GC", "CG"]: | ||
17 | + return 3.9 | ||
18 | + if bp in ["AU", "UA"]: | ||
19 | + return 3.3 | ||
20 | + if bp in ["GU", "UG"]: | ||
21 | + return 3.15 | ||
22 | + return 2.4 | ||
23 | + if lw == "tWW": | ||
24 | + return 2.4 | ||
25 | + return 0.8 | ||
26 | + | ||
27 | +if __name__ == "__main__": | ||
28 | + print("processing HRNA jsons...") | ||
29 | + | ||
30 | + lws = [] | ||
31 | + for c in "ct": | ||
32 | + for nt1 in "WHS": | ||
33 | + for nt2 in "WHS": | ||
34 | + lws.append(c+nt1+nt2) | ||
35 | + | ||
36 | + bps = [] | ||
37 | + for nt1 in "ACGU": | ||
38 | + for nt2 in "ACGU": | ||
39 | + bps.append(nt1+nt2) | ||
40 | + | ||
41 | + fullresults = dict() | ||
42 | + fullresults["A"] = dict() | ||
43 | + fullresults["C"] = dict() | ||
44 | + fullresults["G"] = dict() | ||
45 | + fullresults["U"] = dict() | ||
46 | + counts = dict() | ||
47 | + for lw in lws: | ||
48 | + counts[lw] = 0 | ||
49 | + for bp in bps: | ||
50 | + fullresults[bp[0]][bp[1]] = [] | ||
51 | + | ||
52 | + # open json file | ||
53 | + with open(runDir + f"/results/geometry/json/hirerna_{bp}_basepairs.json", "rb") as f: | ||
54 | + data = json.load(f) | ||
55 | + | ||
56 | + # consider each BP type | ||
57 | + for lw in lws: | ||
58 | + this = dict() | ||
59 | + | ||
60 | + # gather params | ||
61 | + distance = 0 | ||
62 | + a1 = 0 | ||
63 | + a2 = 0 | ||
64 | + for i in data: | ||
65 | + if i["measure"] == f"Distance between {lw} {bp} tips": | ||
66 | + distance = np.round(get_best(i), 2) | ||
67 | + if i["measure"] == f"{lw}_{bp}_alpha_1": | ||
68 | + a1 = np.round(np.pi/180.0*get_best(i), 2) | ||
69 | + if i["measure"] == f"{lw}_{bp}_alpha_2": | ||
70 | + a2 = np.round(np.pi/180.0*get_best(i), 2) | ||
71 | + | ||
72 | + if distance == 0 and a1 == 0 and a2 == 0: | ||
73 | + # not found | ||
74 | + continue | ||
75 | + | ||
76 | + counts[lw] += 1 | ||
77 | + | ||
78 | + # create entry | ||
79 | + this["rho"] = distance | ||
80 | + this["a1"] = a1 | ||
81 | + this["a2"] = a2 | ||
82 | + this["k"] = get_k(lw, bp) | ||
83 | + this["canonical"] = 1.0 if lw=="cWW" and bp in ["GC", "CG", "GU", "UG", "AU", "UA"] else 0.0 | ||
84 | + this["LW"] = lw | ||
85 | + | ||
86 | + # store entry | ||
87 | + fullresults[bp[0]][bp[1]].append(this) | ||
88 | + | ||
89 | + with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f: | ||
90 | + json.dump(fullresults, f, indent=4) |
... | @@ -19,6 +19,7 @@ import matplotlib.patches as mpatches | ... | @@ -19,6 +19,7 @@ import matplotlib.patches as mpatches |
19 | import scipy.cluster.hierarchy as sch | 19 | import scipy.cluster.hierarchy as sch |
20 | import sklearn | 20 | import sklearn |
21 | import json | 21 | import json |
22 | +import glob | ||
22 | import pickle | 23 | import pickle |
23 | import Bio | 24 | import Bio |
24 | from scipy.spatial.distance import squareform | 25 | from scipy.spatial.distance import squareform |
... | @@ -278,6 +279,7 @@ def stats_len(): | ... | @@ -278,6 +279,7 @@ def stats_len(): |
278 | ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5)) | 279 | ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5)) |
279 | 280 | ||
280 | # Save the figure | 281 | # Save the figure |
282 | + | ||
281 | fig.savefig(runDir + f"/results/figures/lengths_{res_thr}A.png") | 283 | fig.savefig(runDir + f"/results/figures/lengths_{res_thr}A.png") |
282 | idxQueue.put(thr_idx) # replace the thread index in the queue | 284 | idxQueue.put(thr_idx) # replace the thread index in the queue |
283 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") | 285 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") |
... | @@ -1377,72 +1379,111 @@ def pos_b2(res): | ... | @@ -1377,72 +1379,111 @@ def pos_b2(res): |
1377 | else: | 1379 | else: |
1378 | return [] | 1380 | return [] |
1379 | 1381 | ||
1380 | -def basepair_apex_distance(res, pair): | 1382 | +@trace_unhandled_exceptions |
1381 | - """ | 1383 | +def basepair_measures(res, pair): |
1382 | - measure of the distance between the tips of the paired nucleotides (B1 / B1 or B1 / B2 or B2 / B2) | ||
1383 | - """ | ||
1384 | - dist=[] | ||
1385 | - d=0 | ||
1386 | - if res.get_resname()=='A' or res.get_resname()=='G' :# different cases if 1 aromatic cycle or 2 | ||
1387 | - atom_res=pos_b2(res) | ||
1388 | - if pair.get_resname()=='A' or pair.get_resname()=='G' : | ||
1389 | - atom_pair=pos_b2(pair) | ||
1390 | - if pair.get_resname()=='C' or pair.get_resname()=='U' : | ||
1391 | - atom_pair=pos_b1(pair) | ||
1392 | - | ||
1393 | - if res.get_resname()=='C' or res.get_resname()=='U' : | ||
1394 | - atom_res=pos_b1(res) | ||
1395 | - if pair.get_resname()=='A' or pair.get_resname()=='G' : | ||
1396 | - atom_pair=pos_b2(pair) | ||
1397 | - if pair.get_resname()=='C' or pair.get_resname()=='U' : | ||
1398 | - atom_pair=pos_b1(pair) | ||
1399 | - | ||
1400 | - dist = get_euclidian_distance(atom_res, atom_pair) | ||
1401 | - | ||
1402 | - return dist | ||
1403 | - | ||
1404 | -def basepair_flat_angle(res, pair): | ||
1405 | """ | 1384 | """ |
1406 | - measurement of the plane angles formed by the vectors C1->B1 of the paired nucleotides | 1385 | + measurement of the flat angles describing a basepair in the HiRE-RNA model |
1407 | """ | 1386 | """ |
1408 | if res.get_resname()=='C' or res.get_resname()=='U' : | 1387 | if res.get_resname()=='C' or res.get_resname()=='U' : |
1409 | atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] | 1388 | atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] |
1410 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | 1389 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] |
1411 | atom_b1_res = pos_b1(res) | 1390 | atom_b1_res = pos_b1(res) |
1412 | - a1_res = Vector(atom_c4_res[0]) | 1391 | + if not len(atom_c4_res) or not len(atom_c1p_res) or not len(atom_b1_res): |
1392 | + return | ||
1393 | + a3_res = Vector(atom_c4_res[0]) | ||
1413 | a2_res = Vector(atom_c1p_res[0]) | 1394 | a2_res = Vector(atom_c1p_res[0]) |
1414 | - a3_res = Vector(atom_b1_res[0]) | 1395 | + a1_res = Vector(atom_b1_res[0]) |
1415 | if res.get_resname()=='A' or res.get_resname()=='G' : | 1396 | if res.get_resname()=='A' or res.get_resname()=='G' : |
1416 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | 1397 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] |
1417 | atom_b1_res = pos_b1(res) | 1398 | atom_b1_res = pos_b1(res) |
1418 | atom_b2_res = pos_b2(res) | 1399 | atom_b2_res = pos_b2(res) |
1419 | - a1_res = Vector(atom_c1p_res[0]) | 1400 | + if not len(atom_c1p_res) or not len(atom_b1_res) or not len(atom_b2_res): |
1401 | + return | ||
1402 | + a3_res = Vector(atom_c1p_res[0]) | ||
1420 | a2_res = Vector(atom_b1_res[0]) | 1403 | a2_res = Vector(atom_b1_res[0]) |
1421 | - a3_res = Vector(atom_b2_res[0]) | 1404 | + a1_res = Vector(atom_b2_res[0]) |
1422 | 1405 | ||
1423 | if pair.get_resname()=='C' or pair.get_resname()=='U' : | 1406 | if pair.get_resname()=='C' or pair.get_resname()=='U' : |
1424 | atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] | 1407 | atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] |
1425 | atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] | 1408 | atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] |
1426 | atom_b1_pair = pos_b1(pair) | 1409 | atom_b1_pair = pos_b1(pair) |
1427 | - a1_pair = Vector(atom_c4_pair[0]) | 1410 | + if not len(atom_c4_pair) or not len(atom_c1p_pair) or not len(atom_b1_pair): |
1411 | + return | ||
1412 | + a3_pair = Vector(atom_c4_pair[0]) | ||
1428 | a2_pair = Vector(atom_c1p_pair[0]) | 1413 | a2_pair = Vector(atom_c1p_pair[0]) |
1429 | - a3_pair = Vector(atom_b1_pair) | 1414 | + a1_pair = Vector(atom_b1_pair[0]) |
1430 | if pair.get_resname()=='A' or pair.get_resname()=='G' : | 1415 | if pair.get_resname()=='A' or pair.get_resname()=='G' : |
1431 | atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] | 1416 | atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] |
1432 | atom_b1_pair = pos_b1(pair) | 1417 | atom_b1_pair = pos_b1(pair) |
1433 | atom_b2_pair = pos_b2(pair) | 1418 | atom_b2_pair = pos_b2(pair) |
1434 | - a1_pair = Vector(atom_c1p_pair[0]) | 1419 | + if not len(atom_c1p_pair) or not len(atom_b1_pair) or not len(atom_b2_pair): # No C1' atom in the paired nucleotide, skip measures. |
1420 | + return | ||
1421 | + a3_pair = Vector(atom_c1p_pair[0]) | ||
1435 | a2_pair = Vector(atom_b1_pair[0]) | 1422 | a2_pair = Vector(atom_b1_pair[0]) |
1436 | - a3_pair = Vector(atom_b2_pair[0]) | 1423 | + a1_pair = Vector(atom_b2_pair[0]) |
1437 | 1424 | ||
1438 | - # we calculate the 4 plane angles including these vectors | 1425 | + # Bond vectors |
1426 | + res_32 = a3_res - a2_res | ||
1427 | + res_12 = a1_res - a2_res | ||
1428 | + pair_32 = a3_pair - a2_pair | ||
1429 | + pair_12 = a1_pair - a2_pair | ||
1430 | + rho = a1_res - a1_pair # from pair to res | ||
1431 | + | ||
1432 | + # dist | ||
1433 | + dist = rho.norm() | ||
1434 | + | ||
1435 | + # we calculate the 2 plane angles | ||
1436 | + with warnings.catch_warnings(): | ||
1437 | + warnings.simplefilter('ignore', RuntimeWarning) | ||
1438 | + b = res_12.angle(rho)*(180/np.pi) # equal to the previous implementation | ||
1439 | + c = pair_12.angle(-rho)*(180/np.pi) # | ||
1440 | + # a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) # not required | ||
1441 | + # b = calc_angle(a2_res, a1_res, a1_pair)*(180/np.pi) | ||
1442 | + # c = calc_angle(a1_res, a1_pair, a2_pair)*(180/np.pi) | ||
1443 | + # d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) # not required | ||
1444 | + | ||
1445 | + # Compute plane vectors | ||
1446 | + n1 = (res_32**res_12).normalized() # ** between vectors, is the cross product | ||
1447 | + n2 = (pair_32**pair_12).normalized() | ||
1448 | + | ||
1449 | + # Distances between base tip and the other base's plane (orthogonal projection) | ||
1450 | + # if angle(rho, n) > pi/2 the distance is negative (signed following n) | ||
1451 | + d1 = rho*n1 # projection of rho on axis n1 | ||
1452 | + d2 = rho*n2 | ||
1453 | + | ||
1454 | + # Now the projection of rho in the planes. It's just a sum of the triangles' two other edges. | ||
1455 | + p1 = (-rho+n1**d1).normalized() # between vector and scalar, ** is the multiplication by a scalar | ||
1456 | + p2 = (rho-n2**d2).normalized() | ||
1457 | + | ||
1458 | + # Measure tau, the dihedral | ||
1459 | + u = (res_12**rho).normalized() | ||
1460 | + v = (rho**pair_12).normalized() | ||
1461 | + cosTau1 = n1*u | ||
1462 | + cosTau2 = v*n2 | ||
1439 | 1463 | ||
1440 | - a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) | 1464 | + # cosTau is enough to compute alpha, but we can't distinguish |
1441 | - b = calc_angle(a2_res, a3_res, a3_pair)*(180/np.pi) | 1465 | + # yet between tau and -tau. If the full computation is required, then: |
1442 | - c = calc_angle(a3_res, a3_pair, a2_pair)*(180/np.pi) | 1466 | + tau1 = np.arccos(cosTau1)*(180/np.pi) |
1443 | - d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) | 1467 | + tau2 = np.arccos(cosTau2)*(180/np.pi) |
1444 | - angles = [a, b, c, d] | 1468 | + w1 = u**n1 |
1445 | - return angles | 1469 | + w2 = v**n2 |
1470 | + if res_12*w1 < 0: | ||
1471 | + tau1 = -tau1 | ||
1472 | + if pair_12*w2 < 0: | ||
1473 | + tau2 = -tau2 | ||
1474 | + | ||
1475 | + # And finally, the a1 and a2 angles between res_12 and p1 / pair_12 and p2 | ||
1476 | + with warnings.catch_warnings(): | ||
1477 | + warnings.simplefilter('ignore', RuntimeWarning) | ||
1478 | + a1 = (-res_12).angle(p1)*(180/np.pi) | ||
1479 | + a2 = (-pair_12).angle(p2)*(180/np.pi) | ||
1480 | + if cosTau1 > 0: | ||
1481 | + # CosTau > 0 (Tau < 90 or Tau > 270) implies that alpha > 180. | ||
1482 | + a1 = -a1 | ||
1483 | + if cosTau2 > 0: | ||
1484 | + a2 = -a2 | ||
1485 | + | ||
1486 | + return [dist, b, c, d1, d2, a1, a2, tau1, tau2] | ||
1446 | 1487 | ||
1447 | @trace_unhandled_exceptions | 1488 | @trace_unhandled_exceptions |
1448 | def measure_from_structure(f): | 1489 | def measure_from_structure(f): |
... | @@ -1482,8 +1523,8 @@ def measures_wadley(name, s, thr_idx): | ... | @@ -1482,8 +1523,8 @@ def measures_wadley(name, s, thr_idx): |
1482 | """ | 1523 | """ |
1483 | 1524 | ||
1484 | # do not recompute something already computed | 1525 | # do not recompute something already computed |
1485 | - if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley ' + name + '.csv') and | 1526 | + if (path.isfile(runDir + '/results/geometry/Pyle/angles/flat_angles_pyle_' + name + '.csv') and |
1486 | - path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")): | 1527 | + path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv")): |
1487 | return | 1528 | return |
1488 | 1529 | ||
1489 | liste_dist = [] | 1530 | liste_dist = [] |
... | @@ -1522,9 +1563,9 @@ def measures_wadley(name, s, thr_idx): | ... | @@ -1522,9 +1563,9 @@ def measures_wadley(name, s, thr_idx): |
1522 | liste_angl.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv]) | 1563 | liste_angl.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv]) |
1523 | 1564 | ||
1524 | df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"]) | 1565 | df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"]) |
1525 | - df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv") | 1566 | + df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv") |
1526 | df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"]) | 1567 | df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"]) |
1527 | - df.to_csv(runDir + "/results/geometry/Pyle/angles/angles_plans_wadley "+name+".csv") | 1568 | + df.to_csv(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle_"+name+".csv") |
1528 | 1569 | ||
1529 | @trace_unhandled_exceptions | 1570 | @trace_unhandled_exceptions |
1530 | def measures_aa(name, s, thr_idx): | 1571 | def measures_aa(name, s, thr_idx): |
... | @@ -1533,7 +1574,7 @@ def measures_aa(name, s, thr_idx): | ... | @@ -1533,7 +1574,7 @@ def measures_aa(name, s, thr_idx): |
1533 | """ | 1574 | """ |
1534 | 1575 | ||
1535 | # do not recompute something already computed | 1576 | # do not recompute something already computed |
1536 | - if path.isfile(runDir+"/results/geometry/all-atoms/distances/dist_atoms "+name+".csv"): | 1577 | + if path.isfile(runDir+"/results/geometry/all-atoms/distances/dist_atoms_"+name+".csv"): |
1537 | return | 1578 | return |
1538 | 1579 | ||
1539 | last_o3p = [] # O3' of the previous nucleotide linked to the P of the current nucleotide | 1580 | last_o3p = [] # O3' of the previous nucleotide linked to the P of the current nucleotide |
... | @@ -1685,7 +1726,7 @@ def measures_aa(name, s, thr_idx): | ... | @@ -1685,7 +1726,7 @@ def measures_aa(name, s, thr_idx): |
1685 | df=pd.concat([df_comm, df_pur, df_pyr], axis = 1) | 1726 | df=pd.concat([df_comm, df_pur, df_pyr], axis = 1) |
1686 | pbar.close() | 1727 | pbar.close() |
1687 | 1728 | ||
1688 | - df.to_csv(runDir + "/results/geometry/all-atoms/distances/dist_atoms " + name + ".csv") | 1729 | + df.to_csv(runDir + "/results/geometry/all-atoms/distances/dist_atoms_" + name + ".csv") |
1689 | 1730 | ||
1690 | @trace_unhandled_exceptions | 1731 | @trace_unhandled_exceptions |
1691 | def measures_hrna(name, s, thr_idx): | 1732 | def measures_hrna(name, s, thr_idx): |
... | @@ -1805,94 +1846,92 @@ def measures_hrna_basepairs(name, s, thr_idx): | ... | @@ -1805,94 +1846,92 @@ def measures_hrna_basepairs(name, s, thr_idx): |
1805 | chain = next(s[0].get_chains()) | 1846 | chain = next(s[0].get_chains()) |
1806 | 1847 | ||
1807 | # do not recompute something already computed | 1848 | # do not recompute something already computed |
1808 | - if path.isfile(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs "+name+".csv"): | 1849 | + if path.isfile(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs_"+name+".csv"): |
1809 | return | 1850 | return |
1810 | 1851 | ||
1811 | df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name)) | 1852 | df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name)) |
1812 | 1853 | ||
1813 | - if df['index_chain'][0]==1:#ignore files with numbering errors | 1854 | + if df['index_chain'][0] == 1: # ignore files with numbering errors : TODO : remove when we get DSSR Pro, there should not be numbering errors anymore |
1814 | - l = measures_hrna_basepairs_chain(chain, df, thr_idx) | 1855 | + l = measures_hrna_basepairs_chain(name, chain, df, thr_idx) |
1815 | - | 1856 | + df_calc = pd.DataFrame(l, columns=["type_LW", "nt1_idx", "nt1_res", "nt2_idx", "nt2_res", "Distance", |
1816 | - df_calc=pd.DataFrame(l, columns=["Chaine", "type LW", "Resseq", "Num paired", "Distance", "C4'-C1'-B1", "C1'-B1-B1pair", "B1-B1pair-C1'pair", "B1pair-C1'pair-C4'pair"]) | 1857 | + "211_angle", "112_angle", "dB1", "dB2", "alpha1", "alpha2", "3211_torsion", "1123_torsion"]) |
1817 | - df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs '+name+'.csv') | 1858 | + df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs_' + name + '.csv', float_format="%.3f") |
1818 | - | ||
1819 | 1859 | ||
1820 | @trace_unhandled_exceptions | 1860 | @trace_unhandled_exceptions |
1821 | -def measures_hrna_basepairs_chain(chain, df, thr_idx): | 1861 | +def measures_hrna_basepairs_chain(name, chain, df, thr_idx): |
1822 | """ | 1862 | """ |
1823 | Cleanup of the dataset | 1863 | Cleanup of the dataset |
1824 | measurements of distances and angles between paired nucleotides in the chain | 1864 | measurements of distances and angles between paired nucleotides in the chain |
1825 | """ | 1865 | """ |
1826 | 1866 | ||
1827 | - liste_dist=[] | 1867 | + results = [] |
1828 | warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) | 1868 | warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) |
1829 | 1869 | ||
1830 | pairs = df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep | 1870 | pairs = df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep |
1831 | - for i in range(pairs.shape[0]): #we remove the lines where no pairing (NaN in paired) | 1871 | + for i in range(pairs.shape[0]): # we remove the lines where no pairing (NaN in paired) |
1832 | - index_with_nan=pairs.index[pairs.iloc[:,2].isnull()] | 1872 | + index_with_nan = pairs.index[pairs.iloc[:,2].isnull()] |
1833 | pairs.drop(index_with_nan, 0, inplace=True) | 1873 | pairs.drop(index_with_nan, 0, inplace=True) |
1834 | 1874 | ||
1835 | - paired_int=[] | 1875 | + paired_int = [] |
1836 | - for i in pairs.index:# convert values from paired to integers or lists of integers | 1876 | + for i in pairs.index: # convert values from paired to integers or lists of integers |
1837 | - paired=pairs.at[i, 'paired'] | 1877 | + paired = pairs.at[i, 'paired'] |
1838 | if type(paired) is np.int64 or type(paired) is np.float64: | 1878 | if type(paired) is np.int64 or type(paired) is np.float64: |
1839 | paired_int.append(int(paired)) | 1879 | paired_int.append(int(paired)) |
1840 | else : #strings | 1880 | else : #strings |
1841 | - if len(paired)<3 : #a single pairing | 1881 | + if len(paired) < 3: # a single pairing |
1842 | paired_int.append(int(paired)) | 1882 | paired_int.append(int(paired)) |
1843 | - else : #several pairings | 1883 | + else : # several pairings |
1844 | - paired=paired.split(',') | 1884 | + paired = paired.split(',') |
1845 | - l=[int(i) for i in paired] | 1885 | + l = [ int(i) for i in paired ] |
1846 | paired_int.append(l) | 1886 | paired_int.append(l) |
1847 | 1887 | ||
1848 | - pair_type_LW_bis=[] | 1888 | + pair_type_LW_bis = [] |
1849 | for j in pairs.index: | 1889 | for j in pairs.index: |
1850 | pair_type_LW = pairs.at[j, 'pair_type_LW'] | 1890 | pair_type_LW = pairs.at[j, 'pair_type_LW'] |
1851 | - if len(pair_type_LW)<4 : #a single pairing | 1891 | + if len(pair_type_LW) < 4 : # a single pairing |
1852 | pair_type_LW_bis.append(pair_type_LW) | 1892 | pair_type_LW_bis.append(pair_type_LW) |
1853 | - else : #several pairings | 1893 | + else : # several pairings |
1854 | - pair_type_LW=pair_type_LW.split(',') | 1894 | + pair_type_LW = pair_type_LW.split(',') |
1855 | - l=[i for i in pair_type_LW] | 1895 | + l = [ i for i in pair_type_LW ] |
1856 | pair_type_LW_bis.append(pair_type_LW) | 1896 | pair_type_LW_bis.append(pair_type_LW) |
1857 | 1897 | ||
1858 | - #addition of these new columns | 1898 | + # addition of these new columns |
1859 | pairs.insert(4, "paired_int", paired_int, True) | 1899 | pairs.insert(4, "paired_int", paired_int, True) |
1860 | pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True) | 1900 | pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True) |
1861 | 1901 | ||
1862 | - indexNames=pairs[pairs['paired_int'] == 0].index | 1902 | + indexNames = pairs[pairs['paired_int'] == 0].index |
1863 | - pairs.drop(indexNames, inplace=True)#deletion of lines with a 0 in paired_int (matching to another RNA chain) | 1903 | + pairs.drop(indexNames, inplace=True) # deletion of lines with a 0 in paired_int (matching to another RNA chain) |
1864 | - | 1904 | + |
1865 | - for i in tqdm(pairs.index, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {chain} measures_hrna_basepairs_chain", unit="res", leave=False): | 1905 | + for i in tqdm(pairs.index, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna_basepairs_chain", unit="res", leave=False): |
1866 | - """ | 1906 | + # calculations for each row of the pairs dataset |
1867 | - calculations for each row of the pairs dataset | 1907 | + index = pairs.at[i, 'index_chain'] |
1868 | - """ | 1908 | + res1 = chain[(' ', index, ' ')].get_resname() |
1869 | - index=pairs.at[i, 'index_chain'] | 1909 | + if res1 not in ['A','C','G','U']: |
1870 | - type_LW=pairs.at[i, 'pair_type_LW_bis'] #pairing type | 1910 | + continue |
1871 | - num_paired=pairs.at[i, 'paired_int'] #number (index_chain) of the paired nucleotide | 1911 | + type_LW = pairs.at[i, 'pair_type_LW_bis'] # pairing type |
1912 | + num_paired = pairs.at[i, 'paired_int'] # number (index_chain) of the paired nucleotide | ||
1872 | 1913 | ||
1873 | if type(num_paired) is int or type(num_paired) is np.int64: | 1914 | if type(num_paired) is int or type(num_paired) is np.int64: |
1874 | - try : | 1915 | + res2 = chain[(' ', num_paired, ' ')].get_resname() |
1875 | - d = basepair_apex_distance(chain[(' ',index, ' ')], chain[(' ', num_paired, ' ')]) | 1916 | + if res2 not in ["A","C","G","U"]: |
1876 | - angle = basepair_flat_angle(chain[(' ', index, ' ')], chain[(' ', num_paired, ' ')]) | 1917 | + continue |
1877 | - if d != 0.0: | 1918 | + measures = basepair_measures(chain[(' ', index, ' ')], chain[(' ', num_paired, ' ')]) |
1878 | - liste_dist.append([chain, type_LW, index, num_paired, d, angle[0], angle[1], angle[2], angle[3]]) | 1919 | + if measures is not None: |
1879 | - except : | 1920 | + results.append([type_LW, index, res1, num_paired, res2] + measures) |
1880 | - pass | 1921 | + else: |
1881 | - else : | 1922 | + for j in range(len(num_paired)): # if several pairings, process them one by one |
1882 | - for j in range(len(num_paired)): #if several pairings, process them one by one | 1923 | + if num_paired[j] != 0: |
1883 | - if num_paired[j] != 0 : | 1924 | + res2 = chain[(' ', num_paired[j], ' ')].get_resname() |
1884 | - try : | 1925 | + if res2 not in ["A","C","G","U"]: |
1885 | - d = basepair_apex_distance(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')]) | 1926 | + continue |
1886 | - angle = basepair_flat_angle(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')]) | 1927 | + measures = basepair_measures(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')]) |
1887 | - if d != 0.0: | 1928 | + if measures is not None: |
1888 | - liste_dist.append([chain, type_LW[j], index, num_paired[j], d, angle[0], angle[1], angle[2], angle[3]]) | 1929 | + results.append([type_LW[j], index, res1, num_paired[j], res2] + measures) |
1889 | - except: | 1930 | + |
1890 | - pass | 1931 | + return results |
1891 | - | ||
1892 | - return(liste_dist) | ||
1893 | 1932 | ||
1894 | @trace_unhandled_exceptions | 1933 | @trace_unhandled_exceptions |
1895 | -def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=True) : | 1934 | +def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) : |
1896 | """ | 1935 | """ |
1897 | Plot Gaussian-Mixture-Model (with or without histograms) | 1936 | Plot Gaussian-Mixture-Model (with or without histograms) |
1898 | """ | 1937 | """ |
... | @@ -1906,8 +1945,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -1906,8 +1945,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
1906 | 1945 | ||
1907 | # chooses the number of components based on the maximum likelihood value (maxlogv) | 1946 | # chooses the number of components based on the maximum likelihood value (maxlogv) |
1908 | n_components_range = np.arange(8)+1 | 1947 | n_components_range = np.arange(8)+1 |
1909 | - aic = [] | 1948 | + # aic = [] |
1910 | - bic = [] | 1949 | + # bic = [] |
1911 | maxlogv=[] | 1950 | maxlogv=[] |
1912 | md = np.array(data).reshape(-1,1) | 1951 | md = np.array(data).reshape(-1,1) |
1913 | nb_components = 1 | 1952 | nb_components = 1 |
... | @@ -1915,8 +1954,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -1915,8 +1954,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
1915 | log_max = 0 | 1954 | log_max = 0 |
1916 | for n_comp in n_components_range: | 1955 | for n_comp in n_components_range: |
1917 | gmm = GaussianMixture(n_components=n_comp).fit(md) | 1956 | gmm = GaussianMixture(n_components=n_comp).fit(md) |
1918 | - aic.append(abs(gmm.aic(md))) | 1957 | + # aic.append(abs(gmm.aic(md))) |
1919 | - bic.append(abs(gmm.bic(md))) | 1958 | + # bic.append(abs(gmm.bic(md))) |
1920 | maxlogv.append(gmm.lower_bound_) | 1959 | maxlogv.append(gmm.lower_bound_) |
1921 | if gmm.lower_bound_== max(maxlogv) : # takes the maximum | 1960 | if gmm.lower_bound_== max(maxlogv) : # takes the maximum |
1922 | nb_components = n_comp | 1961 | nb_components = n_comp |
... | @@ -1962,10 +2001,10 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -1962,10 +2001,10 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
1962 | if hist: | 2001 | if hist: |
1963 | plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True) | 2002 | plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True) |
1964 | if toric: | 2003 | if toric: |
1965 | - plt.xlabel("Angle (Degré)") | 2004 | + plt.xlabel("Angle (Degrees)") |
1966 | else: | 2005 | else: |
1967 | - plt.xlabel("Distance (Angström)") | 2006 | + plt.xlabel("Distance (Angströms)") |
1968 | - plt.ylabel("Densité") | 2007 | + plt.ylabel("Density") |
1969 | 2008 | ||
1970 | # Prepare the GMM curve with some abscissa points | 2009 | # Prepare the GMM curve with some abscissa points |
1971 | if toric: | 2010 | if toric: |
... | @@ -1985,16 +2024,16 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -1985,16 +2024,16 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
1985 | summary_data["std"] = [] | 2024 | summary_data["std"] = [] |
1986 | 2025 | ||
1987 | # plot | 2026 | # plot |
1988 | - courbes = [] | 2027 | + curves = [] |
1989 | for i in range(nb_components): | 2028 | for i in range(nb_components): |
1990 | 2029 | ||
1991 | # store the parameters | 2030 | # store the parameters |
1992 | mean = means[i] | 2031 | mean = means[i] |
1993 | sigma = np.sqrt(covariances[i]) | 2032 | sigma = np.sqrt(covariances[i]) |
1994 | weight = weights[i] | 2033 | weight = weights[i] |
1995 | - summary_data["means"].append(str(mean)) | 2034 | + summary_data["means"].append("{:.2f}".format(float(str(mean).strip("[]")))) |
1996 | - summary_data["std"].append(str(sigma)) | 2035 | + summary_data["std"].append("{:.2f}".format(float(str(sigma).strip("[]")))) |
1997 | - summary_data["weights"].append(str(weight)) | 2036 | + summary_data["weights"].append("{:.2f}".format(float(str(weight).strip("[]")))) |
1998 | 2037 | ||
1999 | # compute the right x and y data to plot | 2038 | # compute the right x and y data to plot |
2000 | y = weight*st.norm.pdf(x, mean, sigma) | 2039 | y = weight*st.norm.pdf(x, mean, sigma) |
... | @@ -2022,25 +2061,25 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -2022,25 +2061,25 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
2022 | plt.plot(newx, newy, c=colors[i]) | 2061 | plt.plot(newx, newy, c=colors[i]) |
2023 | else: | 2062 | else: |
2024 | # store for later summation | 2063 | # store for later summation |
2025 | - courbes.append(np.array(newy)) | 2064 | + curves.append(np.array(newy)) |
2026 | 2065 | ||
2027 | if hist: | 2066 | if hist: |
2028 | - plt.title("Histogramme " +name_data+ " avec GMM pour " +str(nb_components)+ " composantes (" + str(len(data_ori))+" valeurs)") | 2067 | + plt.title(f"Histogram of {name_data} with GMM of {nb_components} components (" + str(len(data_ori))+" values)") |
2029 | if save: | 2068 | if save: |
2030 | - plt.savefig("Histogramme " +name_data+ " avec GMM pour " +str(nb_components)+ " composantes (" + str(len(data_ori))+" valeurs).png") | 2069 | + plt.savefig(f"Histogram_{name_data}_{nb_components}_comps.png") |
2031 | plt.close() | 2070 | plt.close() |
2032 | else: | 2071 | else: |
2033 | # Plot their sum, do not save figure yet | 2072 | # Plot their sum, do not save figure yet |
2034 | try: | 2073 | try: |
2035 | - plt.plot(newx, sum(courbes), c=couleur, label=name_data) | 2074 | + plt.plot(newx, sum(curves), c=col, label=name_data) |
2036 | except TypeError: | 2075 | except TypeError: |
2037 | - print("N curves:", len(courbes)) | 2076 | + print("N curves:", len(curves)) |
2038 | - for c in courbes: | 2077 | + for c in curves: |
2039 | print(c) | 2078 | print(c) |
2040 | plt.legend() | 2079 | plt.legend() |
2041 | 2080 | ||
2042 | # Save the json | 2081 | # Save the json |
2043 | - with open(runDir + "/results/geometry/json/" +name_data + " .json", 'w', encoding='utf-8') as f: | 2082 | + with open(runDir + "/results/geometry/json/" +name_data + ".json", 'w', encoding='utf-8') as f: |
2044 | json.dump(summary_data, f, indent=4) | 2083 | json.dump(summary_data, f, indent=4) |
2045 | 2084 | ||
2046 | @trace_unhandled_exceptions | 2085 | @trace_unhandled_exceptions |
... | @@ -2122,25 +2161,25 @@ def gmm_aa_dists(): | ... | @@ -2122,25 +2161,25 @@ def gmm_aa_dists(): |
2122 | GMM_histo(c2p_o2p, "C2'-O2'") | 2161 | GMM_histo(c2p_o2p, "C2'-O2'") |
2123 | 2162 | ||
2124 | if len(op3_p) > 0 : | 2163 | if len(op3_p) > 0 : |
2125 | - GMM_histo(op3_p, "OP3-P", toric=False, hist=False, couleur= 'lightcoral') | 2164 | + GMM_histo(op3_p, "OP3-P", toric=False, hist=False, col= 'lightcoral') |
2126 | - GMM_histo(p_op1, "P-OP1", toric=False, hist=False, couleur='gold') | 2165 | + GMM_histo(p_op1, "P-OP1", toric=False, hist=False, col='gold') |
2127 | - GMM_histo(p_op2, "P-OP2", toric=False, hist=False, couleur='lightseagreen') | 2166 | + GMM_histo(p_op2, "P-OP2", toric=False, hist=False, col='lightseagreen') |
2128 | - GMM_histo(last_o3p_p, "O3'-P", toric=False, hist=False, couleur='saddlebrown') | 2167 | + GMM_histo(last_o3p_p, "O3'-P", toric=False, hist=False, col='saddlebrown') |
2129 | - GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, couleur='darkturquoise') | 2168 | + GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkturquoise') |
2130 | - GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, couleur='darkkhaki') | 2169 | + GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='darkkhaki') |
2131 | - GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, couleur='indigo') | 2170 | + GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='indigo') |
2132 | - GMM_histo(c4p_o4p, "C4'-O4'", toric=False, hist=False, couleur='maroon') | 2171 | + GMM_histo(c4p_o4p, "C4'-O4'", toric=False, hist=False, col='maroon') |
2133 | - GMM_histo(c4p_c3p, "C4'-C3'", toric=False, hist=False, couleur='burlywood') | 2172 | + GMM_histo(c4p_c3p, "C4'-C3'", toric=False, hist=False, col='burlywood') |
2134 | - GMM_histo(c3p_o3p, "C3'-O3'", toric=False, hist=False, couleur='steelblue') | 2173 | + GMM_histo(c3p_o3p, "C3'-O3'", toric=False, hist=False, col='steelblue') |
2135 | - GMM_histo(o4p_c1p, "O4'-C1'", toric=False, hist=False, couleur='tomato') | 2174 | + GMM_histo(o4p_c1p, "O4'-C1'", toric=False, hist=False, col='tomato') |
2136 | - GMM_histo(c1p_c2p, "C1'-C2'", toric=False, hist=False, couleur='darkolivegreen') | 2175 | + GMM_histo(c1p_c2p, "C1'-C2'", toric=False, hist=False, col='darkolivegreen') |
2137 | - GMM_histo(c2p_c3p, "C2'-C3'", toric=False, hist=False, couleur='orchid') | 2176 | + GMM_histo(c2p_c3p, "C2'-C3'", toric=False, hist=False, col='orchid') |
2138 | - GMM_histo(c2p_o2p, "C2'-O2'", toric=False, hist=False, couleur='deeppink') | 2177 | + GMM_histo(c2p_o2p, "C2'-O2'", toric=False, hist=False, col='deeppink') |
2139 | axes=plt.gca() | 2178 | axes=plt.gca() |
2140 | axes.set_ylim(0, 100) | 2179 | axes.set_ylim(0, 100) |
2141 | - plt.xlabel("Distance (Angström)") | 2180 | + plt.xlabel("Distance (Angströms)") |
2142 | - plt.title("GMM des distances entre atomes communs ") | 2181 | + plt.title("GMM of distances between common atoms ") |
2143 | - plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/commun/" + "GMM des distances entre atomes communs .png") | 2182 | + plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/commun/" + "GMM_distances_common_atoms.png") |
2144 | plt.close() | 2183 | plt.close() |
2145 | 2184 | ||
2146 | os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/purines/", exist_ok=True) | 2185 | os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/purines/", exist_ok=True) |
... | @@ -2161,25 +2200,25 @@ def gmm_aa_dists(): | ... | @@ -2161,25 +2200,25 @@ def gmm_aa_dists(): |
2161 | GMM_histo(c4_n9, "C4-N9") | 2200 | GMM_histo(c4_n9, "C4-N9") |
2162 | GMM_histo(c4_c5, "C4-C5") | 2201 | GMM_histo(c4_c5, "C4-C5") |
2163 | 2202 | ||
2164 | - GMM_histo(c1p_n9, "C1'-N9", hist=False, couleur='lightcoral') | 2203 | + GMM_histo(c1p_n9, "C1'-N9", hist=False, col='lightcoral') |
2165 | - GMM_histo(n9_c8, "N9-C8", hist=False, couleur='gold') | 2204 | + GMM_histo(n9_c8, "N9-C8", hist=False, col='gold') |
2166 | - GMM_histo(c8_n7, "C8-N7", hist=False, couleur='lightseagreen') | 2205 | + GMM_histo(c8_n7, "C8-N7", hist=False, col='lightseagreen') |
2167 | - GMM_histo(n7_c5, "N7-C5", hist=False, couleur='saddlebrown') | 2206 | + GMM_histo(n7_c5, "N7-C5", hist=False, col='saddlebrown') |
2168 | - GMM_histo(c5_c6, "C5-C6", hist=False, couleur='darkturquoise') | 2207 | + GMM_histo(c5_c6, "C5-C6", hist=False, col='darkturquoise') |
2169 | - GMM_histo(c6_o6, "C6-O6", hist=False, couleur='darkkhaki') | 2208 | + GMM_histo(c6_o6, "C6-O6", hist=False, col='darkkhaki') |
2170 | - GMM_histo(c6_n6, "C6-N6", hist=False, couleur='indigo') | 2209 | + GMM_histo(c6_n6, "C6-N6", hist=False, col='indigo') |
2171 | - GMM_histo(c6_n1, "C6-N1", hist=False, couleur='maroon') | 2210 | + GMM_histo(c6_n1, "C6-N1", hist=False, col='maroon') |
2172 | - GMM_histo(n1_c2, "N1-C2", hist=False, couleur='burlywood') | 2211 | + GMM_histo(n1_c2, "N1-C2", hist=False, col='burlywood') |
2173 | - GMM_histo(c2_n2, "C2-N2", hist=False, couleur='steelblue') | 2212 | + GMM_histo(c2_n2, "C2-N2", hist=False, col='steelblue') |
2174 | - GMM_histo(c2_n3, "C2-N3", hist=False, couleur='tomato') | 2213 | + GMM_histo(c2_n3, "C2-N3", hist=False, col='tomato') |
2175 | - GMM_histo(n3_c4, "N3-C4", hist=False, couleur='darkolivegreen') | 2214 | + GMM_histo(n3_c4, "N3-C4", hist=False, col='darkolivegreen') |
2176 | - GMM_histo(c4_n9, "C4-N9", hist=False, couleur='orchid') | 2215 | + GMM_histo(c4_n9, "C4-N9", hist=False, col='orchid') |
2177 | - GMM_histo(c4_c5, "C4-C5", hist=False, couleur='deeppink') | 2216 | + GMM_histo(c4_c5, "C4-C5", hist=False, col='deeppink') |
2178 | axes=plt.gca() | 2217 | axes=plt.gca() |
2179 | axes.set_ylim(0, 100) | 2218 | axes.set_ylim(0, 100) |
2180 | - plt.xlabel("Distance (Angström)") | 2219 | + plt.xlabel("Distance (Angströms)") |
2181 | - plt.title("GMM des distances entre atomes des cycles purines", fontsize=10) | 2220 | + plt.title("GMM of distances between atoms of the purine cycles", fontsize=10) |
2182 | - plt.savefig(runDir+ "/results/figures/GMM/all-atoms/distances/purines/" + "GMM des distances entre atomes des cycles purines.png") | 2221 | + plt.savefig(runDir+ "/results/figures/GMM/all-atoms/distances/purines/" + "GMM_distances_purine_cycles.png") |
2183 | plt.close() | 2222 | plt.close() |
2184 | 2223 | ||
2185 | os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/", exist_ok=True) | 2224 | os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/", exist_ok=True) |
... | @@ -2197,22 +2236,22 @@ def gmm_aa_dists(): | ... | @@ -2197,22 +2236,22 @@ def gmm_aa_dists(): |
2197 | GMM_histo(c4_n4, "C4-N4") | 2236 | GMM_histo(c4_n4, "C4-N4") |
2198 | GMM_histo(c4_o4, "C4-O4") | 2237 | GMM_histo(c4_o4, "C4-O4") |
2199 | 2238 | ||
2200 | - GMM_histo(c1p_n1, "C1'-N1", hist=False, couleur='lightcoral') | 2239 | + GMM_histo(c1p_n1, "C1'-N1", hist=False, col='lightcoral') |
2201 | - GMM_histo(n1_c6, "N1-C6", hist=False, couleur='gold') | 2240 | + GMM_histo(n1_c6, "N1-C6", hist=False, col='gold') |
2202 | - GMM_histo(c6_c5, "C6-C5", hist=False, couleur='lightseagreen') | 2241 | + GMM_histo(c6_c5, "C6-C5", hist=False, col='lightseagreen') |
2203 | - GMM_histo(c5_c4, "C5-C4", hist=False, couleur='deeppink') | 2242 | + GMM_histo(c5_c4, "C5-C4", hist=False, col='deeppink') |
2204 | - GMM_histo(c4_n3, "C4-N3", hist=False, couleur='red') | 2243 | + GMM_histo(c4_n3, "C4-N3", hist=False, col='red') |
2205 | - GMM_histo(n3_c2, "N3-C2", hist=False, couleur='lime') | 2244 | + GMM_histo(n3_c2, "N3-C2", hist=False, col='lime') |
2206 | - GMM_histo(c2_o2, "C2-O2", hist=False, couleur='indigo') | 2245 | + GMM_histo(c2_o2, "C2-O2", hist=False, col='indigo') |
2207 | - GMM_histo(c2_n1, "C2-N1", hist=False, couleur='maroon') | 2246 | + GMM_histo(c2_n1, "C2-N1", hist=False, col='maroon') |
2208 | - GMM_histo(c4_n4, "C4-N4", hist=False, couleur='burlywood') | 2247 | + GMM_histo(c4_n4, "C4-N4", hist=False, col='burlywood') |
2209 | - GMM_histo(c4_o4, "C4-O4", hist=False, couleur='steelblue') | 2248 | + GMM_histo(c4_o4, "C4-O4", hist=False, col='steelblue') |
2210 | axes=plt.gca() | 2249 | axes=plt.gca() |
2211 | #axes.set_xlim(1, 2) | 2250 | #axes.set_xlim(1, 2) |
2212 | axes.set_ylim(0, 100) | 2251 | axes.set_ylim(0, 100) |
2213 | - plt.xlabel("Distance (Angström)") | 2252 | + plt.xlabel("Distance (Angströms") |
2214 | - plt.title("GMM des distances entre atomes des cycles pyrimidines", fontsize=10) | 2253 | + plt.title("GMM of distances between atoms of the pyrimidine cycles", fontsize=10) |
2215 | - plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/pyrimidines/" + "GMM des distances entre atomes des cycles pyrimidines.png") | 2254 | + plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/pyrimidines/" + "GMM_distances_pyrimidine_cycles.png") |
2216 | plt.close() | 2255 | plt.close() |
2217 | 2256 | ||
2218 | os.chdir(runDir) | 2257 | os.chdir(runDir) |
... | @@ -2268,16 +2307,16 @@ def gmm_aa_torsions(): | ... | @@ -2268,16 +2307,16 @@ def gmm_aa_torsions(): |
2268 | GMM_histo(zeta, "Zeta", toric=True) | 2307 | GMM_histo(zeta, "Zeta", toric=True) |
2269 | GMM_histo(chi, "Xhi", toric=True) | 2308 | GMM_histo(chi, "Xhi", toric=True) |
2270 | 2309 | ||
2271 | - GMM_histo(alpha, "Alpha", toric=True, hist=False, couleur='red') | 2310 | + GMM_histo(alpha, "Alpha", toric=True, hist=False, col='red') |
2272 | - GMM_histo(beta, "Beta", toric=True, hist=False, couleur='firebrick') | 2311 | + GMM_histo(beta, "Beta", toric=True, hist=False, col='firebrick') |
2273 | - GMM_histo(gamma, "Gamma", toric=True, hist=False, couleur='limegreen') | 2312 | + GMM_histo(gamma, "Gamma", toric=True, hist=False, col='limegreen') |
2274 | - GMM_histo(delta, "Delta", toric=True, hist=False, couleur='darkslateblue') | 2313 | + GMM_histo(delta, "Delta", toric=True, hist=False, col='darkslateblue') |
2275 | - GMM_histo(epsilon, "Epsilon", toric=True, hist=False, couleur='goldenrod') | 2314 | + GMM_histo(epsilon, "Epsilon", toric=True, hist=False, col='goldenrod') |
2276 | - GMM_histo(zeta, "Zeta", toric=True, hist=False, couleur='teal') | 2315 | + GMM_histo(zeta, "Zeta", toric=True, hist=False, col='teal') |
2277 | - GMM_histo(chi, "Xhi", toric=True, hist=False, couleur='hotpink') | 2316 | + GMM_histo(chi, "Xhi", toric=True, hist=False, col='hotpink') |
2278 | - plt.xlabel("Angle(Degré)") | 2317 | + plt.xlabel("Angle (Degrees)") |
2279 | - plt.title("GMM des angles de torsion") | 2318 | + plt.title("GMM of torsion angles") |
2280 | - plt.savefig("GMM des angles de torsion.png") | 2319 | + plt.savefig("GMM_torsions.png") |
2281 | plt.close() | 2320 | plt.close() |
2282 | 2321 | ||
2283 | os.chdir(runDir) | 2322 | os.chdir(runDir) |
... | @@ -2301,20 +2340,20 @@ def gmm_wadley(): | ... | @@ -2301,20 +2340,20 @@ def gmm_wadley(): |
2301 | 2340 | ||
2302 | GMM_histo(p_c1p, "P-C1'") | 2341 | GMM_histo(p_c1p, "P-C1'") |
2303 | GMM_histo(c1p_p, "C1'-P") | 2342 | GMM_histo(c1p_p, "C1'-P") |
2304 | - GMM_histo(p_c1p, "P-C4'") | 2343 | + GMM_histo(p_c4p, "P-C4'") |
2305 | - GMM_histo(c1p_p, "C4'-P") | 2344 | + GMM_histo(c4p_p, "C4'-P") |
2306 | - | 2345 | + |
2307 | - GMM_histo(p_c1p, "P-C4'", toric=False, hist=False, couleur='gold') | 2346 | + GMM_histo(p_c4p, "P-C4'", toric=False, hist=False, col='gold') |
2308 | - GMM_histo(c1p_p, "C4'-P", toric=False, hist=False, couleur='indigo') | 2347 | + GMM_histo(c4p_p, "C4'-P", toric=False, hist=False, col='indigo') |
2309 | - GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, couleur='firebrick') | 2348 | + GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, col='firebrick') |
2310 | - GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, couleur='seagreen') | 2349 | + GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, col='seagreen') |
2311 | - plt.xlabel("Distance(Angström)") | 2350 | + plt.xlabel("Distance (Angströms)") |
2312 | - plt.title("GMM des distances (Pyle model)") | 2351 | + plt.title("GMM of distances (Pyle model)") |
2313 | - plt.savefig("GMM des distances (Pyle model).png") | 2352 | + plt.savefig("GMM_distances_pyle_model.png") |
2314 | plt.close() | 2353 | plt.close() |
2315 | 2354 | ||
2316 | # Flat Angles | 2355 | # Flat Angles |
2317 | - df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/angles_plans_wadley.csv")) | 2356 | + df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle.csv")) |
2318 | 2357 | ||
2319 | p_c1p_psuiv = list(df["P-C1'-P°"][~ np.isnan(df["P-C1'-P°"])]) | 2358 | p_c1p_psuiv = list(df["P-C1'-P°"][~ np.isnan(df["P-C1'-P°"])]) |
2320 | c1p_psuiv_c1psuiv = list(df["C1'-P°-C1'°"][~ np.isnan(df["C1'-P°-C1'°"])]) | 2359 | c1p_psuiv_c1psuiv = list(df["C1'-P°-C1'°"][~ np.isnan(df["C1'-P°-C1'°"])]) |
... | @@ -2326,11 +2365,11 @@ def gmm_wadley(): | ... | @@ -2326,11 +2365,11 @@ def gmm_wadley(): |
2326 | GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True) | 2365 | GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True) |
2327 | GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True) | 2366 | GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True) |
2328 | 2367 | ||
2329 | - GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True, hist=False, couleur='firebrick') | 2368 | + GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True, hist=False, col='firebrick') |
2330 | - GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True, hist=False, couleur='seagreen') | 2369 | + GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True, hist=False, col='seagreen') |
2331 | - plt.xlabel("Angle(Degré)") | 2370 | + plt.xlabel("Angle (Degrees)") |
2332 | - plt.title("GMM des angles plans (Pyle model)") | 2371 | + plt.title("GMM of flat angles (Pyle model)") |
2333 | - plt.savefig("GMM des angles plans (Pyle model).png") | 2372 | + plt.savefig("GMM_flat_angles_pyle_model.png") |
2334 | plt.close() | 2373 | plt.close() |
2335 | 2374 | ||
2336 | # Torsion angles | 2375 | # Torsion angles |
... | @@ -2367,15 +2406,15 @@ def gmm_wadley(): | ... | @@ -2367,15 +2406,15 @@ def gmm_wadley(): |
2367 | GMM_histo(eta_base, "Eta''", toric=True) | 2406 | GMM_histo(eta_base, "Eta''", toric=True) |
2368 | GMM_histo(theta_base, "Theta''", toric=True) | 2407 | GMM_histo(theta_base, "Theta''", toric=True) |
2369 | 2408 | ||
2370 | - GMM_histo(eta, "Eta", toric=True, hist=False, couleur='mediumaquamarine') | 2409 | + GMM_histo(eta, "Eta", toric=True, hist=False, col='mediumaquamarine') |
2371 | - GMM_histo(theta, "Theta", toric=True, hist=False, couleur='darkorchid') | 2410 | + GMM_histo(theta, "Theta", toric=True, hist=False, col='darkorchid') |
2372 | - GMM_histo(eta_prime, "Eta'", toric=True, hist=False, couleur='cyan') | 2411 | + GMM_histo(eta_prime, "Eta'", toric=True, hist=False, col='cyan') |
2373 | - GMM_histo(theta_prime, "Theta'", toric=True, hist=False, couleur='crimson') | 2412 | + GMM_histo(theta_prime, "Theta'", toric=True, hist=False, col='crimson') |
2374 | - GMM_histo(eta_base, "Eta''", toric=True, hist=False, couleur='royalblue') | 2413 | + GMM_histo(eta_base, "Eta''", toric=True, hist=False, col='royalblue') |
2375 | - GMM_histo(theta_base, "Theta''", toric=True, hist=False, couleur='palevioletred') | 2414 | + GMM_histo(theta_base, "Theta''", toric=True, hist=False, col='palevioletred') |
2376 | - plt.xlabel("Angle(Degré)") | 2415 | + plt.xlabel("Angle (Degrees)") |
2377 | - plt.title("GMM des angles de pseudotorsion") | 2416 | + plt.title("GMM of pseudo-torsion angles (Pyle Model)") |
2378 | - plt.savefig("GMM des angles de pseudotorsion.png") | 2417 | + plt.savefig("GMM_pseudotorsion_angles_pyle_model.png") |
2379 | plt.close() | 2418 | plt.close() |
2380 | 2419 | ||
2381 | os.chdir(runDir) | 2420 | os.chdir(runDir) |
... | @@ -2411,18 +2450,18 @@ def gmm_hrna(): | ... | @@ -2411,18 +2450,18 @@ def gmm_hrna(): |
2411 | GMM_histo(p_o5p, "P-O5'") | 2450 | GMM_histo(p_o5p, "P-O5'") |
2412 | GMM_histo(last_c4p_p, "C4'-P") | 2451 | GMM_histo(last_c4p_p, "C4'-P") |
2413 | 2452 | ||
2414 | - GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, couleur='lightcoral') | 2453 | + GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='lightcoral') |
2415 | - GMM_histo(b1_b2, "B1-B2", toric=False, hist=False, couleur='limegreen') | 2454 | + GMM_histo(b1_b2, "B1-B2", toric=False, hist=False, col='limegreen') |
2416 | - GMM_histo(c1p_b1, "C1'-B1", toric=False, hist=False, couleur='tomato') | 2455 | + GMM_histo(c1p_b1, "C1'-B1", toric=False, hist=False, col='tomato') |
2417 | - GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, couleur='aquamarine') | 2456 | + GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='aquamarine') |
2418 | - GMM_histo(c4p_c1p, "C4'-C1'", toric=False, hist=False, couleur='goldenrod') | 2457 | + GMM_histo(c4p_c1p, "C4'-C1'", toric=False, hist=False, col='goldenrod') |
2419 | - GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, couleur='darkcyan') | 2458 | + GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkcyan') |
2420 | - GMM_histo(last_c4p_p, "C4'-P", toric=False, hist=False, couleur='deeppink') | 2459 | + GMM_histo(last_c4p_p, "C4'-P", toric=False, hist=False, col='deeppink') |
2421 | axes = plt.gca() | 2460 | axes = plt.gca() |
2422 | axes.set_ylim(0, 100) | 2461 | axes.set_ylim(0, 100) |
2423 | - plt.xlabel("Distance (Angström)") | 2462 | + plt.xlabel("Distance (Angströms)") |
2424 | - plt.title("GMM des distances entre atomes HiRE-RNA") | 2463 | + plt.title("GMM of distances between HiRE-RNA beads") |
2425 | - plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM des distances entre atomes HiRE-RNA.png") | 2464 | + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM_distances_HiRE_RNA.png") |
2426 | plt.close() | 2465 | plt.close() |
2427 | 2466 | ||
2428 | # Angles | 2467 | # Angles |
... | @@ -2449,19 +2488,19 @@ def gmm_hrna(): | ... | @@ -2449,19 +2488,19 @@ def gmm_hrna(): |
2449 | GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True) | 2488 | GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True) |
2450 | GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True) | 2489 | GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True) |
2451 | 2490 | ||
2452 | - GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True, hist=False, couleur='lightcoral') | 2491 | + GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True, hist=False, col='lightcoral') |
2453 | - GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True, hist=False, couleur='limegreen') | 2492 | + GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True, hist=False, col='limegreen') |
2454 | - GMM_histo(lastc5p_lastc4p_p, "C5'-C4'-P", toric=True, hist=False, couleur='tomato') | 2493 | + GMM_histo(lastc5p_lastc4p_p, "C5'-C4'-P", toric=True, hist=False, col='tomato') |
2455 | - GMM_histo(p_o5p_c5p, "P-O5'-C5'", toric=True, hist=False, couleur='aquamarine') | 2494 | + GMM_histo(p_o5p_c5p, "P-O5'-C5'", toric=True, hist=False, col='aquamarine') |
2456 | - GMM_histo(o5p_c5p_c4p, "O5'-C5'-C4'", toric=True, hist=False, couleur='goldenrod') | 2495 | + GMM_histo(o5p_c5p_c4p, "O5'-C5'-C4'", toric=True, hist=False, col='goldenrod') |
2457 | - GMM_histo(c5p_c4p_c1p, "C5'-C4'-C1'", toric=True, hist=False, couleur='darkcyan') | 2496 | + GMM_histo(c5p_c4p_c1p, "C5'-C4'-C1'", toric=True, hist=False, col='darkcyan') |
2458 | - GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True, hist=False, couleur='deeppink') | 2497 | + GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True, hist=False, col='deeppink') |
2459 | - GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True, hist=False, couleur='indigo') | 2498 | + GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True, hist=False, col='indigo') |
2460 | axes = plt.gca() | 2499 | axes = plt.gca() |
2461 | axes.set_ylim(0, 100) | 2500 | axes.set_ylim(0, 100) |
2462 | - plt.xlabel("Angle (Degré)") | 2501 | + plt.xlabel("Angle (Degres)") |
2463 | - plt.title("GMM des angles entre atomes HiRE-RNA") | 2502 | + plt.title("GMM of angles between HiRE-RNA beads") |
2464 | - plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM des angles entre atomes HiRE-RNA.png") | 2503 | + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM_angles_HiRE_RNA.png") |
2465 | plt.close() | 2504 | plt.close() |
2466 | 2505 | ||
2467 | # Torsions | 2506 | # Torsions |
... | @@ -2488,24 +2527,24 @@ def gmm_hrna(): | ... | @@ -2488,24 +2527,24 @@ def gmm_hrna(): |
2488 | GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True) | 2527 | GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True) |
2489 | GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True) | 2528 | GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True) |
2490 | 2529 | ||
2491 | - GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True, hist=False, couleur='darkred') | 2530 | + GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True, hist=False, col='darkred') |
2492 | - GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True, hist=False, couleur='chocolate') | 2531 | + GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True, hist=False, col='chocolate') |
2493 | - GMM_histo(c5_c4_c1_b1, "C5'-C4'-C1'-B1", toric=True, hist=False, couleur='mediumvioletred') | 2532 | + GMM_histo(c5_c4_c1_b1, "C5'-C4'-C1'-B1", toric=True, hist=False, col='mediumvioletred') |
2494 | - GMM_histo(c4_c1_b1_b2, "C4'-C1'-B1-B2", toric=True, hist=False, couleur='cadetblue') | 2533 | + GMM_histo(c4_c1_b1_b2, "C4'-C1'-B1-B2", toric=True, hist=False, col='cadetblue') |
2495 | - GMM_histo(o5_c5_c4_psuiv, "O5'-C5'-C4'-P°", toric=True, hist=False, couleur='darkkhaki') | 2534 | + GMM_histo(o5_c5_c4_psuiv, "O5'-C5'-C4'-P°", toric=True, hist=False, col='darkkhaki') |
2496 | - GMM_histo(c5_c4_psuiv_o5suiv, "C5'-C4'-P°-O5'°", toric=True, hist=False, couleur='springgreen') | 2535 | + GMM_histo(c5_c4_psuiv_o5suiv, "C5'-C4'-P°-O5'°", toric=True, hist=False, col='springgreen') |
2497 | - GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True, hist=False, couleur='indigo') | 2536 | + GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True, hist=False, col='indigo') |
2498 | - GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True, hist=False, couleur='gold') | 2537 | + GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True, hist=False, col='gold') |
2499 | - plt.xlabel("Angle(Degré)") | 2538 | + plt.xlabel("Angle (Degrees)") |
2500 | - plt.title("GMM des angles de torsion (hire-RNA)") | 2539 | + plt.title("GMM of torsion angles between HiRE-RNA beads") |
2501 | - plt.savefig("GMM des angles de torsion (hire-RNA).png") | 2540 | + plt.savefig("GMM_torsions_HiRE_RNA.png") |
2502 | plt.close() | 2541 | plt.close() |
2503 | 2542 | ||
2504 | os.chdir(runDir) | 2543 | os.chdir(runDir) |
2505 | setproctitle("GMM (HiRE-RNA) finished") | 2544 | setproctitle("GMM (HiRE-RNA) finished") |
2506 | 2545 | ||
2507 | @trace_unhandled_exceptions | 2546 | @trace_unhandled_exceptions |
2508 | -def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance): | 2547 | +def gmm_hrna_basepair_type(type_LW, ntpair, data): |
2509 | """ | 2548 | """ |
2510 | function to plot the statistical figures you want | 2549 | function to plot the statistical figures you want |
2511 | By type of pairing: | 2550 | By type of pairing: |
... | @@ -2520,196 +2559,116 @@ def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance | ... | @@ -2520,196 +2559,116 @@ def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance |
2520 | plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5) | 2559 | plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5) |
2521 | 2560 | ||
2522 | plt.subplot(2, 1, 1) | 2561 | plt.subplot(2, 1, 1) |
2523 | - | 2562 | + GMM_histo(data["211_angle"], f"{type_LW}_{ntpair}_C1'-B1-B1pair", toric=True, hist=False, col='cyan' ) |
2524 | - if len(angle_1) > 0 : | 2563 | + GMM_histo(data["112_angle"], f"{type_LW}_{ntpair}_B1-B1pair-C1'pair", toric=True, hist=False, col='magenta') |
2525 | - GMM_histo(angle_1, "C4'-C1'-B1", toric=True, hist=False, couleur='cyan' ) | 2564 | + GMM_histo(data["3211_torsion"], f"{type_LW}_{ntpair}_C4'-C1'-B1-B1pair", toric=True, hist=False, col='black' ) |
2526 | - if len(angle_2) > 0 : | 2565 | + GMM_histo(data["1123_torsion"], f"{type_LW}_{ntpair}_B1-B1pair-C1'pair-C4'pair", toric=True, hist=False, col='maroon') |
2527 | - GMM_histo(angle_2, "C1'-B1-B1pair", toric=True, hist=False, couleur='magenta') | 2566 | + GMM_histo(data["alpha1"], f"{type_LW}_{ntpair}_alpha_1", toric=True, hist=False, col="yellow") |
2528 | - if len(angle_3) > 0 : | 2567 | + GMM_histo(data["alpha2"], f"{type_LW}_{ntpair}_alpha_2", toric=True, hist=False, col='olive') |
2529 | - GMM_histo(angle_3, "B1-B1pair-C1'pair", toric=True, hist=False, couleur="yellow") | 2568 | + plt.xlabel("Angle (degree)") |
2530 | - if len(angle_4) > 0 : | 2569 | + plt.title(f"GMM of plane angles for {type_LW} {ntpair} basepairs", fontsize=10) |
2531 | - GMM_histo(angle_4, "B1pair-C1'pair-C4'pair", toric=True, hist=False, couleur='olive') | ||
2532 | - plt.xlabel("Angle(degré)") | ||
2533 | - plt.title("GMM des angles plans pour les measure_hrna_basepairs " +type_LW , fontsize=10) | ||
2534 | 2570 | ||
2535 | plt.subplot(2, 1, 2) | 2571 | plt.subplot(2, 1, 2) |
2536 | - if len(distance)>0 : | 2572 | + GMM_histo(data["Distance"], f"Distance between {type_LW} {ntpair} tips", toric=False, hist=False, col="cyan") |
2537 | - GMM_histo(distance, "Distance pointes " + type_LW, save=False) | 2573 | + GMM_histo(data["dB1"], f"{type_LW} {ntpair} dB1", toric=False, hist=False, col="tomato") |
2538 | - | 2574 | + GMM_histo(data["dB2"], f"{type_LW} {ntpair} dB2", toric=False, hist=False, col="goldenrod") |
2539 | - plt.savefig("Mesures measure_hrna_basepairs " +type_LW+ ".png" ) | 2575 | + plt.xlabel("Distance (Angströms)") |
2576 | + plt.title(f"GMM of distances for {type_LW} {ntpair} basepairs", fontsize=10) | ||
2577 | + | ||
2578 | + plt.savefig(f"{type_LW}_{ntpair}_basepairs.png" ) | ||
2540 | plt.close() | 2579 | plt.close() |
2541 | - setproctitle(f"GMM (HiRE-RNA {type_LW} basepairs) finished") | 2580 | + setproctitle(f"GMM (HiRE-RNA {type_LW} {ntpair} basepairs) finished") |
2542 | 2581 | ||
2543 | @trace_unhandled_exceptions | 2582 | @trace_unhandled_exceptions |
2544 | def gmm_hrna_basepairs(): | 2583 | def gmm_hrna_basepairs(): |
2545 | 2584 | ||
2546 | setproctitle("GMM (HiRE-RNA basepairs)") | 2585 | setproctitle("GMM (HiRE-RNA basepairs)") |
2547 | 2586 | ||
2548 | - df=pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv")) | 2587 | + df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv")) |
2549 | - | 2588 | + |
2550 | - cWW=df[df['type LW']=='cWW'] | 2589 | + lw = ["cWW", "tWW", "cWH", "tWH", "cHW", "tHW", "cWS", "tWS", "cSW", "tSW", "cHH", "tHH", "cSH", "tSH", "cHS", "tHS", "cSS", "tSS"] |
2551 | - cWW_dist=list(cWW["Distance"]) | ||
2552 | - cWW_angle_1=list(cWW["C4'-C1'-B1"]) | ||
2553 | - cWW_angle_2=list(cWW["C1'-B1-B1pair"]) | ||
2554 | - cWW_angle_3=list(cWW["B1-B1pair-C1'pair"]) | ||
2555 | - cWW_angle_4=list(cWW["B1pair-C1'pair-C4'pair"]) | ||
2556 | - tWW=df[df['type LW']=='tWW'] | ||
2557 | - tWW_dist=list(tWW["Distance"]) | ||
2558 | - tWW_angle_1=list(tWW["C4'-C1'-B1"]) | ||
2559 | - tWW_angle_2=list(tWW["C1'-B1-B1pair"]) | ||
2560 | - tWW_angle_3=list(tWW["B1-B1pair-C1'pair"]) | ||
2561 | - tWW_angle_4=list(tWW["B1pair-C1'pair-C4'pair"]) | ||
2562 | - cWH=df[df['type LW']=='cWH'] | ||
2563 | - cWH_dist=list(cWH["Distance"]) | ||
2564 | - cWH_angle_1=list(cWH["C4'-C1'-B1"]) | ||
2565 | - cWH_angle_2=list(cWH["C1'-B1-B1pair"]) | ||
2566 | - cWH_angle_3=list(cWH["B1-B1pair-C1'pair"]) | ||
2567 | - cWH_angle_4=list(cWH["B1pair-C1'pair-C4'pair"]) | ||
2568 | - tWH=df[df['type LW']=='tWH'] | ||
2569 | - tWH_dist=list(tWH["Distance"]) | ||
2570 | - tWH_angle_1=list(tWH["C4'-C1'-B1"]) | ||
2571 | - tWH_angle_2=list(tWH["C1'-B1-B1pair"]) | ||
2572 | - tWH_angle_3=list(tWH["B1-B1pair-C1'pair"]) | ||
2573 | - tWH_angle_4=list(tWH["B1pair-C1'pair-C4'pair"]) | ||
2574 | - cHW=df[df['type LW']=='cHW'] | ||
2575 | - cHW_dist=list(cHW["Distance"]) | ||
2576 | - cHW_angle_1=list(cHW["C4'-C1'-B1"]) | ||
2577 | - cHW_angle_2=list(cHW["C1'-B1-B1pair"]) | ||
2578 | - cHW_angle_3=list(cHW["B1-B1pair-C1'pair"]) | ||
2579 | - cHW_angle_4=list(cHW["B1pair-C1'pair-C4'pair"]) | ||
2580 | - tHW=df[df['type LW']=='tHW'] | ||
2581 | - tHW_dist=list(tHW["Distance"]) | ||
2582 | - tHW_angle_1=list(tHW["C4'-C1'-B1"]) | ||
2583 | - tHW_angle_2=list(tHW["C1'-B1-B1pair"]) | ||
2584 | - tHW_angle_3=list(tHW["B1-B1pair-C1'pair"]) | ||
2585 | - tHW_angle_4=list(tHW["B1pair-C1'pair-C4'pair"]) | ||
2586 | - cWS=df[df['type LW']=='cWS'] | ||
2587 | - cWS_dist=list(cWS["Distance"]) | ||
2588 | - cWS_angle_1=list(cWS["C4'-C1'-B1"]) | ||
2589 | - cWS_angle_2=list(cWS["C1'-B1-B1pair"]) | ||
2590 | - cWS_angle_3=list(cWS["B1-B1pair-C1'pair"]) | ||
2591 | - cWS_angle_4=list(cWS["B1pair-C1'pair-C4'pair"]) | ||
2592 | - tWS=df[df['type LW']=='tWS'] | ||
2593 | - tWS_dist=list(tWS["Distance"]) | ||
2594 | - tWS_angle_1=list(tWS["C4'-C1'-B1"]) | ||
2595 | - tWS_angle_2=list(tWS["C1'-B1-B1pair"]) | ||
2596 | - tWS_angle_3=list(tWS["B1-B1pair-C1'pair"]) | ||
2597 | - tWS_angle_4=list(tWS["B1pair-C1'pair-C4'pair"]) | ||
2598 | - cSW=df[df['type LW']=='cSW'] | ||
2599 | - cSW_dist=list(cSW["Distance"]) | ||
2600 | - cSW_angle_1=list(cSW["C4'-C1'-B1"]) | ||
2601 | - cSW_angle_2=list(cSW["C1'-B1-B1pair"]) | ||
2602 | - cSW_angle_3=list(cSW["B1-B1pair-C1'pair"]) | ||
2603 | - cSW_angle_4=list(cSW["B1pair-C1'pair-C4'pair"]) | ||
2604 | - tSW=df[df['type LW']=='tSW'] | ||
2605 | - tSW_dist=list(tSW["Distance"]) | ||
2606 | - tSW_angle_1=list(tSW["C4'-C1'-B1"]) | ||
2607 | - tSW_angle_2=list(tSW["C1'-B1-B1pair"]) | ||
2608 | - tSW_angle_3=list(tSW["B1-B1pair-C1'pair"]) | ||
2609 | - tSW_angle_4=list(tSW["B1pair-C1'pair-C4'pair"]) | ||
2610 | - cHH=df[df['type LW']=='cHH'] | ||
2611 | - cHH_dist=list(cHH["Distance"]) | ||
2612 | - cHH_angle_1=list(cHH["C4'-C1'-B1"]) | ||
2613 | - cHH_angle_2=list(cHH["C1'-B1-B1pair"]) | ||
2614 | - cHH_angle_3=list(cHH["B1-B1pair-C1'pair"]) | ||
2615 | - cHH_angle_4=list(cHH["B1pair-C1'pair-C4'pair"]) | ||
2616 | - tHH=df[df['type LW']=='tHH'] | ||
2617 | - tHH_dist=list(tHH["Distance"]) | ||
2618 | - tHH_angle_1=list(tHH["C4'-C1'-B1"]) | ||
2619 | - tHH_angle_2=list(tHH["C1'-B1-B1pair"]) | ||
2620 | - tHH_angle_3=list(tHH["B1-B1pair-C1'pair"]) | ||
2621 | - tHH_angle_4=list(tHH["B1pair-C1'pair-C4'pair"]) | ||
2622 | - cSH=df[df['type LW']=='cSH'] | ||
2623 | - cSH_dist=list(cSH["Distance"]) | ||
2624 | - cSH_angle_1=list(cSH["C4'-C1'-B1"]) | ||
2625 | - cSH_angle_2=list(cSH["C1'-B1-B1pair"]) | ||
2626 | - cSH_angle_3=list(cSH["B1-B1pair-C1'pair"]) | ||
2627 | - cSH_angle_4=list(cSH["B1pair-C1'pair-C4'pair"]) | ||
2628 | - tSH=df[df['type LW']=='tSH'] | ||
2629 | - tSH_dist=list(tSH["Distance"]) | ||
2630 | - tSH_angle_1=list(tSH["C4'-C1'-B1"]) | ||
2631 | - tSH_angle_2=list(tSH["C1'-B1-B1pair"]) | ||
2632 | - tSH_angle_3=list(tSH["B1-B1pair-C1'pair"]) | ||
2633 | - tSH_angle_4=list(tSH["B1pair-C1'pair-C4'pair"]) | ||
2634 | - cHS=df[df['type LW']=='cHS'] | ||
2635 | - cHS_dist=list(cHS["Distance"]) | ||
2636 | - cHS_angle_1=list(cHS["C4'-C1'-B1"]) | ||
2637 | - cHS_angle_2=list(cHS["C1'-B1-B1pair"]) | ||
2638 | - cHS_angle_3=list(cHS["B1-B1pair-C1'pair"]) | ||
2639 | - cHS_angle_4=list(cHS["B1pair-C1'pair-C4'pair"]) | ||
2640 | - tHS=df[df['type LW']=='tHS'] | ||
2641 | - tHS_dist=list(tHS["Distance"]) | ||
2642 | - tHS_angle_1=list(tHS["C4'-C1'-B1"]) | ||
2643 | - tHS_angle_2=list(tHS["C1'-B1-B1pair"]) | ||
2644 | - tHS_angle_3=list(tHS["B1-B1pair-C1'pair"]) | ||
2645 | - tHS_angle_4=list(tHS["B1pair-C1'pair-C4'pair"]) | ||
2646 | - cSS=df[df['type LW']=='cSS'] | ||
2647 | - cSS_dist=list(cSS["Distance"]) | ||
2648 | - cSS_angle_1=list(cSS["C4'-C1'-B1"]) | ||
2649 | - cSS_angle_2=list(cSS["C1'-B1-B1pair"]) | ||
2650 | - cSS_angle_3=list(cSS["B1-B1pair-C1'pair"]) | ||
2651 | - cSS_angle_4=list(cSS["B1pair-C1'pair-C4'pair"]) | ||
2652 | - tSS=df[df['type LW']=='tSS'] | ||
2653 | - tSS_dist=list(tSS["Distance"]) | ||
2654 | - tSS_angle_1=list(tSS["C4'-C1'-B1"]) | ||
2655 | - tSS_angle_2=list(tSS["C1'-B1-B1pair"]) | ||
2656 | - tSS_angle_3=list(tSS["B1-B1pair-C1'pair"]) | ||
2657 | - tSS_angle_4=list(tSS["B1pair-C1'pair-C4'pair"]) | ||
2658 | 2590 | ||
2659 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) | 2591 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) |
2660 | os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/") | 2592 | os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/") |
2661 | 2593 | ||
2662 | - gmm_hrna_basepair_type('cWW', cWW_angle_1, cWW_angle_2, cWW_angle_3, cWW_angle_4, cWW_dist) | 2594 | + for lw_type in lw: |
2663 | - gmm_hrna_basepair_type('tWW', tWW_angle_1, tWW_angle_2, tWW_angle_3, tWW_angle_4, tWW_dist) | 2595 | + data = df[df['type_LW'] == lw_type ] |
2664 | - gmm_hrna_basepair_type('cWH', cWH_angle_1, cWH_angle_2, cWH_angle_3, cWH_angle_4, cWH_dist) | 2596 | + if len(data): |
2665 | - gmm_hrna_basepair_type('tWH', tWH_angle_1, tWH_angle_2, tWH_angle_3, tWH_angle_4, tWH_dist) | 2597 | + for b1 in ['A','C','G','U']: |
2666 | - gmm_hrna_basepair_type('cHW', cHW_angle_1, cHW_angle_2, cHW_angle_3, cHW_angle_4, cHW_dist) | 2598 | + for b2 in ['A','C','G','U']: |
2667 | - gmm_hrna_basepair_type('tHW', tHW_angle_1, tHW_angle_2, tHW_angle_3, tHW_angle_4, tHW_dist) | 2599 | + thisbases = data[(data.nt1_res == b1)&(data.nt2_res == b2)] |
2668 | - gmm_hrna_basepair_type('tWS', tWS_angle_1, tWS_angle_2, tWS_angle_3, tWS_angle_4, tWS_dist) | 2600 | + if len(thisbases): |
2669 | - gmm_hrna_basepair_type('cWS', cWS_angle_1, cWS_angle_2, cWS_angle_3, cWS_angle_4, cWS_dist) | 2601 | + gmm_hrna_basepair_type(lw_type, b1+b2, thisbases) |
2670 | - gmm_hrna_basepair_type('tSW', tSW_angle_1, tSW_angle_2, tSW_angle_3, tSW_angle_4, tSW_dist) | 2602 | + |
2671 | - gmm_hrna_basepair_type('cSW', cSW_angle_1, cSW_angle_2, cSW_angle_3, cSW_angle_4, cSW_dist) | 2603 | + # colors = ['lightcoral', "lightseagreen", "black", "goldenrod", "olive", "steelblue", "silver", "deeppink", "navy", |
2672 | - gmm_hrna_basepair_type('cHH', cHH_angle_1, cHH_angle_2, cHH_angle_3, cHH_angle_4, cHH_dist) | 2604 | + # "sienna", "maroon", "orange", "mediumaquamarine", "tomato", "indigo", "orchid", "tan", "lime"] |
2673 | - gmm_hrna_basepair_type('tHH', tHH_angle_1, tHH_angle_2, tHH_angle_3, tHH_angle_4, tHH_dist) | 2605 | + # for lw_type, col in zip(lw, colors): |
2674 | - gmm_hrna_basepair_type('cSH', cSH_angle_1, cSH_angle_2, cSH_angle_3, cSH_angle_4, cSH_dist) | 2606 | + # data = df[df['type LW'] == lw_type] |
2675 | - gmm_hrna_basepair_type('tSH', tSH_angle_1, tSH_angle_2, tSH_angle_3, tSH_angle_4, tSH_dist) | 2607 | + # GMM_histo(data.Distance, lw_type, toric=False, hist=False, col=col) |
2676 | - gmm_hrna_basepair_type('cHS', cHS_angle_1, cHS_angle_2, cHS_angle_3, cHS_angle_4, cHS_dist) | 2608 | + # plt.xlabel('Distance (Angströms)') |
2677 | - gmm_hrna_basepair_type('tHS', tHS_angle_1, tHS_angle_2, tHS_angle_3, tHS_angle_4, tHS_dist) | 2609 | + # plt.title("GMM of distances between base tips ("+str(nt)+ " values)", fontsize=8) |
2678 | - gmm_hrna_basepair_type('cSS', cSS_angle_1, cSS_angle_2, cSS_angle_3, cSS_angle_4, cSS_dist) | 2610 | + # plt.savefig("distances_between_tips.png") |
2679 | - gmm_hrna_basepair_type('tSS', tSS_angle_1, tSS_angle_2, tSS_angle_3, tSS_angle_4, tSS_dist) | 2611 | + # plt.close() |
2680 | - | ||
2681 | - nc=len(cWW)+len(cHH)+len(cSS)+len(cWH)+len(cHW)+len(cWS)+len(cSW)+len(cHS)+len(cSH) | ||
2682 | - GMM_histo(cWW_dist, "cWW", toric=False, hist=False, couleur='lightcoral') | ||
2683 | - GMM_histo(cHH_dist, "cHH", toric=False, hist=False, couleur='lightseagreen') | ||
2684 | - GMM_histo(cSS_dist, "cSS", toric=False, hist=False, couleur='black') | ||
2685 | - GMM_histo(cWH_dist, "cWH", toric=False, hist=False, couleur='goldenrod') | ||
2686 | - GMM_histo(cHW_dist, "cHW", toric=False, hist=False, couleur='olive') | ||
2687 | - GMM_histo(cWS_dist, "cWS", toric=False, hist=False, couleur='steelblue') | ||
2688 | - GMM_histo(cSW_dist, "cSW", toric=False, hist=False, couleur='silver') | ||
2689 | - GMM_histo(cHS_dist, "cHS", toric=False, hist=False, couleur='deeppink') | ||
2690 | - GMM_histo(cSH_dist, "cSH", toric=False, hist=False, couleur='navy') | ||
2691 | - plt.xlabel('Distance (Angström)') | ||
2692 | - plt.title("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs cis ("+str(nc)+ " valeurs)", fontsize=8) | ||
2693 | - plt.savefig("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs cis (" +str(nc)+ " valeurs).png") | ||
2694 | - plt.close() | ||
2695 | - | ||
2696 | - nt=len(tWW)+len(tHH)+len(tSS)+len(tWH)+len(tHW)+len(tWS)+len(tSW)+len(tHS)+len(tSH) | ||
2697 | - GMM_histo(tWW_dist, "tWW", toric=False, hist=False, couleur='sienna') | ||
2698 | - GMM_histo(tHH_dist, "tHH", toric=False, hist=False, couleur='maroon') | ||
2699 | - GMM_histo(tSS_dist, "tSS", toric=False, hist=False, couleur='orange') | ||
2700 | - GMM_histo(tWH_dist, "tWH", toric=False, hist=False, couleur='mediumaquamarine') | ||
2701 | - GMM_histo(tHW_dist, "tHW", toric=False, hist=False, couleur='tomato') | ||
2702 | - GMM_histo(tWS_dist, "tWS", toric=False, hist=False, couleur='indigo') | ||
2703 | - GMM_histo(tSW_dist, "tSW", toric=False, hist=False, couleur='orchid') | ||
2704 | - GMM_histo(tHS_dist, "tHS", toric=False, hist=False, couleur='tan') | ||
2705 | - GMM_histo(tSH_dist, "tSH", toric=False, hist=False, couleur='lime') | ||
2706 | - plt.xlabel('Distance (Angström)') | ||
2707 | - plt.title("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs trans ("+str(nt)+ " valeurs)", fontsize=8) | ||
2708 | - plt.savefig("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs trans (" +str(nt)+ " valeurs).png") | ||
2709 | - plt.close() | ||
2710 | 2612 | ||
2711 | os.chdir(runDir) | 2613 | os.chdir(runDir) |
2712 | setproctitle(f"GMM (HiRE-RNA basepairs) finished") | 2614 | setproctitle(f"GMM (HiRE-RNA basepairs) finished") |
2615 | + | ||
2616 | +def merge_jsons(): | ||
2617 | + # All atom distances | ||
2618 | + bonds = ["O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'", "C1'-N9", | ||
2619 | + "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5", | ||
2620 | + "C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"] | ||
2621 | + bonds = [ runDir + "/results/geometry/json/" + x + ".json" for x in bonds ] | ||
2622 | + concat_jsons(bonds, runDir + "/results/geometry/json/all_atom_distances.json") | ||
2623 | + | ||
2624 | + | ||
2625 | + # All atom torsions | ||
2626 | + torsions = ["Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Xhi", "Zeta"] | ||
2627 | + torsions = [ runDir + "/results/geometry/json/" + x + ".json" for x in torsions ] | ||
2628 | + concat_jsons(torsions, runDir + "/results/geometry/json/all_atom_torsions.json") | ||
2629 | + | ||
2630 | + # HiRE-RNA distances | ||
2631 | + hrnabonds = ["P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2", "C4'-P"] | ||
2632 | + hrnabonds = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnabonds ] | ||
2633 | + concat_jsons(hrnabonds, runDir + "/results/geometry/json/hirerna_distances.json") | ||
2634 | + | ||
2635 | + # HiRE-RNA angles | ||
2636 | + hrnaangles = ["P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2", "C4'-P-O5'", "C5'-C4'-P", "C1'-C4'-P"] | ||
2637 | + hrnaangles = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnaangles ] | ||
2638 | + concat_jsons(hrnaangles, runDir + "/results/geometry/json/hirerna_angles.json") | ||
2639 | + | ||
2640 | + # HiRE-RNA torsions | ||
2641 | + hrnators = ["P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "C4'-P°-O5'°-C5'°", "C5'-C4'-P°-O5'°", "C1'-C4'-P°-O5'°", "O5'-C5'-C4'-P°"] | ||
2642 | + hrnators = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnators ] | ||
2643 | + concat_jsons(hrnators, runDir + "/results/geometry/json/hirerna_torsions.json") | ||
2644 | + | ||
2645 | + # HiRE-RNA basepairs | ||
2646 | + for nt1 in ['A', 'C', 'G', 'U']: | ||
2647 | + for nt2 in ['A', 'C', 'G', 'U']: | ||
2648 | + bps = glob.glob(runDir + f"/results/geometry/json/*{nt1}{nt2}*.json") | ||
2649 | + concat_jsons(bps, runDir + f"/results/geometry/json/hirerna_{nt1}{nt2}_basepairs.json") | ||
2650 | + | ||
2651 | + # Delete previous files | ||
2652 | + for f in bonds + torsions + hrnabonds + hrnaangles + hrnators: | ||
2653 | + try: | ||
2654 | + os.remove(f) | ||
2655 | + except FileNotFoundError: | ||
2656 | + pass | ||
2657 | + for f in glob.glob(runDir + "/results/geometry/json/t*.json"): | ||
2658 | + try: | ||
2659 | + os.remove(f) | ||
2660 | + except FileNotFoundError: | ||
2661 | + pass | ||
2662 | + for f in glob.glob(runDir + "/results/geometry/json/c*.json"): | ||
2663 | + try: | ||
2664 | + os.remove(f) | ||
2665 | + except FileNotFoundError: | ||
2666 | + pass | ||
2667 | + for f in glob.glob(runDir + "/results/geometry/json/Distance*.json"): | ||
2668 | + try: | ||
2669 | + os.remove(f) | ||
2670 | + except FileNotFoundError: | ||
2671 | + pass | ||
2713 | 2672 | ||
2714 | @trace_unhandled_exceptions | 2673 | @trace_unhandled_exceptions |
2715 | def concat_dataframes(fpath, outfilename): | 2674 | def concat_dataframes(fpath, outfilename): |
... | @@ -2735,6 +2694,23 @@ def concat_dataframes(fpath, outfilename): | ... | @@ -2735,6 +2694,23 @@ def concat_dataframes(fpath, outfilename): |
2735 | idxQueue.put(thr_idx) # replace the thread index in the queue | 2694 | idxQueue.put(thr_idx) # replace the thread index in the queue |
2736 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") | 2695 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") |
2737 | 2696 | ||
2697 | +def concat_jsons(flist, outfilename): | ||
2698 | + """ | ||
2699 | + Reads JSON files computed by the geometry jobs and merges them into a smaller | |
2700 | + number of files | |
2701 | + """ | ||
2702 | + | ||
2703 | + result = [] | ||
2704 | + for f in flist: | ||
2705 | + # if not path.isfile(f): | ||
2706 | + # continue: | ||
2707 | + with open(f, "rb") as infile: | ||
2708 | + result.append(json.load(infile)) | ||
2709 | + | ||
2710 | + # write the files | ||
2711 | + with open(outfilename, 'w', encoding='utf-8') as f: | ||
2712 | + json.dump(result, f, indent=4) | ||
2713 | + | ||
2738 | def process_jobs(joblist): | 2714 | def process_jobs(joblist): |
2739 | """ | 2715 | """ |
2740 | Starts a Pool to run the Job() objects in joblist. | 2716 | Starts a Pool to run the Job() objects in joblist. |
... | @@ -2759,7 +2735,6 @@ def process_jobs(joblist): | ... | @@ -2759,7 +2735,6 @@ def process_jobs(joblist): |
2759 | print("Something went wrong") | 2735 | print("Something went wrong") |
2760 | 2736 | ||
2761 | if __name__ == "__main__": | 2737 | if __name__ == "__main__": |
2762 | - | ||
2763 | os.makedirs(runDir + "/results/figures/", exist_ok=True) | 2738 | os.makedirs(runDir + "/results/figures/", exist_ok=True) |
2764 | 2739 | ||
2765 | # parse options | 2740 | # parse options |
... | @@ -2897,29 +2872,29 @@ if __name__ == "__main__": | ... | @@ -2897,29 +2872,29 @@ if __name__ == "__main__": |
2897 | 2872 | ||
2898 | # Do general family statistics | 2873 | # Do general family statistics |
2899 | 2874 | ||
2900 | - joblist.append(Job(function=stats_len)) # Computes figures about chain lengths | 2875 | + # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths |
2901 | - joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) | 2876 | + # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) |
2902 | - for f in famlist: | 2877 | + # for f in famlist: |
2903 | - joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) | 2878 | + # joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) |
2904 | - if f not in ignored: | 2879 | + # if f not in ignored: |
2905 | - joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) | 2880 | + # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) |
2906 | 2881 | ||
2907 | 2882 | ||
2908 | # Do geometric measures on all chains | 2883 | # Do geometric measures on all chains |
2909 | 2884 | ||
2910 | - if n_unmapped_chains: | 2885 | + # if n_unmapped_chains: |
2911 | - os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) | 2886 | + # os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) |
2912 | - liste_struct=os.listdir(path_to_3D_data + "renumbered_rna_only") | 2887 | + # liste_struct = os.listdir(path_to_3D_data + "renumbered_rna_only") |
2913 | - f_prec = os.listdir(path_to_3D_data + "renumbered_rna_only")[0] | 2888 | + # if '4zdo_1_E.cif' in liste_struct: |
2914 | - if '4zdo_1_E.cif' in liste_struct: | 2889 | + # liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now |
2915 | - liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now | 2890 | + # if '4zdp_1_E.cif' in liste_struct: |
2916 | - if '4zdp_1_E.cif' in liste_struct: | 2891 | + # liste_struct.remove('4zdp_1_E.cif') |
2917 | - liste_struct.remove('4zdp_1_E.cif') | 2892 | + # for f in liste_struct: |
2918 | - for f in liste_struct: | 2893 | + # if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]): |
2919 | - joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances | 2894 | + # joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances |
2920 | 2895 | ||
2921 | 2896 | ||
2922 | - process_jobs(joblist) | 2897 | + # process_jobs(joblist) |
2923 | 2898 | ||
2924 | # Now process the memory-heavy tasks family by family | 2899 | # Now process the memory-heavy tasks family by family |
2925 | if DO_AVG_DISTANCE_MATRIX: | 2900 | if DO_AVG_DISTANCE_MATRIX: |
... | @@ -2935,33 +2910,34 @@ if __name__ == "__main__": | ... | @@ -2935,33 +2910,34 @@ if __name__ == "__main__": |
2935 | 2910 | ||
2936 | # finish the work after the parallel portions | 2911 | # finish the work after the parallel portions |
2937 | 2912 | ||
2938 | - per_chain_stats() # per chain base frequencies and basepair types | 2913 | + # per_chain_stats() # per chain base frequencies and basepair types |
2939 | - seq_idty() # identity matrices from pre-computed .npy matrices | 2914 | + # seq_idty() # identity matrices from pre-computed .npy matrices |
2940 | - stats_pairs() | 2915 | + # stats_pairs() |
2941 | 2916 | ||
2942 | if n_unmapped_chains: | 2917 | if n_unmapped_chains: |
2943 | - general_stats() | 2918 | + # general_stats() |
2944 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) | 2919 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) |
2945 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) | 2920 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) |
2921 | + # joblist = [] | ||
2922 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) | ||
2923 | + # if DO_HIRE_RNA_MEASURES: | ||
2924 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) | ||
2925 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) | ||
2926 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) | ||
2927 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) | ||
2928 | + # if DO_WADLEY_ANALYSIS: | ||
2929 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) | ||
2930 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv'))) | ||
2931 | + # process_jobs(joblist) | ||
2946 | joblist = [] | 2932 | joblist = [] |
2947 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) | 2933 | + # joblist.append(Job(function=gmm_aa_dists, args=())) |
2948 | - if DO_HIRE_RNA_MEASURES: | 2934 | + # joblist.append(Job(function=gmm_aa_torsions, args=())) |
2949 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) | ||
2950 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) | ||
2951 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) | ||
2952 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) | ||
2953 | - if DO_WADLEY_ANALYSIS: | ||
2954 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) | ||
2955 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv'))) | ||
2956 | - process_jobs(joblist) | ||
2957 | - joblist = [] | ||
2958 | - joblist.append(Job(function=gmm_aa_dists, args=())) | ||
2959 | - joblist.append(Job(function=gmm_aa_torsions, args=())) | ||
2960 | if DO_HIRE_RNA_MEASURES: | 2935 | if DO_HIRE_RNA_MEASURES: |
2961 | - joblist.append(Job(function=gmm_hrna, args=())) | 2936 | + # joblist.append(Job(function=gmm_hrna, args=())) |
2962 | joblist.append(Job(function=gmm_hrna_basepairs, args=())) | 2937 | joblist.append(Job(function=gmm_hrna_basepairs, args=())) |
2963 | if DO_WADLEY_ANALYSIS: | 2938 | if DO_WADLEY_ANALYSIS: |
2964 | joblist.append(Job(function=gmm_wadley, args=())) | 2939 | joblist.append(Job(function=gmm_wadley, args=())) |
2965 | if len(joblist): | 2940 | if len(joblist): |
2966 | process_jobs(joblist) | 2941 | process_jobs(joblist) |
2942 | + merge_jsons() | ||
2967 | 2943 | ... | ... |
-
mentioned in commit 6a223ef3
-
Please register or login to post a comment