Showing
1 changed file
with
400 additions
and
429 deletions
... | @@ -19,6 +19,7 @@ import matplotlib.patches as mpatches | ... | @@ -19,6 +19,7 @@ import matplotlib.patches as mpatches |
19 | import scipy.cluster.hierarchy as sch | 19 | import scipy.cluster.hierarchy as sch |
20 | import sklearn | 20 | import sklearn |
21 | import json | 21 | import json |
22 | +import glob | ||
22 | import pickle | 23 | import pickle |
23 | import Bio | 24 | import Bio |
24 | from scipy.spatial.distance import squareform | 25 | from scipy.spatial.distance import squareform |
... | @@ -1377,72 +1378,107 @@ def pos_b2(res): | ... | @@ -1377,72 +1378,107 @@ def pos_b2(res): |
1377 | else: | 1378 | else: |
1378 | return [] | 1379 | return [] |
1379 | 1380 | ||
1380 | -def basepair_apex_distance(res, pair): | 1381 | +@trace_unhandled_exceptions |
1381 | - """ | 1382 | +def basepair_measures(res, pair): |
1382 | - measure of the distance between the tips of the paired nucleotides (B1 / B1 or B1 / B2 or B2 / B2) | ||
1383 | - """ | ||
1384 | - dist=[] | ||
1385 | - d=0 | ||
1386 | - if res.get_resname()=='A' or res.get_resname()=='G' :# different cases if 1 aromatic cycle or 2 | ||
1387 | - atom_res=pos_b2(res) | ||
1388 | - if pair.get_resname()=='A' or pair.get_resname()=='G' : | ||
1389 | - atom_pair=pos_b2(pair) | ||
1390 | - if pair.get_resname()=='C' or pair.get_resname()=='U' : | ||
1391 | - atom_pair=pos_b1(pair) | ||
1392 | - | ||
1393 | - if res.get_resname()=='C' or res.get_resname()=='U' : | ||
1394 | - atom_res=pos_b1(res) | ||
1395 | - if pair.get_resname()=='A' or pair.get_resname()=='G' : | ||
1396 | - atom_pair=pos_b2(pair) | ||
1397 | - if pair.get_resname()=='C' or pair.get_resname()=='U' : | ||
1398 | - atom_pair=pos_b1(pair) | ||
1399 | - | ||
1400 | - dist = get_euclidian_distance(atom_res, atom_pair) | ||
1401 | - | ||
1402 | - return dist | ||
1403 | - | ||
1404 | -def basepair_flat_angle(res, pair): | ||
1405 | """ | 1383 | """ |
1406 | - measurement of the plane angles formed by the vectors C1->B1 of the paired nucleotides | 1384 | + measurement of the flat angles describing a basepair in the HiRE-RNA model |
1407 | """ | 1385 | """ |
1408 | if res.get_resname()=='C' or res.get_resname()=='U' : | 1386 | if res.get_resname()=='C' or res.get_resname()=='U' : |
1409 | atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] | 1387 | atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] |
1410 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | 1388 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] |
1411 | atom_b1_res = pos_b1(res) | 1389 | atom_b1_res = pos_b1(res) |
1412 | - a1_res = Vector(atom_c4_res[0]) | 1390 | + if not len(atom_c4_res) or not len(atom_c1p_res) or not len(atom_b1_res): |
1391 | + return | ||
1392 | + a3_res = Vector(atom_c4_res[0]) | ||
1413 | a2_res = Vector(atom_c1p_res[0]) | 1393 | a2_res = Vector(atom_c1p_res[0]) |
1414 | - a3_res = Vector(atom_b1_res[0]) | 1394 | + a1_res = Vector(atom_b1_res[0]) |
1415 | if res.get_resname()=='A' or res.get_resname()=='G' : | 1395 | if res.get_resname()=='A' or res.get_resname()=='G' : |
1416 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | 1396 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] |
1417 | atom_b1_res = pos_b1(res) | 1397 | atom_b1_res = pos_b1(res) |
1418 | atom_b2_res = pos_b2(res) | 1398 | atom_b2_res = pos_b2(res) |
1419 | - a1_res = Vector(atom_c1p_res[0]) | 1399 | + if not len(atom_c1p_res) or not len(atom_b1_res) or not len(atom_b2_res): |
1400 | + return | ||
1401 | + a3_res = Vector(atom_c1p_res[0]) | ||
1420 | a2_res = Vector(atom_b1_res[0]) | 1402 | a2_res = Vector(atom_b1_res[0]) |
1421 | - a3_res = Vector(atom_b2_res[0]) | 1403 | + a1_res = Vector(atom_b2_res[0]) |
1422 | 1404 | ||
1423 | if pair.get_resname()=='C' or pair.get_resname()=='U' : | 1405 | if pair.get_resname()=='C' or pair.get_resname()=='U' : |
1424 | atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] | 1406 | atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] |
1425 | atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] | 1407 | atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] |
1426 | atom_b1_pair = pos_b1(pair) | 1408 | atom_b1_pair = pos_b1(pair) |
1427 | - a1_pair = Vector(atom_c4_pair[0]) | 1409 | + if not len(atom_c4_pair) or not len(atom_c1p_pair) or not len(atom_b1_pair): |
1410 | + return | ||
1411 | + a3_pair = Vector(atom_c4_pair[0]) | ||
1428 | a2_pair = Vector(atom_c1p_pair[0]) | 1412 | a2_pair = Vector(atom_c1p_pair[0]) |
1429 | - a3_pair = Vector(atom_b1_pair) | 1413 | + a1_pair = Vector(atom_b1_pair[0]) |
1430 | if pair.get_resname()=='A' or pair.get_resname()=='G' : | 1414 | if pair.get_resname()=='A' or pair.get_resname()=='G' : |
1431 | atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] | 1415 | atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] |
1432 | atom_b1_pair = pos_b1(pair) | 1416 | atom_b1_pair = pos_b1(pair) |
1433 | atom_b2_pair = pos_b2(pair) | 1417 | atom_b2_pair = pos_b2(pair) |
1434 | - a1_pair = Vector(atom_c1p_pair[0]) | 1418 | + if not len(atom_c1p_pair) or not len(atom_b1_pair) or not len(atom_b2_pair): # No C1' atom in the paired nucleotide, skip measures. |
1419 | + return | ||
1420 | + a3_pair = Vector(atom_c1p_pair[0]) | ||
1435 | a2_pair = Vector(atom_b1_pair[0]) | 1421 | a2_pair = Vector(atom_b1_pair[0]) |
1436 | - a3_pair = Vector(atom_b2_pair[0]) | 1422 | + a1_pair = Vector(atom_b2_pair[0]) |
1437 | 1423 | ||
1438 | - # we calculate the 4 plane angles including these vectors | 1424 | + # Bond vectors |
1425 | + res_32 = a3_res - a2_res | ||
1426 | + res_12 = a1_res - a2_res | ||
1427 | + pair_32 = a3_pair - a2_pair | ||
1428 | + pair_12 = a1_pair - a2_pair | ||
1429 | + rho = a1_res - a1_pair # from pair to res | ||
1439 | 1430 | ||
1440 | - a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) | 1431 | + # dist |
1441 | - b = calc_angle(a2_res, a3_res, a3_pair)*(180/np.pi) | 1432 | + dist = rho.norm() |
1442 | - c = calc_angle(a3_res, a3_pair, a2_pair)*(180/np.pi) | 1433 | + |
1443 | - d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) | 1434 | + # we calculate the 2 plane angles |
1444 | - angles = [a, b, c, d] | 1435 | + with warnings.catch_warnings(): |
1445 | - return angles | 1436 | + warnings.simplefilter('ignore', RuntimeWarning) |
1437 | + b = res_12.angle(rho)*(180/np.pi) # equal to the previous implementation | ||
1438 | + c = pair_12.angle(-rho)*(180/np.pi) # | ||
1439 | + # a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) # not required | ||
1440 | + # b = calc_angle(a2_res, a1_res, a1_pair)*(180/np.pi) | ||
1441 | + # c = calc_angle(a1_res, a1_pair, a2_pair)*(180/np.pi) | ||
1442 | + # d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) # not required | ||
1443 | + | ||
1444 | + # Compute plane vectors | ||
1445 | + n1 = (res_32**res_12).normalized() # ** between vectors, is the cross product | ||
1446 | + n2 = (pair_32**pair_12).normalized() | ||
1447 | + | ||
1448 | + # Distances between base tip and the other base's plane (orthogonal projection) | ||
1449 | + # if angle(rho, n) > pi/2 the distance is negative (signed following n) | ||
1450 | + d1 = rho*n1 # projection of rho on axis n1 | ||
1451 | + d2 = rho*n2 | ||
1452 | + | ||
1453 | + # Now the projection of rho in the planes. It's just a sum of the triangles' two other edges. | ||
1454 | + p1 = (-rho+n1**d1).normalized() # between vector and scalar, ** is the multiplication by a scalar | ||
1455 | + p2 = (rho-n2**d2).normalized() | ||
1456 | + | ||
1457 | + # Measure tau, the dihedral | ||
1458 | + u = (res_12**rho).normalized() | ||
1459 | + v = (rho**pair_12).normalized() | ||
1460 | + w1 = n1**u | ||
1461 | + w2 = v**n2 | ||
1462 | + cosTau1 = n1*u | ||
1463 | + cosTau2 = v*n2 | ||
1464 | + tau1 = np.arccos(cosTau1)*(180/np.pi) | ||
1465 | + tau2 = np.arccos(cosTau2)*(180/np.pi) | ||
1466 | + if res_12*w1 < 0: | ||
1467 | + tau1 = -tau1 | ||
1468 | + if pair_12*w2 < 0: | ||
1469 | + tau2 = -tau2 | ||
1470 | + | ||
1471 | + # And finally, the a1 and a2 angles between res_12 and p1 / pair_12 and p2 | ||
1472 | + with warnings.catch_warnings(): | ||
1473 | + warnings.simplefilter('ignore', RuntimeWarning) | ||
1474 | + a1 = res_12.angle(p1)*(180/np.pi) | ||
1475 | + a2 = pair_12.angle(p2)*(180/np.pi) | ||
1476 | + if cosTau1 < 0: | ||
1477 | + a1 = -a1 | ||
1478 | + if cosTau2 < 0: | ||
1479 | + a2 = -a2 | ||
1480 | + | ||
1481 | + return [dist, b, c, d1, d2, a1, a2, tau1, tau2] | ||
1446 | 1482 | ||
1447 | @trace_unhandled_exceptions | 1483 | @trace_unhandled_exceptions |
1448 | def measure_from_structure(f): | 1484 | def measure_from_structure(f): |
... | @@ -1482,7 +1518,7 @@ def measures_wadley(name, s, thr_idx): | ... | @@ -1482,7 +1518,7 @@ def measures_wadley(name, s, thr_idx): |
1482 | """ | 1518 | """ |
1483 | 1519 | ||
1484 | # do not recompute something already computed | 1520 | # do not recompute something already computed |
1485 | - if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley ' + name + '.csv') and | 1521 | + if (path.isfile(runDir + '/results/geometry/Pyle/angles/flat_angles_pyle ' + name + '.csv') and |
1486 | path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")): | 1522 | path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")): |
1487 | return | 1523 | return |
1488 | 1524 | ||
... | @@ -1524,7 +1560,7 @@ def measures_wadley(name, s, thr_idx): | ... | @@ -1524,7 +1560,7 @@ def measures_wadley(name, s, thr_idx): |
1524 | df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"]) | 1560 | df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"]) |
1525 | df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv") | 1561 | df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv") |
1526 | df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"]) | 1562 | df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"]) |
1527 | - df.to_csv(runDir + "/results/geometry/Pyle/angles/angles_plans_wadley "+name+".csv") | 1563 | + df.to_csv(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle "+name+".csv") |
1528 | 1564 | ||
1529 | @trace_unhandled_exceptions | 1565 | @trace_unhandled_exceptions |
1530 | def measures_aa(name, s, thr_idx): | 1566 | def measures_aa(name, s, thr_idx): |
... | @@ -1533,7 +1569,7 @@ def measures_aa(name, s, thr_idx): | ... | @@ -1533,7 +1569,7 @@ def measures_aa(name, s, thr_idx): |
1533 | """ | 1569 | """ |
1534 | 1570 | ||
1535 | # do not recompute something already computed | 1571 | # do not recompute something already computed |
1536 | - if path.isfile(runDir+"/results/geometry/all-atoms/distances/dist_atoms "+name+".csv"): | 1572 | + if path.isfile(runDir+"/results/geometry/all-atoms/distances/dist_atoms_"+name+".csv"): |
1537 | return | 1573 | return |
1538 | 1574 | ||
1539 | last_o3p = [] # o3 'of the previous nucleotide linked to the P of the current nucleotide | 1575 | last_o3p = [] # o3 'of the previous nucleotide linked to the P of the current nucleotide |
... | @@ -1685,7 +1721,7 @@ def measures_aa(name, s, thr_idx): | ... | @@ -1685,7 +1721,7 @@ def measures_aa(name, s, thr_idx): |
1685 | df=pd.concat([df_comm, df_pur, df_pyr], axis = 1) | 1721 | df=pd.concat([df_comm, df_pur, df_pyr], axis = 1) |
1686 | pbar.close() | 1722 | pbar.close() |
1687 | 1723 | ||
1688 | - df.to_csv(runDir + "/results/geometry/all-atoms/distances/dist_atoms " + name + ".csv") | 1724 | + df.to_csv(runDir + "/results/geometry/all-atoms/distances/dist_atoms_" + name + ".csv") |
1689 | 1725 | ||
1690 | @trace_unhandled_exceptions | 1726 | @trace_unhandled_exceptions |
1691 | def measures_hrna(name, s, thr_idx): | 1727 | def measures_hrna(name, s, thr_idx): |
... | @@ -1810,89 +1846,87 @@ def measures_hrna_basepairs(name, s, thr_idx): | ... | @@ -1810,89 +1846,87 @@ def measures_hrna_basepairs(name, s, thr_idx): |
1810 | 1846 | ||
1811 | df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name)) | 1847 | df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name)) |
1812 | 1848 | ||
1813 | - if df['index_chain'][0]==1:#ignore files with numbering errors | 1849 | + if df['index_chain'][0] == 1: # ignore files with numbering errors : TODO : remove when we get DSSR Pro, there should not be numbering errors anymore |
1814 | - l = measures_hrna_basepairs_chain(chain, df, thr_idx) | 1850 | + l = measures_hrna_basepairs_chain(name, chain, df, thr_idx) |
1815 | - | 1851 | + df_calc = pd.DataFrame(l, columns=["type_LW", "nt1_idx", "nt1_res", "nt2_idx", "nt2_res", "Distance", |
1816 | - df_calc=pd.DataFrame(l, columns=["Chaine", "type LW", "Resseq", "Num paired", "Distance", "C4'-C1'-B1", "C1'-B1-B1pair", "B1-B1pair-C1'pair", "B1pair-C1'pair-C4'pair"]) | 1852 | + "211_angle", "112_angle", "dB1", "dB2", "alpha1", "alpha2", "3211_torsion", "1123_torsion"]) |
1817 | - df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs '+name+'.csv') | 1853 | + df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs ' + name + '.csv', float_format="%.3f") |
1818 | - | ||
1819 | 1854 | ||
1820 | @trace_unhandled_exceptions | 1855 | @trace_unhandled_exceptions |
1821 | -def measures_hrna_basepairs_chain(chain, df, thr_idx): | 1856 | +def measures_hrna_basepairs_chain(name, chain, df, thr_idx): |
1822 | """ | 1857 | """ |
1823 | Cleanup of the dataset | 1858 | Cleanup of the dataset |
1824 | measurements of distances and angles between paired nucleotides in the chain | 1859 | measurements of distances and angles between paired nucleotides in the chain |
1825 | """ | 1860 | """ |
1826 | 1861 | ||
1827 | - liste_dist=[] | 1862 | + results = [] |
1828 | warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) | 1863 | warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) |
1829 | 1864 | ||
1830 | pairs = df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep | 1865 | pairs = df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep |
1831 | - for i in range(pairs.shape[0]): #we remove the lines where no pairing (NaN in paired) | 1866 | + for i in range(pairs.shape[0]): # we remove the lines where no pairing (NaN in paired) |
1832 | - index_with_nan=pairs.index[pairs.iloc[:,2].isnull()] | 1867 | + index_with_nan = pairs.index[pairs.iloc[:,2].isnull()] |
1833 | pairs.drop(index_with_nan, 0, inplace=True) | 1868 | pairs.drop(index_with_nan, 0, inplace=True) |
1834 | 1869 | ||
1835 | - paired_int=[] | 1870 | + paired_int = [] |
1836 | - for i in pairs.index:# convert values from paired to integers or lists of integers | 1871 | + for i in pairs.index: # convert values from paired to integers or lists of integers |
1837 | - paired=pairs.at[i, 'paired'] | 1872 | + paired = pairs.at[i, 'paired'] |
1838 | if type(paired) is np.int64 or type(paired) is np.float64: | 1873 | if type(paired) is np.int64 or type(paired) is np.float64: |
1839 | paired_int.append(int(paired)) | 1874 | paired_int.append(int(paired)) |
1840 | else : #strings | 1875 | else : #strings |
1841 | - if len(paired)<3 : #a single pairing | 1876 | + if len(paired) < 3: # a single pairing |
1842 | paired_int.append(int(paired)) | 1877 | paired_int.append(int(paired)) |
1843 | - else : #several pairings | 1878 | + else : # several pairings |
1844 | - paired=paired.split(',') | 1879 | + paired = paired.split(',') |
1845 | - l=[int(i) for i in paired] | 1880 | + l = [ int(i) for i in paired ] |
1846 | paired_int.append(l) | 1881 | paired_int.append(l) |
1847 | 1882 | ||
1848 | - pair_type_LW_bis=[] | 1883 | + pair_type_LW_bis = [] |
1849 | for j in pairs.index: | 1884 | for j in pairs.index: |
1850 | pair_type_LW = pairs.at[j, 'pair_type_LW'] | 1885 | pair_type_LW = pairs.at[j, 'pair_type_LW'] |
1851 | - if len(pair_type_LW)<4 : #a single pairing | 1886 | + if len(pair_type_LW) < 4 : # a single pairing |
1852 | pair_type_LW_bis.append(pair_type_LW) | 1887 | pair_type_LW_bis.append(pair_type_LW) |
1853 | - else : #several pairings | 1888 | + else : # several pairings |
1854 | - pair_type_LW=pair_type_LW.split(',') | 1889 | + pair_type_LW = pair_type_LW.split(',') |
1855 | - l=[i for i in pair_type_LW] | 1890 | + l = [ i for i in pair_type_LW ] |
1856 | pair_type_LW_bis.append(pair_type_LW) | 1891 | pair_type_LW_bis.append(pair_type_LW) |
1857 | 1892 | ||
1858 | - #addition of these new columns | 1893 | + # addition of these new columns |
1859 | pairs.insert(4, "paired_int", paired_int, True) | 1894 | pairs.insert(4, "paired_int", paired_int, True) |
1860 | pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True) | 1895 | pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True) |
1861 | 1896 | ||
1862 | - indexNames=pairs[pairs['paired_int'] == 0].index | 1897 | + indexNames = pairs[pairs['paired_int'] == 0].index |
1863 | - pairs.drop(indexNames, inplace=True)#deletion of lines with a 0 in paired_int (matching to another RNA chain) | 1898 | + pairs.drop(indexNames, inplace=True) # deletion of lines with a 0 in paired_int (matching to another RNA chain) |
1864 | 1899 | ||
1865 | - for i in tqdm(pairs.index, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {chain} measures_hrna_basepairs_chain", unit="res", leave=False): | 1900 | + for i in tqdm(pairs.index, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna_basepairs_chain", unit="res", leave=False): |
1866 | - """ | 1901 | + # calculations for each row of the pairs dataset |
1867 | - calculations for each row of the pairs dataset | 1902 | + index = pairs.at[i, 'index_chain'] |
1868 | - """ | 1903 | + res1 = chain[(' ', index, ' ')].get_resname() |
1869 | - index=pairs.at[i, 'index_chain'] | 1904 | + if res1 not in ['A','C','G','U']: |
1870 | - type_LW=pairs.at[i, 'pair_type_LW_bis'] #pairing type | 1905 | + continue |
1871 | - num_paired=pairs.at[i, 'paired_int'] #number (index_chain) of the paired nucleotide | 1906 | + type_LW = pairs.at[i, 'pair_type_LW_bis'] # pairing type |
1907 | + num_paired = pairs.at[i, 'paired_int'] # number (index_chain) of the paired nucleotide | ||
1872 | 1908 | ||
1873 | if type(num_paired) is int or type(num_paired) is np.int64: | 1909 | if type(num_paired) is int or type(num_paired) is np.int64: |
1874 | - try : | 1910 | + res2 = chain[(' ', num_paired, ' ')].get_resname() |
1875 | - d = basepair_apex_distance(chain[(' ',index, ' ')], chain[(' ', num_paired, ' ')]) | 1911 | + if res2 not in ["A","C","G","U"]: |
1876 | - angle = basepair_flat_angle(chain[(' ', index, ' ')], chain[(' ', num_paired, ' ')]) | 1912 | + continue |
1877 | - if d != 0.0: | 1913 | + measures = basepair_measures(chain[(' ', index, ' ')], chain[(' ', num_paired, ' ')]) |
1878 | - liste_dist.append([chain, type_LW, index, num_paired, d, angle[0], angle[1], angle[2], angle[3]]) | 1914 | + if measures is not None: |
1879 | - except : | 1915 | + results.append([type_LW, index, res1, num_paired, res2] + measures) |
1880 | - pass | 1916 | + else: |
1881 | - else : | 1917 | + for j in range(len(num_paired)): # if several pairings, process them one by one |
1882 | - for j in range(len(num_paired)): #if several pairings, process them one by one | 1918 | + if num_paired[j] != 0: |
1883 | - if num_paired[j] != 0 : | 1919 | + res2 = chain[(' ', num_paired[j], ' ')].get_resname() |
1884 | - try : | 1920 | + if res2 not in ["A","C","G","U"]: |
1885 | - d = basepair_apex_distance(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')]) | 1921 | + continue |
1886 | - angle = basepair_flat_angle(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')]) | 1922 | + measures = basepair_measures(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')]) |
1887 | - if d != 0.0: | 1923 | + if measures is not None: |
1888 | - liste_dist.append([chain, type_LW[j], index, num_paired[j], d, angle[0], angle[1], angle[2], angle[3]]) | 1924 | + results.append([type_LW[j], index, res1, num_paired[j], res2] + measures) |
1889 | - except: | ||
1890 | - pass | ||
1891 | 1925 | ||
1892 | - return(liste_dist) | 1926 | + return results |
1893 | 1927 | ||
1894 | @trace_unhandled_exceptions | 1928 | @trace_unhandled_exceptions |
1895 | -def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=True) : | 1929 | +def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) : |
1896 | """ | 1930 | """ |
1897 | Plot Gaussian-Mixture-Model (with or without histograms) | 1931 | Plot Gaussian-Mixture-Model (with or without histograms) |
1898 | """ | 1932 | """ |
... | @@ -1906,8 +1940,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -1906,8 +1940,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
1906 | 1940 | ||
1907 | # chooses the number of components based on the maximum likelihood value (maxlogv) | 1941 | # chooses the number of components based on the maximum likelihood value (maxlogv) |
1908 | n_components_range = np.arange(8)+1 | 1942 | n_components_range = np.arange(8)+1 |
1909 | - aic = [] | 1943 | + # aic = [] |
1910 | - bic = [] | 1944 | + # bic = [] |
1911 | maxlogv=[] | 1945 | maxlogv=[] |
1912 | md = np.array(data).reshape(-1,1) | 1946 | md = np.array(data).reshape(-1,1) |
1913 | nb_components = 1 | 1947 | nb_components = 1 |
... | @@ -1915,8 +1949,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -1915,8 +1949,8 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
1915 | log_max = 0 | 1949 | log_max = 0 |
1916 | for n_comp in n_components_range: | 1950 | for n_comp in n_components_range: |
1917 | gmm = GaussianMixture(n_components=n_comp).fit(md) | 1951 | gmm = GaussianMixture(n_components=n_comp).fit(md) |
1918 | - aic.append(abs(gmm.aic(md))) | 1952 | + # aic.append(abs(gmm.aic(md))) |
1919 | - bic.append(abs(gmm.bic(md))) | 1953 | + # bic.append(abs(gmm.bic(md))) |
1920 | maxlogv.append(gmm.lower_bound_) | 1954 | maxlogv.append(gmm.lower_bound_) |
1921 | if gmm.lower_bound_== max(maxlogv) : # takes the maximum | 1955 | if gmm.lower_bound_== max(maxlogv) : # takes the maximum |
1922 | nb_components = n_comp | 1956 | nb_components = n_comp |
... | @@ -1962,10 +1996,10 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -1962,10 +1996,10 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
1962 | if hist: | 1996 | if hist: |
1963 | plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True) | 1997 | plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True) |
1964 | if toric: | 1998 | if toric: |
1965 | - plt.xlabel("Angle (Degré)") | 1999 | + plt.xlabel("Angle (Degrees)") |
1966 | else: | 2000 | else: |
1967 | - plt.xlabel("Distance (Angström)") | 2001 | + plt.xlabel("Distance (Angströms)") |
1968 | - plt.ylabel("Densité") | 2002 | + plt.ylabel("Density") |
1969 | 2003 | ||
1970 | # Prepare the GMM curve with some absciss points | 2004 | # Prepare the GMM curve with some absciss points |
1971 | if toric: | 2005 | if toric: |
... | @@ -1985,7 +2019,7 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -1985,7 +2019,7 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
1985 | summary_data["std"] = [] | 2019 | summary_data["std"] = [] |
1986 | 2020 | ||
1987 | # plot | 2021 | # plot |
1988 | - courbes = [] | 2022 | + curves = [] |
1989 | for i in range(nb_components): | 2023 | for i in range(nb_components): |
1990 | 2024 | ||
1991 | # store the parameters | 2025 | # store the parameters |
... | @@ -2022,25 +2056,25 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr | ... | @@ -2022,25 +2056,25 @@ def GMM_histo(data_ori, name_data, toric=False, hist=True, couleur=None, save=Tr |
2022 | plt.plot(newx, newy, c=colors[i]) | 2056 | plt.plot(newx, newy, c=colors[i]) |
2023 | else: | 2057 | else: |
2024 | # store for later summation | 2058 | # store for later summation |
2025 | - courbes.append(np.array(newy)) | 2059 | + curves.append(np.array(newy)) |
2026 | 2060 | ||
2027 | if hist: | 2061 | if hist: |
2028 | - plt.title("Histogramme " +name_data+ " avec GMM pour " +str(nb_components)+ " composantes (" + str(len(data_ori))+" valeurs)") | 2062 | + plt.title(f"Histogram of {name_data} with GMM of {nb_components} components (" + str(len(data_ori))+" values)") |
2029 | if save: | 2063 | if save: |
2030 | - plt.savefig("Histogramme " +name_data+ " avec GMM pour " +str(nb_components)+ " composantes (" + str(len(data_ori))+" valeurs).png") | 2064 | + plt.savefig(f"Histogram_{name_data}_{nb_components}_comps.png") |
2031 | plt.close() | 2065 | plt.close() |
2032 | else: | 2066 | else: |
2033 | # Plot their sum, do not save figure yet | 2067 | # Plot their sum, do not save figure yet |
2034 | try: | 2068 | try: |
2035 | - plt.plot(newx, sum(courbes), c=couleur, label=name_data) | 2069 | + plt.plot(newx, sum(curves), c=col, label=name_data) |
2036 | except TypeError: | 2070 | except TypeError: |
2037 | - print("N curves:", len(courbes)) | 2071 | + print("N curves:", len(curves)) |
2038 | - for c in courbes: | 2072 | + for c in curves: |
2039 | print(c) | 2073 | print(c) |
2040 | plt.legend() | 2074 | plt.legend() |
2041 | 2075 | ||
2042 | # Save the json | 2076 | # Save the json |
2043 | - with open(runDir + "/results/geometry/json/" +name_data + " .json", 'w', encoding='utf-8') as f: | 2077 | + with open(runDir + "/results/geometry/json/" +name_data + ".json", 'w', encoding='utf-8') as f: |
2044 | json.dump(summary_data, f, indent=4) | 2078 | json.dump(summary_data, f, indent=4) |
2045 | 2079 | ||
2046 | @trace_unhandled_exceptions | 2080 | @trace_unhandled_exceptions |
... | @@ -2122,25 +2156,25 @@ def gmm_aa_dists(): | ... | @@ -2122,25 +2156,25 @@ def gmm_aa_dists(): |
2122 | GMM_histo(c2p_o2p, "C2'-O2'") | 2156 | GMM_histo(c2p_o2p, "C2'-O2'") |
2123 | 2157 | ||
2124 | if len(op3_p) > 0 : | 2158 | if len(op3_p) > 0 : |
2125 | - GMM_histo(op3_p, "OP3-P", toric=False, hist=False, couleur= 'lightcoral') | 2159 | + GMM_histo(op3_p, "OP3-P", toric=False, hist=False, col= 'lightcoral') |
2126 | - GMM_histo(p_op1, "P-OP1", toric=False, hist=False, couleur='gold') | 2160 | + GMM_histo(p_op1, "P-OP1", toric=False, hist=False, col='gold') |
2127 | - GMM_histo(p_op2, "P-OP2", toric=False, hist=False, couleur='lightseagreen') | 2161 | + GMM_histo(p_op2, "P-OP2", toric=False, hist=False, col='lightseagreen') |
2128 | - GMM_histo(last_o3p_p, "O3'-P", toric=False, hist=False, couleur='saddlebrown') | 2162 | + GMM_histo(last_o3p_p, "O3'-P", toric=False, hist=False, col='saddlebrown') |
2129 | - GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, couleur='darkturquoise') | 2163 | + GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkturquoise') |
2130 | - GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, couleur='darkkhaki') | 2164 | + GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='darkkhaki') |
2131 | - GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, couleur='indigo') | 2165 | + GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='indigo') |
2132 | - GMM_histo(c4p_o4p, "C4'-O4'", toric=False, hist=False, couleur='maroon') | 2166 | + GMM_histo(c4p_o4p, "C4'-O4'", toric=False, hist=False, col='maroon') |
2133 | - GMM_histo(c4p_c3p, "C4'-C3'", toric=False, hist=False, couleur='burlywood') | 2167 | + GMM_histo(c4p_c3p, "C4'-C3'", toric=False, hist=False, col='burlywood') |
2134 | - GMM_histo(c3p_o3p, "C3'-O3'", toric=False, hist=False, couleur='steelblue') | 2168 | + GMM_histo(c3p_o3p, "C3'-O3'", toric=False, hist=False, col='steelblue') |
2135 | - GMM_histo(o4p_c1p, "O4'-C1'", toric=False, hist=False, couleur='tomato') | 2169 | + GMM_histo(o4p_c1p, "O4'-C1'", toric=False, hist=False, col='tomato') |
2136 | - GMM_histo(c1p_c2p, "C1'-C2'", toric=False, hist=False, couleur='darkolivegreen') | 2170 | + GMM_histo(c1p_c2p, "C1'-C2'", toric=False, hist=False, col='darkolivegreen') |
2137 | - GMM_histo(c2p_c3p, "C2'-C3'", toric=False, hist=False, couleur='orchid') | 2171 | + GMM_histo(c2p_c3p, "C2'-C3'", toric=False, hist=False, col='orchid') |
2138 | - GMM_histo(c2p_o2p, "C2'-O2'", toric=False, hist=False, couleur='deeppink') | 2172 | + GMM_histo(c2p_o2p, "C2'-O2'", toric=False, hist=False, col='deeppink') |
2139 | axes=plt.gca() | 2173 | axes=plt.gca() |
2140 | axes.set_ylim(0, 100) | 2174 | axes.set_ylim(0, 100) |
2141 | - plt.xlabel("Distance (Angström)") | 2175 | + plt.xlabel("Distance (Angströms)") |
2142 | - plt.title("GMM des distances entre atomes communs ") | 2176 | + plt.title("GMM of distances between common atoms ") |
2143 | - plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/commun/" + "GMM des distances entre atomes communs .png") | 2177 | + plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/commun/" + "GMM_distances_common_atoms.png") |
2144 | plt.close() | 2178 | plt.close() |
2145 | 2179 | ||
2146 | os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/purines/", exist_ok=True) | 2180 | os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/purines/", exist_ok=True) |
... | @@ -2161,25 +2195,25 @@ def gmm_aa_dists(): | ... | @@ -2161,25 +2195,25 @@ def gmm_aa_dists(): |
2161 | GMM_histo(c4_n9, "C4-N9") | 2195 | GMM_histo(c4_n9, "C4-N9") |
2162 | GMM_histo(c4_c5, "C4-C5") | 2196 | GMM_histo(c4_c5, "C4-C5") |
2163 | 2197 | ||
2164 | - GMM_histo(c1p_n9, "C1'-N9", hist=False, couleur='lightcoral') | 2198 | + GMM_histo(c1p_n9, "C1'-N9", hist=False, col='lightcoral') |
2165 | - GMM_histo(n9_c8, "N9-C8", hist=False, couleur='gold') | 2199 | + GMM_histo(n9_c8, "N9-C8", hist=False, col='gold') |
2166 | - GMM_histo(c8_n7, "C8-N7", hist=False, couleur='lightseagreen') | 2200 | + GMM_histo(c8_n7, "C8-N7", hist=False, col='lightseagreen') |
2167 | - GMM_histo(n7_c5, "N7-C5", hist=False, couleur='saddlebrown') | 2201 | + GMM_histo(n7_c5, "N7-C5", hist=False, col='saddlebrown') |
2168 | - GMM_histo(c5_c6, "C5-C6", hist=False, couleur='darkturquoise') | 2202 | + GMM_histo(c5_c6, "C5-C6", hist=False, col='darkturquoise') |
2169 | - GMM_histo(c6_o6, "C6-O6", hist=False, couleur='darkkhaki') | 2203 | + GMM_histo(c6_o6, "C6-O6", hist=False, col='darkkhaki') |
2170 | - GMM_histo(c6_n6, "C6-N6", hist=False, couleur='indigo') | 2204 | + GMM_histo(c6_n6, "C6-N6", hist=False, col='indigo') |
2171 | - GMM_histo(c6_n1, "C6-N1", hist=False, couleur='maroon') | 2205 | + GMM_histo(c6_n1, "C6-N1", hist=False, col='maroon') |
2172 | - GMM_histo(n1_c2, "N1-C2", hist=False, couleur='burlywood') | 2206 | + GMM_histo(n1_c2, "N1-C2", hist=False, col='burlywood') |
2173 | - GMM_histo(c2_n2, "C2-N2", hist=False, couleur='steelblue') | 2207 | + GMM_histo(c2_n2, "C2-N2", hist=False, col='steelblue') |
2174 | - GMM_histo(c2_n3, "C2-N3", hist=False, couleur='tomato') | 2208 | + GMM_histo(c2_n3, "C2-N3", hist=False, col='tomato') |
2175 | - GMM_histo(n3_c4, "N3-C4", hist=False, couleur='darkolivegreen') | 2209 | + GMM_histo(n3_c4, "N3-C4", hist=False, col='darkolivegreen') |
2176 | - GMM_histo(c4_n9, "C4-N9", hist=False, couleur='orchid') | 2210 | + GMM_histo(c4_n9, "C4-N9", hist=False, col='orchid') |
2177 | - GMM_histo(c4_c5, "C4-C5", hist=False, couleur='deeppink') | 2211 | + GMM_histo(c4_c5, "C4-C5", hist=False, col='deeppink') |
2178 | axes=plt.gca() | 2212 | axes=plt.gca() |
2179 | axes.set_ylim(0, 100) | 2213 | axes.set_ylim(0, 100) |
2180 | - plt.xlabel("Distance (Angström)") | 2214 | + plt.xlabel("Distance (Angströms)") |
2181 | - plt.title("GMM des distances entre atomes des cycles purines", fontsize=10) | 2215 | + plt.title("GMM of distances between atoms of the purine cycles", fontsize=10) |
2182 | - plt.savefig(runDir+ "/results/figures/GMM/all-atoms/distances/purines/" + "GMM des distances entre atomes des cycles purines.png") | 2216 | + plt.savefig(runDir+ "/results/figures/GMM/all-atoms/distances/purines/" + "GMM_distances_purine_cycles.png") |
2183 | plt.close() | 2217 | plt.close() |
2184 | 2218 | ||
2185 | os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/", exist_ok=True) | 2219 | os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/", exist_ok=True) |
... | @@ -2197,22 +2231,22 @@ def gmm_aa_dists(): | ... | @@ -2197,22 +2231,22 @@ def gmm_aa_dists(): |
2197 | GMM_histo(c4_n4, "C4-N4") | 2231 | GMM_histo(c4_n4, "C4-N4") |
2198 | GMM_histo(c4_o4, "C4-O4") | 2232 | GMM_histo(c4_o4, "C4-O4") |
2199 | 2233 | ||
2200 | - GMM_histo(c1p_n1, "C1'-N1", hist=False, couleur='lightcoral') | 2234 | + GMM_histo(c1p_n1, "C1'-N1", hist=False, col='lightcoral') |
2201 | - GMM_histo(n1_c6, "N1-C6", hist=False, couleur='gold') | 2235 | + GMM_histo(n1_c6, "N1-C6", hist=False, col='gold') |
2202 | - GMM_histo(c6_c5, "C6-C5", hist=False, couleur='lightseagreen') | 2236 | + GMM_histo(c6_c5, "C6-C5", hist=False, col='lightseagreen') |
2203 | - GMM_histo(c5_c4, "C5-C4", hist=False, couleur='deeppink') | 2237 | + GMM_histo(c5_c4, "C5-C4", hist=False, col='deeppink') |
2204 | - GMM_histo(c4_n3, "C4-N3", hist=False, couleur='red') | 2238 | + GMM_histo(c4_n3, "C4-N3", hist=False, col='red') |
2205 | - GMM_histo(n3_c2, "N3-C2", hist=False, couleur='lime') | 2239 | + GMM_histo(n3_c2, "N3-C2", hist=False, col='lime') |
2206 | - GMM_histo(c2_o2, "C2-O2", hist=False, couleur='indigo') | 2240 | + GMM_histo(c2_o2, "C2-O2", hist=False, col='indigo') |
2207 | - GMM_histo(c2_n1, "C2-N1", hist=False, couleur='maroon') | 2241 | + GMM_histo(c2_n1, "C2-N1", hist=False, col='maroon') |
2208 | - GMM_histo(c4_n4, "C4-N4", hist=False, couleur='burlywood') | 2242 | + GMM_histo(c4_n4, "C4-N4", hist=False, col='burlywood') |
2209 | - GMM_histo(c4_o4, "C4-O4", hist=False, couleur='steelblue') | 2243 | + GMM_histo(c4_o4, "C4-O4", hist=False, col='steelblue') |
2210 | axes=plt.gca() | 2244 | axes=plt.gca() |
2211 | #axes.set_xlim(1, 2) | 2245 | #axes.set_xlim(1, 2) |
2212 | axes.set_ylim(0, 100) | 2246 | axes.set_ylim(0, 100) |
2213 | - plt.xlabel("Distance (Angström)") | 2247 | + plt.xlabel("Distance (Angströms") |
2214 | - plt.title("GMM des distances entre atomes des cycles pyrimidines", fontsize=10) | 2248 | + plt.title("GMM of distances between atoms of the pyrimidine cycles", fontsize=10) |
2215 | - plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/pyrimidines/" + "GMM des distances entre atomes des cycles pyrimidines.png") | 2249 | + plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/pyrimidines/" + "GMM_distances_pyrimidine_cycles.png") |
2216 | plt.close() | 2250 | plt.close() |
2217 | 2251 | ||
2218 | os.chdir(runDir) | 2252 | os.chdir(runDir) |
... | @@ -2268,16 +2302,16 @@ def gmm_aa_torsions(): | ... | @@ -2268,16 +2302,16 @@ def gmm_aa_torsions(): |
2268 | GMM_histo(zeta, "Zeta", toric=True) | 2302 | GMM_histo(zeta, "Zeta", toric=True) |
2269 | GMM_histo(chi, "Xhi", toric=True) | 2303 | GMM_histo(chi, "Xhi", toric=True) |
2270 | 2304 | ||
2271 | - GMM_histo(alpha, "Alpha", toric=True, hist=False, couleur='red') | 2305 | + GMM_histo(alpha, "Alpha", toric=True, hist=False, col='red') |
2272 | - GMM_histo(beta, "Beta", toric=True, hist=False, couleur='firebrick') | 2306 | + GMM_histo(beta, "Beta", toric=True, hist=False, col='firebrick') |
2273 | - GMM_histo(gamma, "Gamma", toric=True, hist=False, couleur='limegreen') | 2307 | + GMM_histo(gamma, "Gamma", toric=True, hist=False, col='limegreen') |
2274 | - GMM_histo(delta, "Delta", toric=True, hist=False, couleur='darkslateblue') | 2308 | + GMM_histo(delta, "Delta", toric=True, hist=False, col='darkslateblue') |
2275 | - GMM_histo(epsilon, "Epsilon", toric=True, hist=False, couleur='goldenrod') | 2309 | + GMM_histo(epsilon, "Epsilon", toric=True, hist=False, col='goldenrod') |
2276 | - GMM_histo(zeta, "Zeta", toric=True, hist=False, couleur='teal') | 2310 | + GMM_histo(zeta, "Zeta", toric=True, hist=False, col='teal') |
2277 | - GMM_histo(chi, "Xhi", toric=True, hist=False, couleur='hotpink') | 2311 | + GMM_histo(chi, "Xhi", toric=True, hist=False, col='hotpink') |
2278 | - plt.xlabel("Angle(Degré)") | 2312 | + plt.xlabel("Angle (Degrees)") |
2279 | - plt.title("GMM des angles de torsion") | 2313 | + plt.title("GMM of torsion angles") |
2280 | - plt.savefig("GMM des angles de torsion.png") | 2314 | + plt.savefig("GMM_torsions.png") |
2281 | plt.close() | 2315 | plt.close() |
2282 | 2316 | ||
2283 | os.chdir(runDir) | 2317 | os.chdir(runDir) |
... | @@ -2304,17 +2338,17 @@ def gmm_wadley(): | ... | @@ -2304,17 +2338,17 @@ def gmm_wadley(): |
2304 | GMM_histo(p_c1p, "P-C4'") | 2338 | GMM_histo(p_c1p, "P-C4'") |
2305 | GMM_histo(c1p_p, "C4'-P") | 2339 | GMM_histo(c1p_p, "C4'-P") |
2306 | 2340 | ||
2307 | - GMM_histo(p_c1p, "P-C4'", toric=False, hist=False, couleur='gold') | 2341 | + GMM_histo(p_c1p, "P-C4'", toric=False, hist=False, col='gold') |
2308 | - GMM_histo(c1p_p, "C4'-P", toric=False, hist=False, couleur='indigo') | 2342 | + GMM_histo(c1p_p, "C4'-P", toric=False, hist=False, col='indigo') |
2309 | - GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, couleur='firebrick') | 2343 | + GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, col='firebrick') |
2310 | - GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, couleur='seagreen') | 2344 | + GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, col='seagreen') |
2311 | - plt.xlabel("Distance(Angström)") | 2345 | + plt.xlabel("Distance (Angströms)") |
2312 | - plt.title("GMM des distances (Pyle model)") | 2346 | + plt.title("GMM of distances (Pyle model)") |
2313 | - plt.savefig("GMM des distances (Pyle model).png") | 2347 | + plt.savefig("GMM_distances_pyle_model.png") |
2314 | plt.close() | 2348 | plt.close() |
2315 | 2349 | ||
2316 | # Flat Angles | 2350 | # Flat Angles |
2317 | - df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/angles_plans_wadley.csv")) | 2351 | + df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle.csv")) |
2318 | 2352 | ||
2319 | p_c1p_psuiv = list(df["P-C1'-P°"][~ np.isnan(df["P-C1'-P°"])]) | 2353 | p_c1p_psuiv = list(df["P-C1'-P°"][~ np.isnan(df["P-C1'-P°"])]) |
2320 | c1p_psuiv_c1psuiv = list(df["C1'-P°-C1'°"][~ np.isnan(df["C1'-P°-C1'°"])]) | 2354 | c1p_psuiv_c1psuiv = list(df["C1'-P°-C1'°"][~ np.isnan(df["C1'-P°-C1'°"])]) |
... | @@ -2326,11 +2360,11 @@ def gmm_wadley(): | ... | @@ -2326,11 +2360,11 @@ def gmm_wadley(): |
2326 | GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True) | 2360 | GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True) |
2327 | GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True) | 2361 | GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True) |
2328 | 2362 | ||
2329 | - GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True, hist=False, couleur='firebrick') | 2363 | + GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True, hist=False, col='firebrick') |
2330 | - GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True, hist=False, couleur='seagreen') | 2364 | + GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True, hist=False, col='seagreen') |
2331 | - plt.xlabel("Angle(Degré)") | 2365 | + plt.xlabel("Angle (Degrees)") |
2332 | - plt.title("GMM des angles plans (Pyle model)") | 2366 | + plt.title("GMM of flat angles (Pyle model)") |
2333 | - plt.savefig("GMM des angles plans (Pyle model).png") | 2367 | + plt.savefig("GMM_flat_angles_pyle_model.png") |
2334 | plt.close() | 2368 | plt.close() |
2335 | 2369 | ||
2336 | # Torsion anfles | 2370 | # Torsion anfles |
... | @@ -2367,15 +2401,15 @@ def gmm_wadley(): | ... | @@ -2367,15 +2401,15 @@ def gmm_wadley(): |
2367 | GMM_histo(eta_base, "Eta''", toric=True) | 2401 | GMM_histo(eta_base, "Eta''", toric=True) |
2368 | GMM_histo(theta_base, "Theta''", toric=True) | 2402 | GMM_histo(theta_base, "Theta''", toric=True) |
2369 | 2403 | ||
2370 | - GMM_histo(eta, "Eta", toric=True, hist=False, couleur='mediumaquamarine') | 2404 | + GMM_histo(eta, "Eta", toric=True, hist=False, col='mediumaquamarine') |
2371 | - GMM_histo(theta, "Theta", toric=True, hist=False, couleur='darkorchid') | 2405 | + GMM_histo(theta, "Theta", toric=True, hist=False, col='darkorchid') |
2372 | - GMM_histo(eta_prime, "Eta'", toric=True, hist=False, couleur='cyan') | 2406 | + GMM_histo(eta_prime, "Eta'", toric=True, hist=False, col='cyan') |
2373 | - GMM_histo(theta_prime, "Theta'", toric=True, hist=False, couleur='crimson') | 2407 | + GMM_histo(theta_prime, "Theta'", toric=True, hist=False, col='crimson') |
2374 | - GMM_histo(eta_base, "Eta''", toric=True, hist=False, couleur='royalblue') | 2408 | + GMM_histo(eta_base, "Eta''", toric=True, hist=False, col='royalblue') |
2375 | - GMM_histo(theta_base, "Theta''", toric=True, hist=False, couleur='palevioletred') | 2409 | + GMM_histo(theta_base, "Theta''", toric=True, hist=False, col='palevioletred') |
2376 | - plt.xlabel("Angle(Degré)") | 2410 | + plt.xlabel("Angle (Degrees)") |
2377 | - plt.title("GMM des angles de pseudotorsion") | 2411 | + plt.title("GMM of pseudo-torsion angles (Pyle Model)") |
2378 | - plt.savefig("GMM des angles de pseudotorsion.png") | 2412 | + plt.savefig("GMM_pseudotorsion_angles_pyle_model.png") |
2379 | plt.close() | 2413 | plt.close() |
2380 | 2414 | ||
2381 | os.chdir(runDir) | 2415 | os.chdir(runDir) |
... | @@ -2411,18 +2445,18 @@ def gmm_hrna(): | ... | @@ -2411,18 +2445,18 @@ def gmm_hrna(): |
2411 | GMM_histo(p_o5p, "P-O5'") | 2445 | GMM_histo(p_o5p, "P-O5'") |
2412 | GMM_histo(last_c4p_p, "C4'-P") | 2446 | GMM_histo(last_c4p_p, "C4'-P") |
2413 | 2447 | ||
2414 | - GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, couleur='lightcoral') | 2448 | + GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='lightcoral') |
2415 | - GMM_histo(b1_b2, "B1-B2", toric=False, hist=False, couleur='limegreen') | 2449 | + GMM_histo(b1_b2, "B1-B2", toric=False, hist=False, col='limegreen') |
2416 | - GMM_histo(c1p_b1, "C1'-B1", toric=False, hist=False, couleur='tomato') | 2450 | + GMM_histo(c1p_b1, "C1'-B1", toric=False, hist=False, col='tomato') |
2417 | - GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, couleur='aquamarine') | 2451 | + GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='aquamarine') |
2418 | - GMM_histo(c4p_c1p, "C4'-C1'", toric=False, hist=False, couleur='goldenrod') | 2452 | + GMM_histo(c4p_c1p, "C4'-C1'", toric=False, hist=False, col='goldenrod') |
2419 | - GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, couleur='darkcyan') | 2453 | + GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkcyan') |
2420 | - GMM_histo(last_c4p_p, "C4'-P", toric=False, hist=False, couleur='deeppink') | 2454 | + GMM_histo(last_c4p_p, "C4'-P", toric=False, hist=False, col='deeppink') |
2421 | axes = plt.gca() | 2455 | axes = plt.gca() |
2422 | axes.set_ylim(0, 100) | 2456 | axes.set_ylim(0, 100) |
2423 | - plt.xlabel("Distance (Angström)") | 2457 | + plt.xlabel("Distance (Angströms)") |
2424 | - plt.title("GMM des distances entre atomes HiRE-RNA") | 2458 | + plt.title("GMM of distances between HiRE-RNA beads") |
2425 | - plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM des distances entre atomes HiRE-RNA.png") | 2459 | + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM_distances_HiRE_RNA.png") |
2426 | plt.close() | 2460 | plt.close() |
2427 | 2461 | ||
2428 | # Angles | 2462 | # Angles |
... | @@ -2449,19 +2483,19 @@ def gmm_hrna(): | ... | @@ -2449,19 +2483,19 @@ def gmm_hrna(): |
2449 | GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True) | 2483 | GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True) |
2450 | GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True) | 2484 | GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True) |
2451 | 2485 | ||
2452 | - GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True, hist=False, couleur='lightcoral') | 2486 | + GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True, hist=False, col='lightcoral') |
2453 | - GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True, hist=False, couleur='limegreen') | 2487 | + GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True, hist=False, col='limegreen') |
2454 | - GMM_histo(lastc5p_lastc4p_p, "C5'-C4'-P", toric=True, hist=False, couleur='tomato') | 2488 | + GMM_histo(lastc5p_lastc4p_p, "C5'-C4'-P", toric=True, hist=False, col='tomato') |
2455 | - GMM_histo(p_o5p_c5p, "P-O5'-C5'", toric=True, hist=False, couleur='aquamarine') | 2489 | + GMM_histo(p_o5p_c5p, "P-O5'-C5'", toric=True, hist=False, col='aquamarine') |
2456 | - GMM_histo(o5p_c5p_c4p, "O5'-C5'-C4'", toric=True, hist=False, couleur='goldenrod') | 2490 | + GMM_histo(o5p_c5p_c4p, "O5'-C5'-C4'", toric=True, hist=False, col='goldenrod') |
2457 | - GMM_histo(c5p_c4p_c1p, "C5'-C4'-C1'", toric=True, hist=False, couleur='darkcyan') | 2491 | + GMM_histo(c5p_c4p_c1p, "C5'-C4'-C1'", toric=True, hist=False, col='darkcyan') |
2458 | - GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True, hist=False, couleur='deeppink') | 2492 | + GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True, hist=False, col='deeppink') |
2459 | - GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True, hist=False, couleur='indigo') | 2493 | + GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True, hist=False, col='indigo') |
2460 | axes = plt.gca() | 2494 | axes = plt.gca() |
2461 | axes.set_ylim(0, 100) | 2495 | axes.set_ylim(0, 100) |
2462 | - plt.xlabel("Angle (Degré)") | 2496 | + plt.xlabel("Angle (Degres)") |
2463 | - plt.title("GMM des angles entre atomes HiRE-RNA") | 2497 | + plt.title("GMM of angles between HiRE-RNA beads") |
2464 | - plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM des angles entre atomes HiRE-RNA.png") | 2498 | + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM_angles_HiRE_RNA.png") |
2465 | plt.close() | 2499 | plt.close() |
2466 | 2500 | ||
2467 | # Torsions | 2501 | # Torsions |
... | @@ -2488,24 +2522,24 @@ def gmm_hrna(): | ... | @@ -2488,24 +2522,24 @@ def gmm_hrna(): |
2488 | GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True) | 2522 | GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True) |
2489 | GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True) | 2523 | GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True) |
2490 | 2524 | ||
2491 | - GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True, hist=False, couleur='darkred') | 2525 | + GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True, hist=False, col='darkred') |
2492 | - GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True, hist=False, couleur='chocolate') | 2526 | + GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True, hist=False, col='chocolate') |
2493 | - GMM_histo(c5_c4_c1_b1, "C5'-C4'-C1'-B1", toric=True, hist=False, couleur='mediumvioletred') | 2527 | + GMM_histo(c5_c4_c1_b1, "C5'-C4'-C1'-B1", toric=True, hist=False, col='mediumvioletred') |
2494 | - GMM_histo(c4_c1_b1_b2, "C4'-C1'-B1-B2", toric=True, hist=False, couleur='cadetblue') | 2528 | + GMM_histo(c4_c1_b1_b2, "C4'-C1'-B1-B2", toric=True, hist=False, col='cadetblue') |
2495 | - GMM_histo(o5_c5_c4_psuiv, "O5'-C5'-C4'-P°", toric=True, hist=False, couleur='darkkhaki') | 2529 | + GMM_histo(o5_c5_c4_psuiv, "O5'-C5'-C4'-P°", toric=True, hist=False, col='darkkhaki') |
2496 | - GMM_histo(c5_c4_psuiv_o5suiv, "C5'-C4'-P°-O5'°", toric=True, hist=False, couleur='springgreen') | 2530 | + GMM_histo(c5_c4_psuiv_o5suiv, "C5'-C4'-P°-O5'°", toric=True, hist=False, col='springgreen') |
2497 | - GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True, hist=False, couleur='indigo') | 2531 | + GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True, hist=False, col='indigo') |
2498 | - GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True, hist=False, couleur='gold') | 2532 | + GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True, hist=False, col='gold') |
2499 | - plt.xlabel("Angle(Degré)") | 2533 | + plt.xlabel("Angle (Degrees)") |
2500 | - plt.title("GMM des angles de torsion (hire-RNA)") | 2534 | + plt.title("GMM of torsion angles between HiRE-RNA beads") |
2501 | - plt.savefig("GMM des angles de torsion (hire-RNA).png") | 2535 | + plt.savefig("GMM_torsions_HiRE_RNA.png") |
2502 | plt.close() | 2536 | plt.close() |
2503 | 2537 | ||
2504 | os.chdir(runDir) | 2538 | os.chdir(runDir) |
2505 | setproctitle("GMM (HiRE-RNA) finished") | 2539 | setproctitle("GMM (HiRE-RNA) finished") |
2506 | 2540 | ||
2507 | @trace_unhandled_exceptions | 2541 | @trace_unhandled_exceptions |
2508 | -def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance): | 2542 | +def gmm_hrna_basepair_type(type_LW, ntpair, data): |
2509 | """ | 2543 | """ |
2510 | function to plot the statistical figures you want | 2544 | function to plot the statistical figures you want |
2511 | By type of pairing: | 2545 | By type of pairing: |
... | @@ -2520,197 +2554,117 @@ def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance | ... | @@ -2520,197 +2554,117 @@ def gmm_hrna_basepair_type(type_LW, angle_1, angle_2, angle_3, angle_4, distance |
2520 | plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5) | 2554 | plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5) |
2521 | 2555 | ||
2522 | plt.subplot(2, 1, 1) | 2556 | plt.subplot(2, 1, 1) |
2523 | - | 2557 | + GMM_histo(data["211_angle"], f"{type_LW}_{ntpair}_C1'-B1-B1pair", toric=True, hist=False, col='cyan' ) |
2524 | - if len(angle_1) > 0 : | 2558 | + GMM_histo(data["112_angle"], f"{type_LW}_{ntpair}_B1-B1pair-C1'pair", toric=True, hist=False, col='magenta') |
2525 | - GMM_histo(angle_1, "C4'-C1'-B1", toric=True, hist=False, couleur='cyan' ) | 2559 | + GMM_histo(data["3211_torsion"], f"{type_LW}_{ntpair}_C4'-C1'-B1-B1pair", toric=True, hist=False, col='black' ) |
2526 | - if len(angle_2) > 0 : | 2560 | + GMM_histo(data["1123_torsion"], f"{type_LW}_{ntpair}_B1-B1pair-C1'pair-C4'pair", toric=True, hist=False, col='maroon') |
2527 | - GMM_histo(angle_2, "C1'-B1-B1pair", toric=True, hist=False, couleur='magenta') | 2561 | + GMM_histo(data["alpha1"], f"{type_LW}_{ntpair}_alpha_1", toric=True, hist=False, col="yellow") |
2528 | - if len(angle_3) > 0 : | 2562 | + GMM_histo(data["alpha2"], f"{type_LW}_{ntpair}_alpha_2", toric=True, hist=False, col='olive') |
2529 | - GMM_histo(angle_3, "B1-B1pair-C1'pair", toric=True, hist=False, couleur="yellow") | 2563 | + plt.xlabel("Angle (degree)") |
2530 | - if len(angle_4) > 0 : | 2564 | + plt.title(f"GMM of plane angles for {type_LW} {ntpair} basepairs", fontsize=10) |
2531 | - GMM_histo(angle_4, "B1pair-C1'pair-C4'pair", toric=True, hist=False, couleur='olive') | ||
2532 | - plt.xlabel("Angle(degré)") | ||
2533 | - plt.title("GMM des angles plans pour les measure_hrna_basepairs " +type_LW , fontsize=10) | ||
2534 | 2565 | ||
2535 | plt.subplot(2, 1, 2) | 2566 | plt.subplot(2, 1, 2) |
2536 | - if len(distance)>0 : | 2567 | + GMM_histo(data["Distance"], f"Distance between {type_LW} {ntpair} tips", toric=False, hist=False, col="cyan") |
2537 | - GMM_histo(distance, "Distance pointes " + type_LW, save=False) | 2568 | + GMM_histo(data["dB1"], f"{type_LW} {ntpair} dB1", toric=False, hist=False, col="tomato") |
2569 | + GMM_histo(data["dB2"], f"{type_LW} {ntpair} dB2", toric=False, hist=False, col="goldenrod") | ||
2570 | + plt.xlabel("Distance (Angströms)") | ||
2571 | + plt.title(f"GMM of distances for {type_LW} {ntpair} basepairs", fontsize=10) | ||
2538 | 2572 | ||
2539 | - plt.savefig("Mesures measure_hrna_basepairs " +type_LW+ ".png" ) | 2573 | + plt.savefig(f"{type_LW}_{ntpair}_basepairs.png" ) |
2540 | plt.close() | 2574 | plt.close() |
2541 | - setproctitle(f"GMM (HiRE-RNA {type_LW} basepairs) finished") | 2575 | + setproctitle(f"GMM (HiRE-RNA {type_LW} {ntpair} basepairs) finished") |
2542 | 2576 | ||
2543 | @trace_unhandled_exceptions | 2577 | @trace_unhandled_exceptions |
2544 | def gmm_hrna_basepairs(): | 2578 | def gmm_hrna_basepairs(): |
2545 | 2579 | ||
2546 | setproctitle("GMM (HiRE-RNA basepairs)") | 2580 | setproctitle("GMM (HiRE-RNA basepairs)") |
2547 | 2581 | ||
2548 | - df=pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv")) | 2582 | + df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv")) |
2549 | - | 2583 | + |
2550 | - cWW=df[df['type LW']=='cWW'] | 2584 | + lw = ["cWW", "tWW", "cWH", "tWH", "cHW", "tHW", "cWS", "tWS", "cSW", "tSW", "cHH", "tHH", "cSH", "tSH", "cHS", "tHS", "cSS", "tSS"] |
2551 | - cWW_dist=list(cWW["Distance"]) | ||
2552 | - cWW_angle_1=list(cWW["C4'-C1'-B1"]) | ||
2553 | - cWW_angle_2=list(cWW["C1'-B1-B1pair"]) | ||
2554 | - cWW_angle_3=list(cWW["B1-B1pair-C1'pair"]) | ||
2555 | - cWW_angle_4=list(cWW["B1pair-C1'pair-C4'pair"]) | ||
2556 | - tWW=df[df['type LW']=='tWW'] | ||
2557 | - tWW_dist=list(tWW["Distance"]) | ||
2558 | - tWW_angle_1=list(tWW["C4'-C1'-B1"]) | ||
2559 | - tWW_angle_2=list(tWW["C1'-B1-B1pair"]) | ||
2560 | - tWW_angle_3=list(tWW["B1-B1pair-C1'pair"]) | ||
2561 | - tWW_angle_4=list(tWW["B1pair-C1'pair-C4'pair"]) | ||
2562 | - cWH=df[df['type LW']=='cWH'] | ||
2563 | - cWH_dist=list(cWH["Distance"]) | ||
2564 | - cWH_angle_1=list(cWH["C4'-C1'-B1"]) | ||
2565 | - cWH_angle_2=list(cWH["C1'-B1-B1pair"]) | ||
2566 | - cWH_angle_3=list(cWH["B1-B1pair-C1'pair"]) | ||
2567 | - cWH_angle_4=list(cWH["B1pair-C1'pair-C4'pair"]) | ||
2568 | - tWH=df[df['type LW']=='tWH'] | ||
2569 | - tWH_dist=list(tWH["Distance"]) | ||
2570 | - tWH_angle_1=list(tWH["C4'-C1'-B1"]) | ||
2571 | - tWH_angle_2=list(tWH["C1'-B1-B1pair"]) | ||
2572 | - tWH_angle_3=list(tWH["B1-B1pair-C1'pair"]) | ||
2573 | - tWH_angle_4=list(tWH["B1pair-C1'pair-C4'pair"]) | ||
2574 | - cHW=df[df['type LW']=='cHW'] | ||
2575 | - cHW_dist=list(cHW["Distance"]) | ||
2576 | - cHW_angle_1=list(cHW["C4'-C1'-B1"]) | ||
2577 | - cHW_angle_2=list(cHW["C1'-B1-B1pair"]) | ||
2578 | - cHW_angle_3=list(cHW["B1-B1pair-C1'pair"]) | ||
2579 | - cHW_angle_4=list(cHW["B1pair-C1'pair-C4'pair"]) | ||
2580 | - tHW=df[df['type LW']=='tHW'] | ||
2581 | - tHW_dist=list(tHW["Distance"]) | ||
2582 | - tHW_angle_1=list(tHW["C4'-C1'-B1"]) | ||
2583 | - tHW_angle_2=list(tHW["C1'-B1-B1pair"]) | ||
2584 | - tHW_angle_3=list(tHW["B1-B1pair-C1'pair"]) | ||
2585 | - tHW_angle_4=list(tHW["B1pair-C1'pair-C4'pair"]) | ||
2586 | - cWS=df[df['type LW']=='cWS'] | ||
2587 | - cWS_dist=list(cWS["Distance"]) | ||
2588 | - cWS_angle_1=list(cWS["C4'-C1'-B1"]) | ||
2589 | - cWS_angle_2=list(cWS["C1'-B1-B1pair"]) | ||
2590 | - cWS_angle_3=list(cWS["B1-B1pair-C1'pair"]) | ||
2591 | - cWS_angle_4=list(cWS["B1pair-C1'pair-C4'pair"]) | ||
2592 | - tWS=df[df['type LW']=='tWS'] | ||
2593 | - tWS_dist=list(tWS["Distance"]) | ||
2594 | - tWS_angle_1=list(tWS["C4'-C1'-B1"]) | ||
2595 | - tWS_angle_2=list(tWS["C1'-B1-B1pair"]) | ||
2596 | - tWS_angle_3=list(tWS["B1-B1pair-C1'pair"]) | ||
2597 | - tWS_angle_4=list(tWS["B1pair-C1'pair-C4'pair"]) | ||
2598 | - cSW=df[df['type LW']=='cSW'] | ||
2599 | - cSW_dist=list(cSW["Distance"]) | ||
2600 | - cSW_angle_1=list(cSW["C4'-C1'-B1"]) | ||
2601 | - cSW_angle_2=list(cSW["C1'-B1-B1pair"]) | ||
2602 | - cSW_angle_3=list(cSW["B1-B1pair-C1'pair"]) | ||
2603 | - cSW_angle_4=list(cSW["B1pair-C1'pair-C4'pair"]) | ||
2604 | - tSW=df[df['type LW']=='tSW'] | ||
2605 | - tSW_dist=list(tSW["Distance"]) | ||
2606 | - tSW_angle_1=list(tSW["C4'-C1'-B1"]) | ||
2607 | - tSW_angle_2=list(tSW["C1'-B1-B1pair"]) | ||
2608 | - tSW_angle_3=list(tSW["B1-B1pair-C1'pair"]) | ||
2609 | - tSW_angle_4=list(tSW["B1pair-C1'pair-C4'pair"]) | ||
2610 | - cHH=df[df['type LW']=='cHH'] | ||
2611 | - cHH_dist=list(cHH["Distance"]) | ||
2612 | - cHH_angle_1=list(cHH["C4'-C1'-B1"]) | ||
2613 | - cHH_angle_2=list(cHH["C1'-B1-B1pair"]) | ||
2614 | - cHH_angle_3=list(cHH["B1-B1pair-C1'pair"]) | ||
2615 | - cHH_angle_4=list(cHH["B1pair-C1'pair-C4'pair"]) | ||
2616 | - tHH=df[df['type LW']=='tHH'] | ||
2617 | - tHH_dist=list(tHH["Distance"]) | ||
2618 | - tHH_angle_1=list(tHH["C4'-C1'-B1"]) | ||
2619 | - tHH_angle_2=list(tHH["C1'-B1-B1pair"]) | ||
2620 | - tHH_angle_3=list(tHH["B1-B1pair-C1'pair"]) | ||
2621 | - tHH_angle_4=list(tHH["B1pair-C1'pair-C4'pair"]) | ||
2622 | - cSH=df[df['type LW']=='cSH'] | ||
2623 | - cSH_dist=list(cSH["Distance"]) | ||
2624 | - cSH_angle_1=list(cSH["C4'-C1'-B1"]) | ||
2625 | - cSH_angle_2=list(cSH["C1'-B1-B1pair"]) | ||
2626 | - cSH_angle_3=list(cSH["B1-B1pair-C1'pair"]) | ||
2627 | - cSH_angle_4=list(cSH["B1pair-C1'pair-C4'pair"]) | ||
2628 | - tSH=df[df['type LW']=='tSH'] | ||
2629 | - tSH_dist=list(tSH["Distance"]) | ||
2630 | - tSH_angle_1=list(tSH["C4'-C1'-B1"]) | ||
2631 | - tSH_angle_2=list(tSH["C1'-B1-B1pair"]) | ||
2632 | - tSH_angle_3=list(tSH["B1-B1pair-C1'pair"]) | ||
2633 | - tSH_angle_4=list(tSH["B1pair-C1'pair-C4'pair"]) | ||
2634 | - cHS=df[df['type LW']=='cHS'] | ||
2635 | - cHS_dist=list(cHS["Distance"]) | ||
2636 | - cHS_angle_1=list(cHS["C4'-C1'-B1"]) | ||
2637 | - cHS_angle_2=list(cHS["C1'-B1-B1pair"]) | ||
2638 | - cHS_angle_3=list(cHS["B1-B1pair-C1'pair"]) | ||
2639 | - cHS_angle_4=list(cHS["B1pair-C1'pair-C4'pair"]) | ||
2640 | - tHS=df[df['type LW']=='tHS'] | ||
2641 | - tHS_dist=list(tHS["Distance"]) | ||
2642 | - tHS_angle_1=list(tHS["C4'-C1'-B1"]) | ||
2643 | - tHS_angle_2=list(tHS["C1'-B1-B1pair"]) | ||
2644 | - tHS_angle_3=list(tHS["B1-B1pair-C1'pair"]) | ||
2645 | - tHS_angle_4=list(tHS["B1pair-C1'pair-C4'pair"]) | ||
2646 | - cSS=df[df['type LW']=='cSS'] | ||
2647 | - cSS_dist=list(cSS["Distance"]) | ||
2648 | - cSS_angle_1=list(cSS["C4'-C1'-B1"]) | ||
2649 | - cSS_angle_2=list(cSS["C1'-B1-B1pair"]) | ||
2650 | - cSS_angle_3=list(cSS["B1-B1pair-C1'pair"]) | ||
2651 | - cSS_angle_4=list(cSS["B1pair-C1'pair-C4'pair"]) | ||
2652 | - tSS=df[df['type LW']=='tSS'] | ||
2653 | - tSS_dist=list(tSS["Distance"]) | ||
2654 | - tSS_angle_1=list(tSS["C4'-C1'-B1"]) | ||
2655 | - tSS_angle_2=list(tSS["C1'-B1-B1pair"]) | ||
2656 | - tSS_angle_3=list(tSS["B1-B1pair-C1'pair"]) | ||
2657 | - tSS_angle_4=list(tSS["B1pair-C1'pair-C4'pair"]) | ||
2658 | 2585 | ||
2659 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) | 2586 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) |
2660 | os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/") | 2587 | os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/") |
2661 | 2588 | ||
2662 | - gmm_hrna_basepair_type('cWW', cWW_angle_1, cWW_angle_2, cWW_angle_3, cWW_angle_4, cWW_dist) | 2589 | + for lw_type in lw: |
2663 | - gmm_hrna_basepair_type('tWW', tWW_angle_1, tWW_angle_2, tWW_angle_3, tWW_angle_4, tWW_dist) | 2590 | + data = df[df['type_LW'] == lw_type ] |
2664 | - gmm_hrna_basepair_type('cWH', cWH_angle_1, cWH_angle_2, cWH_angle_3, cWH_angle_4, cWH_dist) | 2591 | + if len(data): |
2665 | - gmm_hrna_basepair_type('tWH', tWH_angle_1, tWH_angle_2, tWH_angle_3, tWH_angle_4, tWH_dist) | 2592 | + for b1 in ['A','C','G','U']: |
2666 | - gmm_hrna_basepair_type('cHW', cHW_angle_1, cHW_angle_2, cHW_angle_3, cHW_angle_4, cHW_dist) | 2593 | + for b2 in ['A','C','G','U']: |
2667 | - gmm_hrna_basepair_type('tHW', tHW_angle_1, tHW_angle_2, tHW_angle_3, tHW_angle_4, tHW_dist) | 2594 | + thisbases = data[(data.nt1_res == b1)&(data.nt2_res == b2)] |
2668 | - gmm_hrna_basepair_type('tWS', tWS_angle_1, tWS_angle_2, tWS_angle_3, tWS_angle_4, tWS_dist) | 2595 | + if len(thisbases): |
2669 | - gmm_hrna_basepair_type('cWS', cWS_angle_1, cWS_angle_2, cWS_angle_3, cWS_angle_4, cWS_dist) | 2596 | + gmm_hrna_basepair_type(lw_type, b1+b2, thisbases) |
2670 | - gmm_hrna_basepair_type('tSW', tSW_angle_1, tSW_angle_2, tSW_angle_3, tSW_angle_4, tSW_dist) | 2597 | + |
2671 | - gmm_hrna_basepair_type('cSW', cSW_angle_1, cSW_angle_2, cSW_angle_3, cSW_angle_4, cSW_dist) | 2598 | + # colors = ['lightcoral', "lightseagreen", "black", "goldenrod", "olive", "steelblue", "silver", "deeppink", "navy", |
2672 | - gmm_hrna_basepair_type('cHH', cHH_angle_1, cHH_angle_2, cHH_angle_3, cHH_angle_4, cHH_dist) | 2599 | + # "sienna", "maroon", "orange", "mediumaquamarine", "tomato", "indigo", "orchid", "tan", "lime"] |
2673 | - gmm_hrna_basepair_type('tHH', tHH_angle_1, tHH_angle_2, tHH_angle_3, tHH_angle_4, tHH_dist) | 2600 | + # for lw_type, col in zip(lw, colors): |
2674 | - gmm_hrna_basepair_type('cSH', cSH_angle_1, cSH_angle_2, cSH_angle_3, cSH_angle_4, cSH_dist) | 2601 | + # data = df[df['type LW'] == lw_type] |
2675 | - gmm_hrna_basepair_type('tSH', tSH_angle_1, tSH_angle_2, tSH_angle_3, tSH_angle_4, tSH_dist) | 2602 | + # GMM_histo(data.Distance, lw_type, toric=False, hist=False, col=col) |
2676 | - gmm_hrna_basepair_type('cHS', cHS_angle_1, cHS_angle_2, cHS_angle_3, cHS_angle_4, cHS_dist) | 2603 | + # plt.xlabel('Distance (Angströms)') |
2677 | - gmm_hrna_basepair_type('tHS', tHS_angle_1, tHS_angle_2, tHS_angle_3, tHS_angle_4, tHS_dist) | 2604 | + # plt.title("GMM of distances between base tips ("+str(nt)+ " values)", fontsize=8) |
2678 | - gmm_hrna_basepair_type('cSS', cSS_angle_1, cSS_angle_2, cSS_angle_3, cSS_angle_4, cSS_dist) | 2605 | + # plt.savefig("distances_between_tips.png") |
2679 | - gmm_hrna_basepair_type('tSS', tSS_angle_1, tSS_angle_2, tSS_angle_3, tSS_angle_4, tSS_dist) | 2606 | + # plt.close() |
2680 | - | ||
2681 | - nc=len(cWW)+len(cHH)+len(cSS)+len(cWH)+len(cHW)+len(cWS)+len(cSW)+len(cHS)+len(cSH) | ||
2682 | - GMM_histo(cWW_dist, "cWW", toric=False, hist=False, couleur='lightcoral') | ||
2683 | - GMM_histo(cHH_dist, "cHH", toric=False, hist=False, couleur='lightseagreen') | ||
2684 | - GMM_histo(cSS_dist, "cSS", toric=False, hist=False, couleur='black') | ||
2685 | - GMM_histo(cWH_dist, "cWH", toric=False, hist=False, couleur='goldenrod') | ||
2686 | - GMM_histo(cHW_dist, "cHW", toric=False, hist=False, couleur='olive') | ||
2687 | - GMM_histo(cWS_dist, "cWS", toric=False, hist=False, couleur='steelblue') | ||
2688 | - GMM_histo(cSW_dist, "cSW", toric=False, hist=False, couleur='silver') | ||
2689 | - GMM_histo(cHS_dist, "cHS", toric=False, hist=False, couleur='deeppink') | ||
2690 | - GMM_histo(cSH_dist, "cSH", toric=False, hist=False, couleur='navy') | ||
2691 | - plt.xlabel('Distance (Angström)') | ||
2692 | - plt.title("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs cis ("+str(nc)+ " valeurs)", fontsize=8) | ||
2693 | - plt.savefig("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs cis (" +str(nc)+ " valeurs).png") | ||
2694 | - plt.close() | ||
2695 | - | ||
2696 | - nt=len(tWW)+len(tHH)+len(tSS)+len(tWH)+len(tHW)+len(tWS)+len(tSW)+len(tHS)+len(tSH) | ||
2697 | - GMM_histo(tWW_dist, "tWW", toric=False, hist=False, couleur='sienna') | ||
2698 | - GMM_histo(tHH_dist, "tHH", toric=False, hist=False, couleur='maroon') | ||
2699 | - GMM_histo(tSS_dist, "tSS", toric=False, hist=False, couleur='orange') | ||
2700 | - GMM_histo(tWH_dist, "tWH", toric=False, hist=False, couleur='mediumaquamarine') | ||
2701 | - GMM_histo(tHW_dist, "tHW", toric=False, hist=False, couleur='tomato') | ||
2702 | - GMM_histo(tWS_dist, "tWS", toric=False, hist=False, couleur='indigo') | ||
2703 | - GMM_histo(tSW_dist, "tSW", toric=False, hist=False, couleur='orchid') | ||
2704 | - GMM_histo(tHS_dist, "tHS", toric=False, hist=False, couleur='tan') | ||
2705 | - GMM_histo(tSH_dist, "tSH", toric=False, hist=False, couleur='lime') | ||
2706 | - plt.xlabel('Distance (Angström)') | ||
2707 | - plt.title("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs trans ("+str(nt)+ " valeurs)", fontsize=8) | ||
2708 | - plt.savefig("GMM des distances entre pointes des nucléotides pour les measure_hrna_basepairs trans (" +str(nt)+ " valeurs).png") | ||
2709 | - plt.close() | ||
2710 | 2607 | ||
2711 | os.chdir(runDir) | 2608 | os.chdir(runDir) |
2712 | setproctitle(f"GMM (HiRE-RNA basepairs) finished") | 2609 | setproctitle(f"GMM (HiRE-RNA basepairs) finished") |
2713 | 2610 | ||
2611 | +def merge_jsons(): | ||
2612 | + # All atom distances | ||
2613 | + bonds = ["O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'", "C1'-N9", | ||
2614 | + "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5", | ||
2615 | + "C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"] | ||
2616 | + bonds = [ runDir + "/results/geometry/json/" + x + ".json" for x in bonds ] | ||
2617 | + concat_jsons(bonds, runDir + "/results/geometry/json/all_atom_distances.json") | ||
2618 | + | ||
2619 | + | ||
2620 | + # All atom torsions | ||
2621 | + torsions = ["Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Xhi", "Zeta"] | ||
2622 | + torsions = [ runDir + "/results/geometry/json/" + x + ".json" for x in torsions ] | ||
2623 | + concat_jsons(torsions, runDir + "/results/geometry/json/all_atom_torsions.json") | ||
2624 | + | ||
2625 | + # HiRE-RNA distances | ||
2626 | + hrnabonds = ["P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2", "C4'-P"] | ||
2627 | + hrnabonds = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnabonds ] | ||
2628 | + concat_jsons(hrnabonds, runDir + "/results/geometry/json/hirerna_distances.json") | ||
2629 | + | ||
2630 | + # HiRE-RNA angles | ||
2631 | + hrnaangles = ["P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2", "C4'-P-O5'", "C5'-C4'-P", "C1'-C4'-P"] | ||
2632 | + hrnaangles = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnaangles ] | ||
2633 | + concat_jsons(hrnaangles, runDir + "/results/geometry/json/hirerna_angles.json") | ||
2634 | + | ||
2635 | + # HiRE-RNA torsions | ||
2636 | + hrnators = ["P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "C4'-P°-O5'°-C5'°", "C5'-C4'-P°-O5'°", "C1'-C4'-P°-O5'°", "O5'-C5'-C4'-P°"] | ||
2637 | + hrnators = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnators ] | ||
2638 | + concat_jsons(hrnators, runDir + "/results/geometry/json/hirerna_torsions.json") | ||
2639 | + | ||
2640 | + # HiRE-RNA basepairs | ||
2641 | + for nt1 in ['A', 'C', 'G', 'U']: | ||
2642 | + for nt2 in ['A', 'C', 'G', 'U']: | ||
2643 | + bps = glob.glob(runDir + f"/results/geometry/json/*{nt1}{nt2}*.json") | ||
2644 | + concat_jsons(bps, runDir + f"/results/geometry/json/hirerna_{nt1}{nt2}_basepairs.json") | ||
2645 | + | ||
2646 | + # Delete previous files | ||
2647 | + for f in bonds + torsions + hrnabonds + hrnaangles + hrnators: | ||
2648 | + try: | ||
2649 | + os.remove(f) | ||
2650 | + except FileNotFoundError: | ||
2651 | + pass | ||
2652 | + for f in glob.glob(runDir + "/results/geometry/json/t*.json"): | ||
2653 | + try: | ||
2654 | + os.remove(f) | ||
2655 | + except FileNotFoundError: | ||
2656 | + pass | ||
2657 | + for f in glob.glob(runDir + "/results/geometry/json/c*.json"): | ||
2658 | + try: | ||
2659 | + os.remove(f) | ||
2660 | + except FileNotFoundError: | ||
2661 | + pass | ||
2662 | + for f in glob.glob(runDir + "/results/geometry/json/Distance*.json"): | ||
2663 | + try: | ||
2664 | + os.remove(f) | ||
2665 | + except FileNotFoundError: | ||
2666 | + pass | ||
2667 | + | ||
2714 | @trace_unhandled_exceptions | 2668 | @trace_unhandled_exceptions |
2715 | def concat_dataframes(fpath, outfilename): | 2669 | def concat_dataframes(fpath, outfilename): |
2716 | """ | 2670 | """ |
... | @@ -2735,6 +2689,23 @@ def concat_dataframes(fpath, outfilename): | ... | @@ -2735,6 +2689,23 @@ def concat_dataframes(fpath, outfilename): |
2735 | idxQueue.put(thr_idx) # replace the thread index in the queue | 2689 | idxQueue.put(thr_idx) # replace the thread index in the queue |
2736 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") | 2690 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") |
2737 | 2691 | ||
2692 | +def concat_jsons(flist, outfilename): | ||
2693 | + """ | ||
2694 | + Reads JSON files computed by the geometry jobs and merge them into a smaller | ||
2695 | + number of files | ||
2696 | + """ | ||
2697 | + | ||
2698 | + result = [] | ||
2699 | + for f in flist: | ||
2700 | + # if not path.isfile(f): | ||
2701 | + # continue: | ||
2702 | + with open(f, "rb") as infile: | ||
2703 | + result.append(json.load(infile)) | ||
2704 | + | ||
2705 | + # write the files | ||
2706 | + with open(outfilename, 'w', encoding='utf-8') as f: | ||
2707 | + json.dump(result, f, indent=4) | ||
2708 | + | ||
2738 | def process_jobs(joblist): | 2709 | def process_jobs(joblist): |
2739 | """ | 2710 | """ |
2740 | Starts a Pool to run the Job() objects in joblist. | 2711 | Starts a Pool to run the Job() objects in joblist. |
... | @@ -2759,7 +2730,6 @@ def process_jobs(joblist): | ... | @@ -2759,7 +2730,6 @@ def process_jobs(joblist): |
2759 | print("Something went wrong") | 2730 | print("Something went wrong") |
2760 | 2731 | ||
2761 | if __name__ == "__main__": | 2732 | if __name__ == "__main__": |
2762 | - | ||
2763 | os.makedirs(runDir + "/results/figures/", exist_ok=True) | 2733 | os.makedirs(runDir + "/results/figures/", exist_ok=True) |
2764 | 2734 | ||
2765 | # parse options | 2735 | # parse options |
... | @@ -2897,29 +2867,29 @@ if __name__ == "__main__": | ... | @@ -2897,29 +2867,29 @@ if __name__ == "__main__": |
2897 | 2867 | ||
2898 | # Do general family statistics | 2868 | # Do general family statistics |
2899 | 2869 | ||
2900 | - joblist.append(Job(function=stats_len)) # Computes figures about chain lengths | 2870 | + # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths |
2901 | - joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) | 2871 | + # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) |
2902 | - for f in famlist: | 2872 | + # for f in famlist: |
2903 | - joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) | 2873 | + # joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) |
2904 | - if f not in ignored: | 2874 | + # if f not in ignored: |
2905 | - joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) | 2875 | + # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) |
2906 | 2876 | ||
2907 | 2877 | ||
2908 | # Do geometric measures on all chains | 2878 | # Do geometric measures on all chains |
2909 | 2879 | ||
2910 | - if n_unmapped_chains: | 2880 | + # if n_unmapped_chains: |
2911 | - os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) | 2881 | + # os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) |
2912 | - liste_struct=os.listdir(path_to_3D_data + "renumbered_rna_only") | 2882 | + # liste_struct = os.listdir(path_to_3D_data + "renumbered_rna_only") |
2913 | - f_prec = os.listdir(path_to_3D_data + "renumbered_rna_only")[0] | 2883 | + # if '4zdo_1_E.cif' in liste_struct: |
2914 | - if '4zdo_1_E.cif' in liste_struct: | 2884 | + # liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now |
2915 | - liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now | 2885 | + # if '4zdp_1_E.cif' in liste_struct: |
2916 | - if '4zdp_1_E.cif' in liste_struct: | 2886 | + # liste_struct.remove('4zdp_1_E.cif') |
2917 | - liste_struct.remove('4zdp_1_E.cif') | 2887 | + # for f in liste_struct: |
2918 | - for f in liste_struct: | 2888 | + # if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]): |
2919 | - joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances | 2889 | + # joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances |
2920 | 2890 | ||
2921 | 2891 | ||
2922 | - process_jobs(joblist) | 2892 | + # process_jobs(joblist) |
2923 | 2893 | ||
2924 | # Now process the memory-heavy tasks family by family | 2894 | # Now process the memory-heavy tasks family by family |
2925 | if DO_AVG_DISTANCE_MATRIX: | 2895 | if DO_AVG_DISTANCE_MATRIX: |
... | @@ -2935,26 +2905,26 @@ if __name__ == "__main__": | ... | @@ -2935,26 +2905,26 @@ if __name__ == "__main__": |
2935 | 2905 | ||
2936 | # finish the work after the parallel portions | 2906 | # finish the work after the parallel portions |
2937 | 2907 | ||
2938 | - per_chain_stats() # per chain base frequencies en basepair types | 2908 | + # per_chain_stats() # per chain base frequencies en basepair types |
2939 | - seq_idty() # identity matrices from pre-computed .npy matrices | 2909 | + # seq_idty() # identity matrices from pre-computed .npy matrices |
2940 | - stats_pairs() | 2910 | + # stats_pairs() |
2941 | 2911 | ||
2942 | if n_unmapped_chains: | 2912 | if n_unmapped_chains: |
2943 | - general_stats() | 2913 | + # general_stats() |
2944 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) | 2914 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) |
2945 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) | 2915 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) |
2946 | joblist = [] | 2916 | joblist = [] |
2947 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) | 2917 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) |
2948 | - if DO_HIRE_RNA_MEASURES: | 2918 | + # if DO_HIRE_RNA_MEASURES: |
2949 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) | 2919 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) |
2950 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) | 2920 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) |
2951 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) | 2921 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) |
2952 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) | 2922 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) |
2953 | - if DO_WADLEY_ANALYSIS: | 2923 | + # if DO_WADLEY_ANALYSIS: |
2954 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) | 2924 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) |
2955 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv'))) | 2925 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv'))) |
2956 | - process_jobs(joblist) | 2926 | + # process_jobs(joblist) |
2957 | - joblist = [] | 2927 | + # joblist = [] |
2958 | joblist.append(Job(function=gmm_aa_dists, args=())) | 2928 | joblist.append(Job(function=gmm_aa_dists, args=())) |
2959 | joblist.append(Job(function=gmm_aa_torsions, args=())) | 2929 | joblist.append(Job(function=gmm_aa_torsions, args=())) |
2960 | if DO_HIRE_RNA_MEASURES: | 2930 | if DO_HIRE_RNA_MEASURES: |
... | @@ -2964,4 +2934,5 @@ if __name__ == "__main__": | ... | @@ -2964,4 +2934,5 @@ if __name__ == "__main__": |
2964 | joblist.append(Job(function=gmm_wadley, args=())) | 2934 | joblist.append(Job(function=gmm_wadley, args=())) |
2965 | if len(joblist): | 2935 | if len(joblist): |
2966 | process_jobs(joblist) | 2936 | process_jobs(joblist) |
2937 | + merge_jsons() | ||
2967 | 2938 | ... | ... |
-
mentioned in commit 3a74c203
-
Please register or login to post a comment