Added functions conversion_angles, conversion_eta_theta, angles_torsion_hire_RNA
Showing
1 changed file
with
183 additions
and
0 deletions
... | @@ -17,6 +17,7 @@ from scipy.spatial.distance import squareform | ... | @@ -17,6 +17,7 @@ from scipy.spatial.distance import squareform |
17 | from mpl_toolkits.mplot3d import axes3d | 17 | from mpl_toolkits.mplot3d import axes3d |
18 | from Bio import AlignIO, SeqIO | 18 | from Bio import AlignIO, SeqIO |
19 | from Bio.PDB.MMCIFParser import MMCIFParser | 19 | from Bio.PDB.MMCIFParser import MMCIFParser |
20 | +from Bio.PDB.vectors import Vector, calc_angle, calc_dihedral | ||
20 | from functools import partial | 21 | from functools import partial |
21 | from multiprocessing import Pool, Manager | 22 | from multiprocessing import Pool, Manager |
22 | from os import path | 23 | from os import path |
... | @@ -1695,7 +1696,186 @@ def dist_atoms_hire_RNA (f) : | ... | @@ -1695,7 +1696,186 @@ def dist_atoms_hire_RNA (f) : |
1695 | 1696 | ||
1696 | df.to_csv(runDir + '/results/distances_hRNA/' + 'dist_atoms_hire_RNA '+name+'.csv') | 1697 | df.to_csv(runDir + '/results/distances_hRNA/' + 'dist_atoms_hire_RNA '+name+'.csv') |
1697 | 1698 | ||
1699 | +def conversion_angles(bdd): | ||
1700 | + ''' | ||
1701 | + Convert database torsion angles to degrees | ||
1702 | + and put them in a list to reuse for statistics | ||
1703 | + ''' | ||
1704 | + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | ||
1705 | + db_path = os.path.join(BASE_DIR, bdd) | ||
1706 | + baseDeDonnees = sqlite3.connect(db_path) | ||
1707 | + curseur = baseDeDonnees.cursor() | ||
1708 | + curseur.execute("SELECT chain_id, nt_name, alpha, beta, gamma, delta, epsilon, zeta, chi FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U' ;") | ||
1709 | + liste=[] | ||
1710 | + for nt in curseur.fetchall(): # retrieve the angle measurements and put them in a list | ||
1711 | + liste.append(nt) | ||
1712 | + angles_torsion=[] | ||
1713 | + for nt in liste : | ||
1714 | + angles_deg=[] | ||
1715 | + angles_deg.append(nt[0]) #chain_id | ||
1716 | + angles_deg.append(nt[1]) #nt_name | ||
1717 | + for i in range (2,9): # on all angles | ||
1718 | + angle=0 | ||
1719 | + if nt[i] == None : | ||
1720 | + angle=None | ||
1721 | + elif nt[i]<=np.pi: #if angle value <pi, positive | ||
1722 | + angle=(180/np.pi)*nt[i] | ||
1723 | + elif np.pi < nt[i] <= 2*np.pi : #if value of the angle between pi and 2pi, negative | ||
1724 | + angle=((180/np.pi)*nt[i])-360 | ||
1725 | + else : | ||
1726 | + angle=nt[i] # dans le cas ou certains angles seraient en degres -> supprimer? | ||
1727 | + angles_deg.append(angle) | ||
1728 | + angles_torsion.append(angles_deg) | ||
1729 | + return angles_torsion | ||
1730 | + | ||
1731 | +def conversion_eta_theta(bdd): | ||
1732 | + ''' | ||
1733 | + We repeat the operation for the pseudotorsion angles | ||
1734 | + ''' | ||
1735 | + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | ||
1736 | + db_path = os.path.join(BASE_DIR, bdd) | ||
1737 | + baseDeDonnees = sqlite3.connect(db_path) | ||
1738 | + curseur = baseDeDonnees.cursor() | ||
1739 | + curseur.execute("SELECT chain_id, nt_name, eta, theta, eta_prime, theta_prime, eta_base, theta_base FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U';") | ||
1740 | + liste=[] | ||
1741 | + for nt in curseur.fetchall(): | ||
1742 | + liste.append(nt) | ||
1743 | + angles_virtuels=[] | ||
1744 | + for nt in liste : | ||
1745 | + angles_deg=[] | ||
1746 | + angles_deg.append(nt[0]) #chain_id | ||
1747 | + angles_deg.append(nt[1]) #nt_name | ||
1748 | + for i in range (2,8): | ||
1749 | + angle=0 | ||
1750 | + if nt[i] == None : | ||
1751 | + angle=None | ||
1752 | + elif nt[i]<=np.pi: | ||
1753 | + angle=(180/np.pi)*nt[i] | ||
1754 | + elif np.pi < nt[i] <= 2*np.pi : | ||
1755 | + angle=((180/np.pi)*nt[i])-360 | ||
1756 | + else : | ||
1757 | + angle=nt[i] | ||
1758 | + angles_deg.append(angle) | ||
1759 | + angles_virtuels.append(angles_deg) | ||
1760 | + return angles_virtuels | ||
1761 | + | ||
1762 | +def angles_torsion_hire_RNA(f): | ||
1763 | + ''' | ||
1764 | + Measures the torsion angles between the atoms of the HiRE-RNA model | ||
1765 | + Saves the results in a dataframe | ||
1766 | + ''' | ||
1767 | + name=str.split(f,'.')[0] | ||
1768 | + liste_angles_torsion=[] | ||
1769 | + | ||
1770 | + last_o5p=[] | ||
1771 | + last_c4p=[] | ||
1772 | + last_c5p=[] | ||
1773 | + last_c1p=[] | ||
1774 | + | ||
1775 | + os.makedirs(runDir+"/results/torsion_angles_hRNA/", exist_ok=True) | ||
1776 | + | ||
1777 | + parser=MMCIFParser() | ||
1778 | + s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f)) | ||
1779 | + chain = next(s[0].get_chains()) | ||
1780 | + | ||
1698 | 1781 | ||
1782 | + for res in chain : | ||
1783 | + p_o5_c5_c4=np.nan | ||
1784 | + o5_c5_c4_c1=np.nan | ||
1785 | + c5_c4_c1_b1=np.nan | ||
1786 | + c4_c1_b1_b2=np.nan | ||
1787 | + o5_c5_c4_psuiv=np.nan | ||
1788 | + c5_c4_psuiv_o5suiv=np.nan | ||
1789 | + c4_psuiv_o5suiv_c5suiv=np.nan | ||
1790 | + c1_c4_psuiv_o5suiv=np.nan | ||
1791 | + if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA'] : # several phosphate groups | ||
1792 | + atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"] | ||
1793 | + atom_o5p= [ atom.get_coord() for atom in res if "O5'" in atom.get_fullname() ] | ||
1794 | + atom_c5p = [ atom.get_coord() for atom in res if "C5'" in atom.get_fullname() ] | ||
1795 | + atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] | ||
1796 | + atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | ||
1797 | + atom_b1=pos_b1(res) | ||
1798 | + atom_b2=pos_b2(res) | ||
1799 | + | ||
1800 | + if len(atom_p)<1 or len(atom_o5p)<1 or len(atom_c5p)<1 or len(atom_c4p)<1: | ||
1801 | + p_o5_c5_c4=p_o5_c5_c4 | ||
1802 | + else : | ||
1803 | + p=Vector(atom_p[0]) | ||
1804 | + o5p=Vector(atom_o5p[0]) | ||
1805 | + c5p=Vector(atom_c5p[0]) | ||
1806 | + c4p=Vector(atom_c4p[0]) | ||
1807 | + p_o5_c5_c4=calc_dihedral(p, o5p, c5p, c4p)*(180/np.pi) | ||
1808 | + | ||
1809 | + if len(atom_c1p)<1 or len(atom_o5p)<1 or len(atom_c5p)<1 or len(atom_c4p)<1: | ||
1810 | + o5_c5_c4_c1=o5_c5_c4_c1 | ||
1811 | + else : | ||
1812 | + o5p=Vector(atom_o5p[0]) | ||
1813 | + c5p=Vector(atom_c5p[0]) | ||
1814 | + c4p=Vector(atom_c4p[0]) | ||
1815 | + c1p=Vector(atom_c1p[0]) | ||
1816 | + o5_c5_c4_c1=calc_dihedral(o5p, c5p, c4p, c1p)*(180/np.pi) | ||
1817 | + | ||
1818 | + if len(atom_c1p)<1 or len(atom_b1)<1 or len(atom_c5p)<1 or len(atom_c4p)<1: | ||
1819 | + c5_c4_c1_b1=c5_c4_c1_b1 | ||
1820 | + else : | ||
1821 | + c5p=Vector(atom_c5p[0]) | ||
1822 | + c4p=Vector(atom_c4p[0]) | ||
1823 | + c1p=Vector(atom_c1p[0]) | ||
1824 | + b1=Vector(atom_b1) | ||
1825 | + c5_c4_c1_b1=calc_dihedral(c5p, c4p, c1p, b1)*(180/np.pi) | ||
1826 | + | ||
1827 | + if len(atom_c1p)<1 or len(atom_b1)<1 or len(atom_b2)<1 or len(atom_c4p)<1: | ||
1828 | + c4_c1_b1_b2=c4_c1_b1_b2 | ||
1829 | + else : | ||
1830 | + c4p=Vector(atom_c4p[0]) | ||
1831 | + c1p=Vector(atom_c1p[0]) | ||
1832 | + b1=Vector(atom_b1) | ||
1833 | + b2=Vector(atom_b2) | ||
1834 | + c4_c1_b1_b2=calc_dihedral(c4p, c1p, b1, b2)*(180/np.pi) | ||
1835 | + | ||
1836 | + if len(last_o5p)<1 or len(atom_p)<1 or len(last_c5p)<1 or len(last_c4p)<1: | ||
1837 | + o5_c5_c4_psuiv=o5_c5_c4_psuiv | ||
1838 | + else : | ||
1839 | + o5p_prec=Vector(last_o5p[0]) | ||
1840 | + c5p_prec=Vector(last_c5p[0]) | ||
1841 | + c4p_prec=Vector(last_c4p[0]) | ||
1842 | + p=Vector(atom_p[0]) | ||
1843 | + o5_c5_c4_psuiv=calc_dihedral(o5p_prec, c5p_prec, c4p_prec, p)*(180/np.pi) | ||
1844 | + | ||
1845 | + if len(atom_o5p)<1 or len(atom_p)<1 or len(last_c5p)<1 or len(last_c4p)<1: | ||
1846 | + c5_c4_psuiv_o5suiv=c5_c4_psuiv_o5suiv | ||
1847 | + else : | ||
1848 | + c5p_prec=Vector(last_c5p[0]) | ||
1849 | + c4p_prec=Vector(last_c4p[0]) | ||
1850 | + p=Vector(atom_p[0]) | ||
1851 | + o5p=Vector(atom_o5p[0]) | ||
1852 | + c5_c4_psuiv_o5suiv=calc_dihedral(c5p_prec, c4p_prec, p, o5p)*(180/np.pi) | ||
1853 | + | ||
1854 | + if len(atom_o5p)<1 or len(atom_p)<1 or len(atom_c5p)<1 or len(last_c4p)<1: | ||
1855 | + c4_psuiv_o5suiv_c5suiv=c4_psuiv_o5suiv_c5suiv | ||
1856 | + else : | ||
1857 | + c4p_prec=Vector(last_c4p[0]) | ||
1858 | + p=Vector(atom_p[0]) | ||
1859 | + o5p=Vector(atom_o5p[0]) | ||
1860 | + c5p=Vector(atom_c5p[0]) | ||
1861 | + c4_psuiv_o5suiv_c5suiv=calc_dihedral(c4p_prec, p, o5p, c5p)*(180/np.pi) | ||
1862 | + | ||
1863 | + if len(atom_o5p)<1 or len(atom_p)<1 or len(last_c1p)<1 or len(last_c4p)<1: | ||
1864 | + c1_c4_psuiv_o5suiv=c1_c4_psuiv_o5suiv | ||
1865 | + else : | ||
1866 | + c1p_prec=Vector(last_c1p[0]) | ||
1867 | + c4p_prec=Vector(last_c4p[0]) | ||
1868 | + p=Vector(atom_p[0]) | ||
1869 | + o5p=Vector(atom_o5p[0]) | ||
1870 | + c1_c4_psuiv_o5suiv=calc_dihedral(c1p_prec, c4p_prec, p, o5p)*(180/np.pi) | ||
1871 | + last_o5p=atom_o5p | ||
1872 | + last_c4p=atom_c4p | ||
1873 | + last_c5p=atom_c5p | ||
1874 | + last_c1p=atom_c1p | ||
1875 | + liste_angles_torsion.append([res.get_resname(), p_o5_c5_c4, o5_c5_c4_c1, c5_c4_c1_b1, c4_c1_b1_b2, o5_c5_c4_psuiv, c5_c4_psuiv_o5suiv, c4_psuiv_o5suiv_c5suiv, c1_c4_psuiv_o5suiv]) | ||
1876 | + df=pd.DataFrame(liste_angles_torsion, columns=["Residu", "P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "O5'-C5'-C4'-P°", "C5'-C4'-P°-O5'°", "C4'-P°-O5'°-C5'°", "C1'-C4'-P°-O5'°"]) | ||
1877 | + | ||
1878 | + df.to_csv(runDir + '/results/torsion_angles_hRNA/' + 'angles_torsion_hire_RNA '+name+'.csv') | ||
1699 | 1879 | ||
1700 | 1880 | ||
1701 | if __name__ == "__main__": | 1881 | if __name__ == "__main__": |
... | @@ -1830,11 +2010,14 @@ if __name__ == "__main__": | ... | @@ -1830,11 +2010,14 @@ if __name__ == "__main__": |
1830 | 2010 | ||
1831 | #dist_atoms_hire_RNA(os.listdir(path_to_3D_data + "rna_only")[0]) | 2011 | #dist_atoms_hire_RNA(os.listdir(path_to_3D_data + "rna_only")[0]) |
1832 | #concatenate('/results/distances/', os.listdir(runDir+'/results/distances/'), 'dist_atoms.csv') | 2012 | #concatenate('/results/distances/', os.listdir(runDir+'/results/distances/'), 'dist_atoms.csv') |
2013 | + #conversion_angles('/home/atabot/RNANet.db')) # chemin -> runDir + /results/RNANet.db | ||
2014 | + #conversion_eta_theta('/home/atabot/RNANet.db') | ||
1833 | #exit() | 2015 | #exit() |
1834 | f_prec=os.listdir(path_to_3D_data + "rna_only")[0] | 2016 | f_prec=os.listdir(path_to_3D_data + "rna_only")[0] |
1835 | for f in os.listdir(path_to_3D_data + "rna_only")[:100]: | 2017 | for f in os.listdir(path_to_3D_data + "rna_only")[:100]: |
1836 | joblist.append(Job(function=dist_atoms, args=(f,))) | 2018 | joblist.append(Job(function=dist_atoms, args=(f,))) |
1837 | joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) | 2019 | joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) |
2020 | + joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,))) | ||
1838 | 2021 | ||
1839 | 2022 | ||
1840 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | 2023 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | ... | ... |
-
Please register or login to post a comment