Aglaé TABOT

Added functions conversion_angles, conversion_eta_theta, angles_torsion_hire_RNA

...@@ -17,6 +17,7 @@ from scipy.spatial.distance import squareform ...@@ -17,6 +17,7 @@ from scipy.spatial.distance import squareform
17 from mpl_toolkits.mplot3d import axes3d 17 from mpl_toolkits.mplot3d import axes3d
18 from Bio import AlignIO, SeqIO 18 from Bio import AlignIO, SeqIO
19 from Bio.PDB.MMCIFParser import MMCIFParser 19 from Bio.PDB.MMCIFParser import MMCIFParser
20 +from Bio.PDB.vectors import Vector, calc_angle, calc_dihedral
20 from functools import partial 21 from functools import partial
21 from multiprocessing import Pool, Manager 22 from multiprocessing import Pool, Manager
22 from os import path 23 from os import path
...@@ -1695,7 +1696,186 @@ def dist_atoms_hire_RNA (f) : ...@@ -1695,7 +1696,186 @@ def dist_atoms_hire_RNA (f) :
1695 1696
1696 df.to_csv(runDir + '/results/distances_hRNA/' + 'dist_atoms_hire_RNA '+name+'.csv') 1697 df.to_csv(runDir + '/results/distances_hRNA/' + 'dist_atoms_hire_RNA '+name+'.csv')
1697 1698
1699 +def conversion_angles(bdd):
1700 + '''
1701 + Convert database torsion angles to degrees
1702 + and put them in a list to reuse for statistics
1703 + '''
1704 + BASE_DIR = os.path.dirname(os.path.abspath(__file__))
1705 + db_path = os.path.join(BASE_DIR, bdd)
1706 + baseDeDonnees = sqlite3.connect(db_path)
1707 + curseur = baseDeDonnees.cursor()
1708 + curseur.execute("SELECT chain_id, nt_name, alpha, beta, gamma, delta, epsilon, zeta, chi FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U' ;")
1709 + liste=[]
1710 + for nt in curseur.fetchall(): # retrieve the angle measurements and put them in a list
1711 + liste.append(nt)
1712 + angles_torsion=[]
1713 + for nt in liste :
1714 + angles_deg=[]
1715 + angles_deg.append(nt[0]) #chain_id
1716 + angles_deg.append(nt[1]) #nt_name
1717 + for i in range (2,9): # on all angles
1718 + angle=0
1719 + if nt[i] == None :
1720 + angle=None
1721 + elif nt[i]<=np.pi: #if angle value <pi, positive
1722 + angle=(180/np.pi)*nt[i]
1723 + elif np.pi < nt[i] <= 2*np.pi : #if value of the angle between pi and 2pi, negative
1724 + angle=((180/np.pi)*nt[i])-360
1725 + else :
1726 + angle=nt[i] # dans le cas ou certains angles seraient en degres -> supprimer?
1727 + angles_deg.append(angle)
1728 + angles_torsion.append(angles_deg)
1729 + return angles_torsion
1730 +
1731 +def conversion_eta_theta(bdd):
1732 + '''
1733 + We repeat the operation for the pseudotorsion angles
1734 + '''
1735 + BASE_DIR = os.path.dirname(os.path.abspath(__file__))
1736 + db_path = os.path.join(BASE_DIR, bdd)
1737 + baseDeDonnees = sqlite3.connect(db_path)
1738 + curseur = baseDeDonnees.cursor()
1739 + curseur.execute("SELECT chain_id, nt_name, eta, theta, eta_prime, theta_prime, eta_base, theta_base FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U';")
1740 + liste=[]
1741 + for nt in curseur.fetchall():
1742 + liste.append(nt)
1743 + angles_virtuels=[]
1744 + for nt in liste :
1745 + angles_deg=[]
1746 + angles_deg.append(nt[0]) #chain_id
1747 + angles_deg.append(nt[1]) #nt_name
1748 + for i in range (2,8):
1749 + angle=0
1750 + if nt[i] == None :
1751 + angle=None
1752 + elif nt[i]<=np.pi:
1753 + angle=(180/np.pi)*nt[i]
1754 + elif np.pi < nt[i] <= 2*np.pi :
1755 + angle=((180/np.pi)*nt[i])-360
1756 + else :
1757 + angle=nt[i]
1758 + angles_deg.append(angle)
1759 + angles_virtuels.append(angles_deg)
1760 + return angles_virtuels
1761 +
1762 +def angles_torsion_hire_RNA(f):
1763 + '''
1764 + Measures the torsion angles between the atoms of the HiRE-RNA model
1765 + Saves the results in a dataframe
1766 + '''
1767 + name=str.split(f,'.')[0]
1768 + liste_angles_torsion=[]
1769 +
1770 + last_o5p=[]
1771 + last_c4p=[]
1772 + last_c5p=[]
1773 + last_c1p=[]
1774 +
1775 + os.makedirs(runDir+"/results/torsion_angles_hRNA/", exist_ok=True)
1776 +
1777 + parser=MMCIFParser()
1778 + s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f))
1779 + chain = next(s[0].get_chains())
1780 +
1698 1781
1782 + for res in chain :
1783 + p_o5_c5_c4=np.nan
1784 + o5_c5_c4_c1=np.nan
1785 + c5_c4_c1_b1=np.nan
1786 + c4_c1_b1_b2=np.nan
1787 + o5_c5_c4_psuiv=np.nan
1788 + c5_c4_psuiv_o5suiv=np.nan
1789 + c4_psuiv_o5suiv_c5suiv=np.nan
1790 + c1_c4_psuiv_o5suiv=np.nan
1791 + if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA'] : # several phosphate groups
1792 + atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"]
1793 + atom_o5p= [ atom.get_coord() for atom in res if "O5'" in atom.get_fullname() ]
1794 + atom_c5p = [ atom.get_coord() for atom in res if "C5'" in atom.get_fullname() ]
1795 + atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ]
1796 + atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
1797 + atom_b1=pos_b1(res)
1798 + atom_b2=pos_b2(res)
1799 +
1800 + if len(atom_p)<1 or len(atom_o5p)<1 or len(atom_c5p)<1 or len(atom_c4p)<1:
1801 + p_o5_c5_c4=p_o5_c5_c4
1802 + else :
1803 + p=Vector(atom_p[0])
1804 + o5p=Vector(atom_o5p[0])
1805 + c5p=Vector(atom_c5p[0])
1806 + c4p=Vector(atom_c4p[0])
1807 + p_o5_c5_c4=calc_dihedral(p, o5p, c5p, c4p)*(180/np.pi)
1808 +
1809 + if len(atom_c1p)<1 or len(atom_o5p)<1 or len(atom_c5p)<1 or len(atom_c4p)<1:
1810 + o5_c5_c4_c1=o5_c5_c4_c1
1811 + else :
1812 + o5p=Vector(atom_o5p[0])
1813 + c5p=Vector(atom_c5p[0])
1814 + c4p=Vector(atom_c4p[0])
1815 + c1p=Vector(atom_c1p[0])
1816 + o5_c5_c4_c1=calc_dihedral(o5p, c5p, c4p, c1p)*(180/np.pi)
1817 +
1818 + if len(atom_c1p)<1 or len(atom_b1)<1 or len(atom_c5p)<1 or len(atom_c4p)<1:
1819 + c5_c4_c1_b1=c5_c4_c1_b1
1820 + else :
1821 + c5p=Vector(atom_c5p[0])
1822 + c4p=Vector(atom_c4p[0])
1823 + c1p=Vector(atom_c1p[0])
1824 + b1=Vector(atom_b1)
1825 + c5_c4_c1_b1=calc_dihedral(c5p, c4p, c1p, b1)*(180/np.pi)
1826 +
1827 + if len(atom_c1p)<1 or len(atom_b1)<1 or len(atom_b2)<1 or len(atom_c4p)<1:
1828 + c4_c1_b1_b2=c4_c1_b1_b2
1829 + else :
1830 + c4p=Vector(atom_c4p[0])
1831 + c1p=Vector(atom_c1p[0])
1832 + b1=Vector(atom_b1)
1833 + b2=Vector(atom_b2)
1834 + c4_c1_b1_b2=calc_dihedral(c4p, c1p, b1, b2)*(180/np.pi)
1835 +
1836 + if len(last_o5p)<1 or len(atom_p)<1 or len(last_c5p)<1 or len(last_c4p)<1:
1837 + o5_c5_c4_psuiv=o5_c5_c4_psuiv
1838 + else :
1839 + o5p_prec=Vector(last_o5p[0])
1840 + c5p_prec=Vector(last_c5p[0])
1841 + c4p_prec=Vector(last_c4p[0])
1842 + p=Vector(atom_p[0])
1843 + o5_c5_c4_psuiv=calc_dihedral(o5p_prec, c5p_prec, c4p_prec, p)*(180/np.pi)
1844 +
1845 + if len(atom_o5p)<1 or len(atom_p)<1 or len(last_c5p)<1 or len(last_c4p)<1:
1846 + c5_c4_psuiv_o5suiv=c5_c4_psuiv_o5suiv
1847 + else :
1848 + c5p_prec=Vector(last_c5p[0])
1849 + c4p_prec=Vector(last_c4p[0])
1850 + p=Vector(atom_p[0])
1851 + o5p=Vector(atom_o5p[0])
1852 + c5_c4_psuiv_o5suiv=calc_dihedral(c5p_prec, c4p_prec, p, o5p)*(180/np.pi)
1853 +
1854 + if len(atom_o5p)<1 or len(atom_p)<1 or len(atom_c5p)<1 or len(last_c4p)<1:
1855 + c4_psuiv_o5suiv_c5suiv=c4_psuiv_o5suiv_c5suiv
1856 + else :
1857 + c4p_prec=Vector(last_c4p[0])
1858 + p=Vector(atom_p[0])
1859 + o5p=Vector(atom_o5p[0])
1860 + c5p=Vector(atom_c5p[0])
1861 + c4_psuiv_o5suiv_c5suiv=calc_dihedral(c4p_prec, p, o5p, c5p)*(180/np.pi)
1862 +
1863 + if len(atom_o5p)<1 or len(atom_p)<1 or len(last_c1p)<1 or len(last_c4p)<1:
1864 + c1_c4_psuiv_o5suiv=c1_c4_psuiv_o5suiv
1865 + else :
1866 + c1p_prec=Vector(last_c1p[0])
1867 + c4p_prec=Vector(last_c4p[0])
1868 + p=Vector(atom_p[0])
1869 + o5p=Vector(atom_o5p[0])
1870 + c1_c4_psuiv_o5suiv=calc_dihedral(c1p_prec, c4p_prec, p, o5p)*(180/np.pi)
1871 + last_o5p=atom_o5p
1872 + last_c4p=atom_c4p
1873 + last_c5p=atom_c5p
1874 + last_c1p=atom_c1p
1875 + liste_angles_torsion.append([res.get_resname(), p_o5_c5_c4, o5_c5_c4_c1, c5_c4_c1_b1, c4_c1_b1_b2, o5_c5_c4_psuiv, c5_c4_psuiv_o5suiv, c4_psuiv_o5suiv_c5suiv, c1_c4_psuiv_o5suiv])
1876 + df=pd.DataFrame(liste_angles_torsion, columns=["Residu", "P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "O5'-C5'-C4'-P°", "C5'-C4'-P°-O5'°", "C4'-P°-O5'°-C5'°", "C1'-C4'-P°-O5'°"])
1877 +
1878 + df.to_csv(runDir + '/results/torsion_angles_hRNA/' + 'angles_torsion_hire_RNA '+name+'.csv')
1699 1879
1700 1880
1701 if __name__ == "__main__": 1881 if __name__ == "__main__":
...@@ -1830,11 +2010,14 @@ if __name__ == "__main__": ...@@ -1830,11 +2010,14 @@ if __name__ == "__main__":
1830 2010
1831 #dist_atoms_hire_RNA(os.listdir(path_to_3D_data + "rna_only")[0]) 2011 #dist_atoms_hire_RNA(os.listdir(path_to_3D_data + "rna_only")[0])
1832 #concatenate('/results/distances/', os.listdir(runDir+'/results/distances/'), 'dist_atoms.csv') 2012 #concatenate('/results/distances/', os.listdir(runDir+'/results/distances/'), 'dist_atoms.csv')
2013 + #conversion_angles('/home/atabot/RNANet.db')) # chemin -> runDir + /results/RNANet.db
2014 + #conversion_eta_theta('/home/atabot/RNANet.db')
1833 #exit() 2015 #exit()
1834 f_prec=os.listdir(path_to_3D_data + "rna_only")[0] 2016 f_prec=os.listdir(path_to_3D_data + "rna_only")[0]
1835 for f in os.listdir(path_to_3D_data + "rna_only")[:100]: 2017 for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
1836 joblist.append(Job(function=dist_atoms, args=(f,))) 2018 joblist.append(Job(function=dist_atoms, args=(f,)))
1837 joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) 2019 joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,)))
2020 + joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,)))
1838 2021
1839 2022
1840 p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) 2023 p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers)
......