Showing
1 changed file
with
138 additions
and
3 deletions
... | @@ -1899,6 +1899,140 @@ def angles_torsion_hire_RNA(f): | ... | @@ -1899,6 +1899,140 @@ def angles_torsion_hire_RNA(f): |
1899 | idxQueue.put(thr_idx) # replace the thread index in the queue | 1899 | idxQueue.put(thr_idx) # replace the thread index in the queue |
1900 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") | 1900 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") |
1901 | 1901 | ||
def angles_plans_hire_RNA(f):
    '''
    Measures the plane angles involving P and C1' atoms along the first chain
    of one structure, and saves the results in a dataframe written to CSV.

    For each retained residue i, two angles are stored, in degrees:
    - "P-C1'-P°"    : angle P(previous) - C1'(previous) - P(i)
    - "C1'-P°-C1'°" : angle C1'(previous) - P(i) - C1'(i)
    where "previous" is the last retained residue. An angle stays NaN when
    one of the atoms it needs was not found.

    f : name of the mmCIF file to process; the part before the first '.'
        is used as structure name and in the output file name.

    The CSV is written in runDir + '/results/plane_angles_hRNA/'.
    A worker index is taken from idxQueue for the duration of the call and
    is always put back, even if the measurement fails.
    '''
    name = str.split(f, '.')[0]
    # residues with these names are skipped (no angles, no output row)
    excluded_resnames = ('ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA')
    liste_angles_plans = []
    last_p = []
    last_c1p = []

    global idxQueue
    thr_idx = idxQueue.get()
    pbar = None
    try:
        setproctitle(f"RNANet statistics.py Worker {thr_idx+1} angles_plans_hire_RNA({f})")

        os.makedirs(runDir+"/results/plane_angles_hRNA/", exist_ok=True)

        parser = MMCIFParser()
        # NOTE(review): the data directory is hard-coded here; confirm whether
        # it should follow the configured 3D data path instead.
        s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f))
        chain = next(s[0].get_chains())  # only the first chain of the first model is measured
        residues = list(chain.get_residues())
        pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_plans_hire_RNA", unit="residu", leave=False)
        pbar.update(0)
        for res in residues:
            p_c1p_psuiv = np.nan
            c1p_psuiv_c1psuiv = np.nan
            if res.get_resname() not in excluded_resnames:
                atom_p = [atom.get_coord() for atom in res if atom.get_name() == "P"]
                atom_c1p = [atom.get_coord() for atom in res if "C1'" in atom.get_fullname()]

                # P(previous) - C1'(previous) - P(current)
                if last_p and last_c1p and atom_p:
                    p_prec = Vector(last_p[0])
                    c1p_prec = Vector(last_c1p[0])
                    p = Vector(atom_p[0])
                    p_c1p_psuiv = calc_angle(p_prec, c1p_prec, p)*(180/np.pi)

                # C1'(previous) - P(current) - C1'(current)
                if atom_c1p and last_c1p and atom_p:
                    c1p_prec = Vector(last_c1p[0])
                    p = Vector(atom_p[0])
                    c1p = Vector(atom_c1p[0])
                    c1p_psuiv_c1psuiv = calc_angle(c1p_prec, p, c1p)*(180/np.pi)

                # the current residue becomes the "previous" one for the next iteration
                last_p = atom_p
                last_c1p = atom_c1p
                liste_angles_plans.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv])
            pbar.update(1)

        df = pd.DataFrame(liste_angles_plans, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"])
        df.to_csv(runDir + '/results/plane_angles_hRNA/' + 'angles_plans_hire_RNA '+name+'.csv')
    finally:
        if pbar is not None:
            pbar.close()
        idxQueue.put(thr_idx) # replace the thread index in the queue, even on failure
    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
1959 | + | ||
def histogram(data, name_data, x, y, nb):
    '''
    Draw a density-normalised, 50-bin histogram of data through plt
    (presumably matplotlib.pyplot), with green bars outlined in black.

    x and y are used as the labels of the horizontal and vertical axes.
    name_data and nb are accepted but not used by this function.
    '''
    bar_style = {"color": "green", "edgecolor": 'black', "linewidth": 1.2}
    plt.hist(data, bins=50, density=True, **bar_style)
    plt.xlabel(x)
    plt.ylabel(y)
1968 | + | ||
def GMM_histo(data, name_data, x, y, nb_fichiers) :
    '''
    Plot Gaussian-Mixture-Model on histograms

    Draws the histogram of data, fits one-dimensional Gaussian mixtures with
    1 to 8 components, picks a number of components from the fitted
    lower bounds, then overlays each weighted Gaussian component on the plot
    and saves the mixture parameters in a JSON file.

    data        : values to model; reshaped to a single column before fitting
    name_data   : name of the measure, used in the title, in the "measure"
                  field of the JSON and in the JSON file name
    x, y        : axis labels, forwarded to histogram()
    nb_fichiers : number shown in the title and used in the JSON file name

    The JSON file (name_data + " " + nb_fichiers + " .json") is written
    relative to the current working directory; weights, means and standard
    deviations are stored as lists of strings.
    '''
    histogram(data, name_data, x, y, nb_fichiers) # plot the histogram

    n_max = 8 # number of possible values for n_components
    n_components_range = np.arange(n_max)+1
    maxlogv = []
    obs = np.array(data).reshape(-1, 1)
    # construction of models and calculation of criteria
    nb_components = 1
    nb_log_max = n_components_range[0]
    log_max = 0
    # chooses the number of components based on the maximum likelihood value (maxlogv)
    for n_comp in n_components_range:
        gmm = GaussianMixture(n_components=n_comp).fit(obs)
        maxlogv.append(gmm.lower_bound_)
        if gmm.lower_bound_ == max(maxlogv) : # takes the maximum
            nb_components = n_comp
            # if there is convergence, keep the first maximum found
            if abs(gmm.lower_bound_-log_max) < 0.02 : # threshold=0.02
                nb_components = nb_log_max
                break
        log_max = max(maxlogv)
        nb_log_max = n_comp

    # plot with the appropriate number of components
    g = GaussianMixture(n_components=nb_components)
    g.fit(obs)
    weights = g.weights_
    means = g.means_
    covariances = g.covariances_

    D = obs.ravel()
    # evaluation grid for the curves; kept separate from the x axis label
    grid = np.linspace(D.min(), D.max(), 1000)
    colors = ['red', 'blue', 'gold', 'cyan', 'magenta', 'white', 'black', 'green']
    # prepare the dictionary to save the parameters
    summary_data = {}
    summary_data["measure"] = name_data
    summary_data["weights"] = []
    summary_data["means"] = []
    summary_data["std"] = []
    # plot
    for i in range(nb_components):
        mean = means[i]
        # the covariance of one component is a single-element array here:
        # extract the scalar explicitly instead of relying on the implicit
        # array-to-float conversion
        sigma = math.sqrt(covariances[i].item())
        weight = weights[i]
        plt.plot(grid, weight*stats.norm.pdf(grid, mean, sigma), c=colors[i])
        summary_data["means"].append(str(mean))
        summary_data["std"].append(str(sigma))
        summary_data["weights"].append(str(weight))
    plt.title("Histogramme " +name_data+ " avec GMM pour " +str(nb_components)+ " composantes (" + str(nb_fichiers)+" structures)")

    # save in a json
    with open (name_data +" "+str(nb_fichiers)+ " .json", 'w', encoding='utf-8') as f:
        json.dump(summary_data, f, indent=4)
2034 | + | ||
2035 | + | ||
1902 | 2036 | ||
1903 | if __name__ == "__main__": | 2037 | if __name__ == "__main__": |
1904 | 2038 | ||
... | @@ -2037,9 +2171,10 @@ if __name__ == "__main__": | ... | @@ -2037,9 +2171,10 @@ if __name__ == "__main__": |
2037 | #exit() | 2171 | #exit() |
2038 | f_prec=os.listdir(path_to_3D_data + "rna_only")[0] | 2172 | f_prec=os.listdir(path_to_3D_data + "rna_only")[0] |
2039 | for f in os.listdir(path_to_3D_data + "rna_only")[:100]: | 2173 | for f in os.listdir(path_to_3D_data + "rna_only")[:100]: |
2040 | - joblist.append(Job(function=dist_atoms, args=(f,))) | 2174 | + #joblist.append(Job(function=dist_atoms, args=(f,))) |
2041 | - joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) | 2175 | + #joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) |
2042 | - joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,))) | 2176 | + #joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,))) |
2177 | + joblist.append(Job(function=angles_plans_hire_RNA, args=(f,))) | ||
2043 | 2178 | ||
2044 | 2179 | ||
2045 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | 2180 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | ... | ... |
-
Please register or login to post a comment