Louis BECQUEY

Fixed missing atom declaration (atom_c4p in measures_wadley)

...@@ -1484,6 +1484,7 @@ def measures_wadley(name, s, thr_idx): ...@@ -1484,6 +1484,7 @@ def measures_wadley(name, s, thr_idx):
1484 if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"] : 1484 if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"] :
1485 atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"] 1485 atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"]
1486 atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] 1486 atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
1487 + atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ]
1487 if len(atom_c1p) > 1: 1488 if len(atom_c1p) > 1:
1488 for atom in res: 1489 for atom in res:
1489 if "C1'" in atom.get_fullname(): 1490 if "C1'" in atom.get_fullname():
...@@ -2454,8 +2455,8 @@ def gmm_hrna(): ...@@ -2454,8 +2455,8 @@ def gmm_hrna():
2454 c1p_b1 = list(df["C1'-B1"][~ np.isnan(df["C1'-B1"])]) 2455 c1p_b1 = list(df["C1'-B1"][~ np.isnan(df["C1'-B1"])])
2455 b1_b2 = list(df["B1-B2"][~ np.isnan(df["B1-B2"])]) 2456 b1_b2 = list(df["B1-B2"][~ np.isnan(df["B1-B2"])])
2456 2457
2457 - os.makedirs(runDir + "/results/figures/HiRE-RNA/distances/", exist_ok=True) 2458 + os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/distances/", exist_ok=True)
2458 - os.chdir(runDir + "/results/figures/HiRE-RNA/distances/") 2459 + os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/distances/")
2459 2460
2460 GMM_histo(o5p_c5p, "O5'-C5'") 2461 GMM_histo(o5p_c5p, "O5'-C5'")
2461 GMM_histo(b1_b2, "B1-B2") 2462 GMM_histo(b1_b2, "B1-B2")
...@@ -2476,7 +2477,7 @@ def gmm_hrna(): ...@@ -2476,7 +2477,7 @@ def gmm_hrna():
2476 axes.set_ylim(0, 100) 2477 axes.set_ylim(0, 100)
2477 plt.xlabel("Distance (Angström)") 2478 plt.xlabel("Distance (Angström)")
2478 plt.title("GMM des distances entre atomes HiRE-RNA") 2479 plt.title("GMM des distances entre atomes HiRE-RNA")
2479 - plt.savefig(runDir + "/results/figures/HiRE-RNA/distances/GMM des distances entre atomes HiRE-RNA.png") 2480 + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM des distances entre atomes HiRE-RNA.png")
2480 plt.close() 2481 plt.close()
2481 2482
2482 # Angles 2483 # Angles
...@@ -2491,8 +2492,8 @@ def gmm_hrna(): ...@@ -2491,8 +2492,8 @@ def gmm_hrna():
2491 c4p_c1p_b1 = list(df["C4'-C1'-B1"][~ np.isnan(df["C4'-C1'-B1"])]) 2492 c4p_c1p_b1 = list(df["C4'-C1'-B1"][~ np.isnan(df["C4'-C1'-B1"])])
2492 c1p_b1_b2 = list(df["C1'-B1-B2"][~ np.isnan(df["C1'-B1-B2"])]) 2493 c1p_b1_b2 = list(df["C1'-B1-B2"][~ np.isnan(df["C1'-B1-B2"])])
2493 2494
2494 - os.makedirs(runDir + "/results/figures/HiRE-RNA/distances/", exist_ok=True) 2495 + os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/distances/", exist_ok=True)
2495 - os.chdir(runDir + "/results/figures/HiRE-RNA/distances/") 2496 + os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/distances/")
2496 2497
2497 GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True) 2498 GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True)
2498 GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True) 2499 GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True)
...@@ -2515,7 +2516,7 @@ def gmm_hrna(): ...@@ -2515,7 +2516,7 @@ def gmm_hrna():
2515 axes.set_ylim(0, 100) 2516 axes.set_ylim(0, 100)
2516 plt.xlabel("Angle (Degré)") 2517 plt.xlabel("Angle (Degré)")
2517 plt.title("GMM des angles entre atomes HiRE-RNA") 2518 plt.title("GMM des angles entre atomes HiRE-RNA")
2518 - plt.savefig(runDir + "/results/figures/HiRE-RNA/angles/GMM des angles entre atomes HiRE-RNA.png") 2519 + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM des angles entre atomes HiRE-RNA.png")
2519 plt.close() 2520 plt.close()
2520 2521
2521 # Torsions 2522 # Torsions
...@@ -2530,8 +2531,8 @@ def gmm_hrna(): ...@@ -2530,8 +2531,8 @@ def gmm_hrna():
2530 c4_psuiv_o5suiv_c5suiv = list(df["C4'-P°-O5'°-C5'°"][~ np.isnan(df["C4'-P°-O5'°-C5'°"])]) 2531 c4_psuiv_o5suiv_c5suiv = list(df["C4'-P°-O5'°-C5'°"][~ np.isnan(df["C4'-P°-O5'°-C5'°"])])
2531 c1_c4_psuiv_o5suiv = list(df["C1'-C4'-P°-O5'°"][~ np.isnan(df["C1'-C4'-P°-O5'°"])]) 2532 c1_c4_psuiv_o5suiv = list(df["C1'-C4'-P°-O5'°"][~ np.isnan(df["C1'-C4'-P°-O5'°"])])
2532 2533
2533 - os.makedirs(runDir + "/results/figures/HiRE-RNA/torsions/", exist_ok=True) 2534 + os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True)
2534 - os.chdir(runDir + "/results/figures/HiRE-RNA/torsions/") 2535 + os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/torsions/")
2535 2536
2536 GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True) 2537 GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True)
2537 GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True) 2538 GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True)
...@@ -2945,10 +2946,10 @@ if __name__ == "__main__": ...@@ -2945,10 +2946,10 @@ if __name__ == "__main__":
2945 # Define the tasks 2946 # Define the tasks
2946 joblist = [] 2947 joblist = []
2947 2948
2948 - # Do eta/theta plots 2949 + # # Do eta/theta plots
2949 - if n_unmapped_chains and DO_WADLEY_ANALYSIS: 2950 + # if n_unmapped_chains and DO_WADLEY_ANALYSIS:
2950 - joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) 2951 + # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr)))
2951 - joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) 2952 + # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr)))
2952 2953
2953 # Do distance matrices for each family excl. LSU/SSU (will be processed later) 2954 # Do distance matrices for each family excl. LSU/SSU (will be processed later)
2954 if DO_AVG_DISTANCE_MATRIX: 2955 if DO_AVG_DISTANCE_MATRIX:
...@@ -2963,18 +2964,17 @@ if __name__ == "__main__": ...@@ -2963,18 +2964,17 @@ if __name__ == "__main__":
2963 joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) 2964 joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False)))
2964 joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) 2965 joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False)))
2965 2966
2966 - # Do general family statistics 2967 + # # Do general family statistics
2967 - joblist.append(Job(function=stats_len)) # Computes figures about chain lengths 2968 + # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths
2968 - joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) 2969 + # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families)
2969 - for f in famlist: 2970 + # for f in famlist:
2970 - joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) 2971 + # joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family)
2971 - if f not in ignored: 2972 + # if f not in ignored:
2972 - joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) 2973 + # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families)
2973 2974
2974 # Do geometric measures on all chains 2975 # Do geometric measures on all chains
2975 if n_unmapped_chains: 2976 if n_unmapped_chains:
2976 os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) 2977 os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True)
2977 - os.makedirs(runDir+"/results/geometry/all-atoms/angles/", exist_ok=True)
2978 f_prec = os.listdir(path_to_3D_data + "rna_only")[0] 2978 f_prec = os.listdir(path_to_3D_data + "rna_only")[0]
2979 for f in os.listdir(path_to_3D_data + "rna_only"): 2979 for f in os.listdir(path_to_3D_data + "rna_only"):
2980 joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances 2980 joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances
...@@ -2992,7 +2992,7 @@ if __name__ == "__main__": ...@@ -2992,7 +2992,7 @@ if __name__ == "__main__":
2992 2992
2993 #exit() 2993 #exit()
2994 2994
2995 - # process_jobs(joblist) 2995 + process_jobs(joblist)
2996 2996
2997 # Now process the memory-heavy tasks family by family 2997 # Now process the memory-heavy tasks family by family
2998 if DO_AVG_DISTANCE_MATRIX: 2998 if DO_AVG_DISTANCE_MATRIX:
...@@ -3016,15 +3016,15 @@ if __name__ == "__main__": ...@@ -3016,15 +3016,15 @@ if __name__ == "__main__":
3016 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) 3016 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True)
3017 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) 3017 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True)
3018 joblist = [] 3018 joblist = []
3019 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) 3019 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')))
3020 - if DO_HIRE_RNA_MEASURES: 3020 + # if DO_HIRE_RNA_MEASURES:
3021 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) 3021 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
3022 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) 3022 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv')))
3023 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) 3023 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv')))
3024 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) 3024 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv')))
3025 if DO_WADLEY_ANALYSIS: 3025 if DO_WADLEY_ANALYSIS:
3026 joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) 3026 joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv')))
3027 - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv'))) 3027 + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv')))
3028 process_jobs(joblist) 3028 process_jobs(joblist)
3029 joblist = [] 3029 joblist = []
3030 joblist.append(Job(function=gmm_aa_dists, args=())) 3030 joblist.append(Job(function=gmm_aa_dists, args=()))
......