Showing
1 changed file
with
28 additions
and
28 deletions
... | @@ -1484,6 +1484,7 @@ def measures_wadley(name, s, thr_idx): | ... | @@ -1484,6 +1484,7 @@ def measures_wadley(name, s, thr_idx): |
1484 | if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"] : | 1484 | if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"] : |
1485 | atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"] | 1485 | atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"] |
1486 | atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | 1486 | atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] |
1487 | + atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] | ||
1487 | if len(atom_c1p) > 1: | 1488 | if len(atom_c1p) > 1: |
1488 | for atom in res: | 1489 | for atom in res: |
1489 | if "C1'" in atom.get_fullname(): | 1490 | if "C1'" in atom.get_fullname(): |
... | @@ -2454,8 +2455,8 @@ def gmm_hrna(): | ... | @@ -2454,8 +2455,8 @@ def gmm_hrna(): |
2454 | c1p_b1 = list(df["C1'-B1"][~ np.isnan(df["C1'-B1"])]) | 2455 | c1p_b1 = list(df["C1'-B1"][~ np.isnan(df["C1'-B1"])]) |
2455 | b1_b2 = list(df["B1-B2"][~ np.isnan(df["B1-B2"])]) | 2456 | b1_b2 = list(df["B1-B2"][~ np.isnan(df["B1-B2"])]) |
2456 | 2457 | ||
2457 | - os.makedirs(runDir + "/results/figures/HiRE-RNA/distances/", exist_ok=True) | 2458 | + os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/distances/", exist_ok=True) |
2458 | - os.chdir(runDir + "/results/figures/HiRE-RNA/distances/") | 2459 | + os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/distances/") |
2459 | 2460 | ||
2460 | GMM_histo(o5p_c5p, "O5'-C5'") | 2461 | GMM_histo(o5p_c5p, "O5'-C5'") |
2461 | GMM_histo(b1_b2, "B1-B2") | 2462 | GMM_histo(b1_b2, "B1-B2") |
... | @@ -2476,7 +2477,7 @@ def gmm_hrna(): | ... | @@ -2476,7 +2477,7 @@ def gmm_hrna(): |
2476 | axes.set_ylim(0, 100) | 2477 | axes.set_ylim(0, 100) |
2477 | plt.xlabel("Distance (Angström)") | 2478 | plt.xlabel("Distance (Angström)") |
2478 | plt.title("GMM des distances entre atomes HiRE-RNA") | 2479 | plt.title("GMM des distances entre atomes HiRE-RNA") |
2479 | - plt.savefig(runDir + "/results/figures/HiRE-RNA/distances/GMM des distances entre atomes HiRE-RNA.png") | 2480 | + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM des distances entre atomes HiRE-RNA.png") |
2480 | plt.close() | 2481 | plt.close() |
2481 | 2482 | ||
2482 | # Angles | 2483 | # Angles |
... | @@ -2491,8 +2492,8 @@ def gmm_hrna(): | ... | @@ -2491,8 +2492,8 @@ def gmm_hrna(): |
2491 | c4p_c1p_b1 = list(df["C4'-C1'-B1"][~ np.isnan(df["C4'-C1'-B1"])]) | 2492 | c4p_c1p_b1 = list(df["C4'-C1'-B1"][~ np.isnan(df["C4'-C1'-B1"])]) |
2492 | c1p_b1_b2 = list(df["C1'-B1-B2"][~ np.isnan(df["C1'-B1-B2"])]) | 2493 | c1p_b1_b2 = list(df["C1'-B1-B2"][~ np.isnan(df["C1'-B1-B2"])]) |
2493 | 2494 | ||
2494 | - os.makedirs(runDir + "/results/figures/HiRE-RNA/distances/", exist_ok=True) | 2495 | + os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/distances/", exist_ok=True) |
2495 | - os.chdir(runDir + "/results/figures/HiRE-RNA/distances/") | 2496 | + os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/distances/") |
2496 | 2497 | ||
2497 | GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True) | 2498 | GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True) |
2498 | GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True) | 2499 | GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True) |
... | @@ -2515,7 +2516,7 @@ def gmm_hrna(): | ... | @@ -2515,7 +2516,7 @@ def gmm_hrna(): |
2515 | axes.set_ylim(0, 100) | 2516 | axes.set_ylim(0, 100) |
2516 | plt.xlabel("Angle (Degré)") | 2517 | plt.xlabel("Angle (Degré)") |
2517 | plt.title("GMM des angles entre atomes HiRE-RNA") | 2518 | plt.title("GMM des angles entre atomes HiRE-RNA") |
2518 | - plt.savefig(runDir + "/results/figures/HiRE-RNA/angles/GMM des angles entre atomes HiRE-RNA.png") | 2519 | + plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM des angles entre atomes HiRE-RNA.png") |
2519 | plt.close() | 2520 | plt.close() |
2520 | 2521 | ||
2521 | # Torsions | 2522 | # Torsions |
... | @@ -2530,8 +2531,8 @@ def gmm_hrna(): | ... | @@ -2530,8 +2531,8 @@ def gmm_hrna(): |
2530 | c4_psuiv_o5suiv_c5suiv = list(df["C4'-P°-O5'°-C5'°"][~ np.isnan(df["C4'-P°-O5'°-C5'°"])]) | 2531 | c4_psuiv_o5suiv_c5suiv = list(df["C4'-P°-O5'°-C5'°"][~ np.isnan(df["C4'-P°-O5'°-C5'°"])]) |
2531 | c1_c4_psuiv_o5suiv = list(df["C1'-C4'-P°-O5'°"][~ np.isnan(df["C1'-C4'-P°-O5'°"])]) | 2532 | c1_c4_psuiv_o5suiv = list(df["C1'-C4'-P°-O5'°"][~ np.isnan(df["C1'-C4'-P°-O5'°"])]) |
2532 | 2533 | ||
2533 | - os.makedirs(runDir + "/results/figures/HiRE-RNA/torsions/", exist_ok=True) | 2534 | + os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True) |
2534 | - os.chdir(runDir + "/results/figures/HiRE-RNA/torsions/") | 2535 | + os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/torsions/") |
2535 | 2536 | ||
2536 | GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True) | 2537 | GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True) |
2537 | GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True) | 2538 | GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True) |
... | @@ -2945,10 +2946,10 @@ if __name__ == "__main__": | ... | @@ -2945,10 +2946,10 @@ if __name__ == "__main__": |
2945 | # Define the tasks | 2946 | # Define the tasks |
2946 | joblist = [] | 2947 | joblist = [] |
2947 | 2948 | ||
2948 | - # Do eta/theta plots | 2949 | + # # Do eta/theta plots |
2949 | - if n_unmapped_chains and DO_WADLEY_ANALYSIS: | 2950 | + # if n_unmapped_chains and DO_WADLEY_ANALYSIS: |
2950 | - joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) | 2951 | + # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) |
2951 | - joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) | 2952 | + # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) |
2952 | 2953 | ||
2953 | # Do distance matrices for each family excl. LSU/SSU (will be processed later) | 2954 | # Do distance matrices for each family excl. LSU/SSU (will be processed later) |
2954 | if DO_AVG_DISTANCE_MATRIX: | 2955 | if DO_AVG_DISTANCE_MATRIX: |
... | @@ -2963,18 +2964,17 @@ if __name__ == "__main__": | ... | @@ -2963,18 +2964,17 @@ if __name__ == "__main__": |
2963 | joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) | 2964 | joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) |
2964 | joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) | 2965 | joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) |
2965 | 2966 | ||
2966 | - # Do general family statistics | 2967 | + # # Do general family statistics |
2967 | - joblist.append(Job(function=stats_len)) # Computes figures about chain lengths | 2968 | + # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths |
2968 | - joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) | 2969 | + # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) |
2969 | - for f in famlist: | 2970 | + # for f in famlist: |
2970 | - joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) | 2971 | + # joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) |
2971 | - if f not in ignored: | 2972 | + # if f not in ignored: |
2972 | - joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) | 2973 | + # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) |
2973 | 2974 | ||
2974 | # Do geometric measures on all chains | 2975 | # Do geometric measures on all chains |
2975 | if n_unmapped_chains: | 2976 | if n_unmapped_chains: |
2976 | os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) | 2977 | os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) |
2977 | - os.makedirs(runDir+"/results/geometry/all-atoms/angles/", exist_ok=True) | ||
2978 | f_prec = os.listdir(path_to_3D_data + "rna_only")[0] | 2978 | f_prec = os.listdir(path_to_3D_data + "rna_only")[0] |
2979 | for f in os.listdir(path_to_3D_data + "rna_only"): | 2979 | for f in os.listdir(path_to_3D_data + "rna_only"): |
2980 | joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances | 2980 | joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances |
... | @@ -2992,7 +2992,7 @@ if __name__ == "__main__": | ... | @@ -2992,7 +2992,7 @@ if __name__ == "__main__": |
2992 | 2992 | ||
2993 | #exit() | 2993 | #exit() |
2994 | 2994 | ||
2995 | - # process_jobs(joblist) | 2995 | + process_jobs(joblist) |
2996 | 2996 | ||
2997 | # Now process the memory-heavy tasks family by family | 2997 | # Now process the memory-heavy tasks family by family |
2998 | if DO_AVG_DISTANCE_MATRIX: | 2998 | if DO_AVG_DISTANCE_MATRIX: |
... | @@ -3016,15 +3016,15 @@ if __name__ == "__main__": | ... | @@ -3016,15 +3016,15 @@ if __name__ == "__main__": |
3016 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) | 3016 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) |
3017 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) | 3017 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) |
3018 | joblist = [] | 3018 | joblist = [] |
3019 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) | 3019 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) |
3020 | - if DO_HIRE_RNA_MEASURES: | 3020 | + # if DO_HIRE_RNA_MEASURES: |
3021 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) | 3021 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) |
3022 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) | 3022 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) |
3023 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) | 3023 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) |
3024 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) | 3024 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) |
3025 | if DO_WADLEY_ANALYSIS: | 3025 | if DO_WADLEY_ANALYSIS: |
3026 | joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) | 3026 | joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) |
3027 | - joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv'))) | 3027 | + # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv'))) |
3028 | process_jobs(joblist) | 3028 | process_jobs(joblist) |
3029 | joblist = [] | 3029 | joblist = [] |
3030 | joblist.append(Job(function=gmm_aa_dists, args=())) | 3030 | joblist.append(Job(function=gmm_aa_dists, args=())) | ... | ... |
-
Please register or login to post a comment