Louis BECQUEY

Corrected basepair angles

...@@ -1710,3 +1710,25 @@ ...@@ -1710,3 +1710,25 @@
1710 6pmi_1_3 1710 6pmi_1_3
1711 6pmj_1_3 1711 6pmj_1_3
1712 5hjz_1_C 1712 5hjz_1_C
1713 +7nrc_1_SM
1714 +7nrc_1_SN
1715 +7am2_1_R1
1716 +7k5l_1_R
1717 +7b5k_1_X
1718 +7d8c_1_C
1719 +7m4y_1_V
1720 +7m4x_1_V
1721 +7b5k_1_Z
1722 +7m4u_1_A
1723 +7n06_1_G
1724 +7n06_1_H
1725 +7n06_1_I
1726 +7n06_1_J
1727 +7n06_1_K
1728 +7n06_1_L
1729 +7n33_1_G
1730 +7n33_1_H
1731 +7n33_1_I
1732 +7n33_1_J
1733 +7n33_1_K
1734 +7n33_1_L
......
...@@ -5134,3 +5134,69 @@ Sequence is too short. (< 5 resolved nts) ...@@ -5134,3 +5134,69 @@ Sequence is too short. (< 5 resolved nts)
5134 5hjz_1_C 5134 5hjz_1_C
5135 Sequence is too short. (< 5 resolved nts) 5135 Sequence is too short. (< 5 resolved nts)
5136 5136
5137 +7nrc_1_SM
5138 +Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5139 +
5140 +7nrc_1_SN
5141 +Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5142 +
5143 +7am2_1_R1
5144 +Sequence is too short. (< 5 resolved nts)
5145 +
5146 +7k5l_1_R
5147 +Sequence is too short. (< 5 resolved nts)
5148 +
5149 +7b5k_1_X
5150 +Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5151 +
5152 +7d8c_1_C
5153 +Sequence is too short. (< 5 resolved nts)
5154 +
5155 +7m4y_1_V
5156 +Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5157 +
5158 +7m4x_1_V
5159 +Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5160 +
5161 +7b5k_1_Z
5162 +Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5163 +
5164 +7m4u_1_A
5165 +Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5166 +
5167 +7n06_1_G
5168 +Sequence is too short. (< 5 resolved nts)
5169 +
5170 +7n06_1_H
5171 +Sequence is too short. (< 5 resolved nts)
5172 +
5173 +7n06_1_I
5174 +Sequence is too short. (< 5 resolved nts)
5175 +
5176 +7n06_1_J
5177 +Sequence is too short. (< 5 resolved nts)
5178 +
5179 +7n06_1_K
5180 +Sequence is too short. (< 5 resolved nts)
5181 +
5182 +7n06_1_L
5183 +Sequence is too short. (< 5 resolved nts)
5184 +
5185 +7n33_1_G
5186 +Sequence is too short. (< 5 resolved nts)
5187 +
5188 +7n33_1_H
5189 +Sequence is too short. (< 5 resolved nts)
5190 +
5191 +7n33_1_I
5192 +Sequence is too short. (< 5 resolved nts)
5193 +
5194 +7n33_1_J
5195 +Sequence is too short. (< 5 resolved nts)
5196 +
5197 +7n33_1_K
5198 +Sequence is too short. (< 5 resolved nts)
5199 +
5200 +7n33_1_L
5201 +Sequence is too short. (< 5 resolved nts)
5202 +
......
...@@ -1403,29 +1403,45 @@ def basepair_apex_distance(res, pair): ...@@ -1403,29 +1403,45 @@ def basepair_apex_distance(res, pair):
1403 1403
1404 def basepair_flat_angle(res, pair): 1404 def basepair_flat_angle(res, pair):
1405 """ 1405 """
1406 - measurement of the plane angles formed by the vectors C1-> B1 of the paired nucleotides 1406 + measurement of the plane angles formed by the vectors C1->B1 of the paired nucleotides
1407 """ 1407 """
1408 if res.get_resname()=='A' or res.get_resname()=='G' or res.get_resname()=='C' or res.get_resname()=='U' : 1408 if res.get_resname()=='A' or res.get_resname()=='G' or res.get_resname()=='C' or res.get_resname()=='U' :
1409 atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] 1409 atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ]
1410 atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] 1410 atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
1411 - atom_b1_res=pos_b1(res) 1411 + atom_b1_res = pos_b1(res)
1412 - c4_res=Vector(atom_c4_res[0]) 1412 + a1_res = Vector(atom_c4_res[0])
1413 - c1_res=Vector(atom_c1p_res[0]) 1413 + a2_res = Vector(atom_c1p_res[0])
1414 - b1_res=Vector(atom_b1_res) 1414 + a3_res = Vector(atom_b1_res[0])
1415 - if pair.get_resname()=='A' or pair.get_resname()=='G' or pair.get_resname()=='C' or pair.get_resname()=='U' : 1415 + if res.get_resname()=='C' or res.get_resname()=='U' :
1416 - atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] 1416 + atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
1417 - atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] 1417 + atom_b1_res = pos_b1(res)
1418 - atom_b1_pair=pos_b1(pair) 1418 + atom_b2_res = pos_b2(res)
1419 - c4_pair=Vector(atom_c4_pair[0]) 1419 + a1_res = Vector(atom_c1p_res[0])
1420 - c1_pair=Vector(atom_c1p_pair[0]) 1420 + a2_res = Vector(atom_b1_res[0])
1421 - b1_pair=Vector(atom_b1_pair) 1421 + a3_res = Vector(atom_b2_res[0])
1422 - #we calculate the 4 plane angles including these vectors 1422 +
1423 - 1423 + if pair.get_resname()=='A' or pair.get_resname()=='G' or pair.get_resname()=='C' or pair.get_resname()=='U' :
1424 - a=calc_angle(c4_res, c1_res, b1_res)*(180/np.pi) 1424 + atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ]
1425 - b=calc_angle(c1_res, b1_res, b1_pair)*(180/np.pi) 1425 + atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
1426 - c=calc_angle(b1_res, b1_pair, c1_pair)*(180/np.pi) 1426 + atom_b1_pair = pos_b1(pair)
1427 - d=calc_angle(b1_pair, c1_pair, c4_pair)*(180/np.pi) 1427 + a1_pair = Vector(atom_c4_pair[0])
1428 - angles=[a, b, c, d] 1428 + a2_pair = Vector(atom_c1p_pair[0])
1429 + a3_pair = Vector(atom_b1_pair)
1430 + if pair.get_resname()=='C' or pair.get_resname()=='U' :
1431 + atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
1432 + atom_b1_pair = pos_b1(pair)
1433 + atom_b2_pair = pos_b2(pair)
1434 + a1_pair = Vector(atom_c1p_pair[0])
1435 + a2_pair = Vector(atom_b1_pair[0])
1436 + a3_pair = Vector(atom_b2_pair[0])
1437 +
1438 + # we calculate the 4 plane angles including these vectors
1439 +
1440 + a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi)
1441 + b = calc_angle(a2_res, a3_res, a3_pair)*(180/np.pi)
1442 + c = calc_angle(a3_res, a3_pair, a2_pair)*(180/np.pi)
1443 + d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi)
1444 + angles = [a, b, c, d]
1429 return angles 1445 return angles
1430 1446
1431 @trace_unhandled_exceptions 1447 @trace_unhandled_exceptions
...@@ -1465,7 +1481,7 @@ def measures_wadley(name, s, thr_idx): ...@@ -1465,7 +1481,7 @@ def measures_wadley(name, s, thr_idx):
1465 """ 1481 """
1466 1482
1467 # do not recompute something already computed 1483 # do not recompute something already computed
1468 - if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley '+name+'.csv') and 1484 + if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley ' + name + '.csv') and
1469 path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")): 1485 path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")):
1470 return 1486 return
1471 1487
...@@ -2789,6 +2805,7 @@ def list_chains_in_dir(ld): ...@@ -2789,6 +2805,7 @@ def list_chains_in_dir(ld):
2789 dictionnaire[pdb_id] = liste_chaines 2805 dictionnaire[pdb_id] = liste_chaines
2790 return dictionnaire 2806 return dictionnaire
2791 2807
2808 +@trace_unhandled_exceptions
2792 def concat_dataframes(fpath, outfilename): 2809 def concat_dataframes(fpath, outfilename):
2793 """ 2810 """
2794 Concatenates the dataframes containing measures 2811 Concatenates the dataframes containing measures
...@@ -2946,10 +2963,10 @@ if __name__ == "__main__": ...@@ -2946,10 +2963,10 @@ if __name__ == "__main__":
2946 # Define the tasks 2963 # Define the tasks
2947 joblist = [] 2964 joblist = []
2948 2965
2949 - # # Do eta/theta plots 2966 + # Do eta/theta plots
2950 - # if n_unmapped_chains and DO_WADLEY_ANALYSIS: 2967 + if n_unmapped_chains and DO_WADLEY_ANALYSIS:
2951 - # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) 2968 + joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr)))
2952 - # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) 2969 + joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr)))
2953 2970
2954 # Do distance matrices for each family excl. LSU/SSU (will be processed later) 2971 # Do distance matrices for each family excl. LSU/SSU (will be processed later)
2955 if DO_AVG_DISTANCE_MATRIX: 2972 if DO_AVG_DISTANCE_MATRIX:
...@@ -2964,13 +2981,13 @@ if __name__ == "__main__": ...@@ -2964,13 +2981,13 @@ if __name__ == "__main__":
2964 joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) 2981 joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False)))
2965 joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) 2982 joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False)))
2966 2983
2967 - # # Do general family statistics 2984 + # Do general family statistics
2968 - # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths 2985 + joblist.append(Job(function=stats_len)) # Computes figures about chain lengths
2969 - # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) 2986 + joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families)
2970 - # for f in famlist: 2987 + for f in famlist:
2971 - # joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) 2988 + joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family)
2972 - # if f not in ignored: 2989 + if f not in ignored:
2973 - # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) 2990 + joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families)
2974 2991
2975 # Do geometric measures on all chains 2992 # Do geometric measures on all chains
2976 if n_unmapped_chains: 2993 if n_unmapped_chains:
...@@ -3008,23 +3025,23 @@ if __name__ == "__main__": ...@@ -3008,23 +3025,23 @@ if __name__ == "__main__":
3008 3025
3009 # finish the work after the parallel portions 3026 # finish the work after the parallel portions
3010 3027
3011 - # per_chain_stats() # per chain base frequencies en basepair types 3028 + per_chain_stats() # per chain base frequencies en basepair types
3012 - # seq_idty() # identity matrices from pre-computed .npy matrices 3029 + seq_idty() # identity matrices from pre-computed .npy matrices
3013 - # stats_pairs() 3030 + stats_pairs()
3014 if n_unmapped_chains: 3031 if n_unmapped_chains:
3015 - # general_stats() 3032 + general_stats()
3016 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) 3033 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True)
3017 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) 3034 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True)
3018 joblist = [] 3035 joblist = []
3019 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) 3036 + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')))
3020 - # if DO_HIRE_RNA_MEASURES: 3037 + if DO_HIRE_RNA_MEASURES:
3021 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) 3038 + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
3022 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) 3039 + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv')))
3023 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) 3040 + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv')))
3024 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) 3041 + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv')))
3025 if DO_WADLEY_ANALYSIS: 3042 if DO_WADLEY_ANALYSIS:
3026 joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) 3043 joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv')))
3027 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv'))) 3044 + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv')))
3028 process_jobs(joblist) 3045 process_jobs(joblist)
3029 joblist = [] 3046 joblist = []
3030 joblist.append(Job(function=gmm_aa_dists, args=())) 3047 joblist.append(Job(function=gmm_aa_dists, args=()))
...@@ -3036,4 +3053,3 @@ if __name__ == "__main__": ...@@ -3036,4 +3053,3 @@ if __name__ == "__main__":
3036 joblist.append(Job(function=gmm_wadley, args=())) 3053 joblist.append(Job(function=gmm_wadley, args=()))
3037 if len(joblist): 3054 if len(joblist):
3038 process_jobs(joblist) 3055 process_jobs(joblist)
3039 -
...\ No newline at end of file ...\ No newline at end of file
......