Showing 3 changed files with 147 additions and 43 deletions
... | @@ -1710,3 +1710,25 @@ | ... | @@ -1710,3 +1710,25 @@ |
1710 | 6pmi_1_3 | 1710 | 6pmi_1_3 |
1711 | 6pmj_1_3 | 1711 | 6pmj_1_3 |
1712 | 5hjz_1_C | 1712 | 5hjz_1_C |
1713 | +7nrc_1_SM | ||
1714 | +7nrc_1_SN | ||
1715 | +7am2_1_R1 | ||
1716 | +7k5l_1_R | ||
1717 | +7b5k_1_X | ||
1718 | +7d8c_1_C | ||
1719 | +7m4y_1_V | ||
1720 | +7m4x_1_V | ||
1721 | +7b5k_1_Z | ||
1722 | +7m4u_1_A | ||
1723 | +7n06_1_G | ||
1724 | +7n06_1_H | ||
1725 | +7n06_1_I | ||
1726 | +7n06_1_J | ||
1727 | +7n06_1_K | ||
1728 | +7n06_1_L | ||
1729 | +7n33_1_G | ||
1730 | +7n33_1_H | ||
1731 | +7n33_1_I | ||
1732 | +7n33_1_J | ||
1733 | +7n33_1_K | ||
1734 | +7n33_1_L | ... | ... |
... | @@ -5134,3 +5134,69 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -5134,3 +5134,69 @@ Sequence is too short. (< 5 resolved nts) |
5134 | 5hjz_1_C | 5134 | 5hjz_1_C |
5135 | Sequence is too short. (< 5 resolved nts) | 5135 | Sequence is too short. (< 5 resolved nts) |
5136 | 5136 | ||
5137 | +7nrc_1_SM | ||
5138 | +Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5139 | + | ||
5140 | +7nrc_1_SN | ||
5141 | +Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5142 | + | ||
5143 | +7am2_1_R1 | ||
5144 | +Sequence is too short. (< 5 resolved nts) | ||
5145 | + | ||
5146 | +7k5l_1_R | ||
5147 | +Sequence is too short. (< 5 resolved nts) | ||
5148 | + | ||
5149 | +7b5k_1_X | ||
5150 | +Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5151 | + | ||
5152 | +7d8c_1_C | ||
5153 | +Sequence is too short. (< 5 resolved nts) | ||
5154 | + | ||
5155 | +7m4y_1_V | ||
5156 | +Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5157 | + | ||
5158 | +7m4x_1_V | ||
5159 | +Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5160 | + | ||
5161 | +7b5k_1_Z | ||
5162 | +Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5163 | + | ||
5164 | +7m4u_1_A | ||
5165 | +Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5166 | + | ||
5167 | +7n06_1_G | ||
5168 | +Sequence is too short. (< 5 resolved nts) | ||
5169 | + | ||
5170 | +7n06_1_H | ||
5171 | +Sequence is too short. (< 5 resolved nts) | ||
5172 | + | ||
5173 | +7n06_1_I | ||
5174 | +Sequence is too short. (< 5 resolved nts) | ||
5175 | + | ||
5176 | +7n06_1_J | ||
5177 | +Sequence is too short. (< 5 resolved nts) | ||
5178 | + | ||
5179 | +7n06_1_K | ||
5180 | +Sequence is too short. (< 5 resolved nts) | ||
5181 | + | ||
5182 | +7n06_1_L | ||
5183 | +Sequence is too short. (< 5 resolved nts) | ||
5184 | + | ||
5185 | +7n33_1_G | ||
5186 | +Sequence is too short. (< 5 resolved nts) | ||
5187 | + | ||
5188 | +7n33_1_H | ||
5189 | +Sequence is too short. (< 5 resolved nts) | ||
5190 | + | ||
5191 | +7n33_1_I | ||
5192 | +Sequence is too short. (< 5 resolved nts) | ||
5193 | + | ||
5194 | +7n33_1_J | ||
5195 | +Sequence is too short. (< 5 resolved nts) | ||
5196 | + | ||
5197 | +7n33_1_K | ||
5198 | +Sequence is too short. (< 5 resolved nts) | ||
5199 | + | ||
5200 | +7n33_1_L | ||
5201 | +Sequence is too short. (< 5 resolved nts) | ||
5202 | + | ... | ... |
... | @@ -1403,29 +1403,45 @@ def basepair_apex_distance(res, pair): | ... | @@ -1403,29 +1403,45 @@ def basepair_apex_distance(res, pair): |
1403 | 1403 | ||
1404 | def basepair_flat_angle(res, pair): | 1404 | def basepair_flat_angle(res, pair): |
1405 | """ | 1405 | """ |
1406 | - measurement of the plane angles formed by the vectors C1-> B1 of the paired nucleotides | 1406 | + measurement of the plane angles formed by the vectors C1->B1 of the paired nucleotides |
1407 | """ | 1407 | """ |
1408 | if res.get_resname()=='A' or res.get_resname()=='G' or res.get_resname()=='C' or res.get_resname()=='U' : | 1408 | if res.get_resname()=='A' or res.get_resname()=='G' or res.get_resname()=='C' or res.get_resname()=='U' : |
1409 | atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] | 1409 | atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] |
1410 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | 1410 | atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] |
1411 | - atom_b1_res=pos_b1(res) | 1411 | + atom_b1_res = pos_b1(res) |
1412 | - c4_res=Vector(atom_c4_res[0]) | 1412 | + a1_res = Vector(atom_c4_res[0]) |
1413 | - c1_res=Vector(atom_c1p_res[0]) | 1413 | + a2_res = Vector(atom_c1p_res[0]) |
1414 | - b1_res=Vector(atom_b1_res) | 1414 | + a3_res = Vector(atom_b1_res[0]) |
1415 | - if pair.get_resname()=='A' or pair.get_resname()=='G' or pair.get_resname()=='C' or pair.get_resname()=='U' : | 1415 | + if res.get_resname()=='C' or res.get_resname()=='U' : |
1416 | - atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] | 1416 | + atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] |
1417 | - atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] | 1417 | + atom_b1_res = pos_b1(res) |
1418 | - atom_b1_pair=pos_b1(pair) | 1418 | + atom_b2_res = pos_b2(res) |
1419 | - c4_pair=Vector(atom_c4_pair[0]) | 1419 | + a1_res = Vector(atom_c1p_res[0]) |
1420 | - c1_pair=Vector(atom_c1p_pair[0]) | 1420 | + a2_res = Vector(atom_b1_res[0]) |
1421 | - b1_pair=Vector(atom_b1_pair) | 1421 | + a3_res = Vector(atom_b2_res[0]) |
1422 | - #we calculate the 4 plane angles including these vectors | 1422 | + |
1423 | - | 1423 | + if pair.get_resname()=='A' or pair.get_resname()=='G' or pair.get_resname()=='C' or pair.get_resname()=='U' : |
1424 | - a=calc_angle(c4_res, c1_res, b1_res)*(180/np.pi) | 1424 | + atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] |
1425 | - b=calc_angle(c1_res, b1_res, b1_pair)*(180/np.pi) | 1425 | + atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] |
1426 | - c=calc_angle(b1_res, b1_pair, c1_pair)*(180/np.pi) | 1426 | + atom_b1_pair = pos_b1(pair) |
1427 | - d=calc_angle(b1_pair, c1_pair, c4_pair)*(180/np.pi) | 1427 | + a1_pair = Vector(atom_c4_pair[0]) |
1428 | - angles=[a, b, c, d] | 1428 | + a2_pair = Vector(atom_c1p_pair[0]) |
1429 | + a3_pair = Vector(atom_b1_pair) | ||
1430 | + if pair.get_resname()=='C' or pair.get_resname()=='U' : | ||
1431 | + atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] | ||
1432 | + atom_b1_pair = pos_b1(pair) | ||
1433 | + atom_b2_pair = pos_b2(pair) | ||
1434 | + a1_pair = Vector(atom_c1p_pair[0]) | ||
1435 | + a2_pair = Vector(atom_b1_pair[0]) | ||
1436 | + a3_pair = Vector(atom_b2_pair[0]) | ||
1437 | + | ||
1438 | + # we calculate the 4 plane angles including these vectors | ||
1439 | + | ||
1440 | + a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) | ||
1441 | + b = calc_angle(a2_res, a3_res, a3_pair)*(180/np.pi) | ||
1442 | + c = calc_angle(a3_res, a3_pair, a2_pair)*(180/np.pi) | ||
1443 | + d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) | ||
1444 | + angles = [a, b, c, d] | ||
1429 | return angles | 1445 | return angles |
1430 | 1446 | ||
1431 | @trace_unhandled_exceptions | 1447 | @trace_unhandled_exceptions |
... | @@ -1465,7 +1481,7 @@ def measures_wadley(name, s, thr_idx): | ... | @@ -1465,7 +1481,7 @@ def measures_wadley(name, s, thr_idx): |
1465 | """ | 1481 | """ |
1466 | 1482 | ||
1467 | # do not recompute something already computed | 1483 | # do not recompute something already computed |
1468 | - if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley '+name+'.csv') and | 1484 | + if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley ' + name + '.csv') and |
1469 | path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")): | 1485 | path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")): |
1470 | return | 1486 | return |
1471 | 1487 | ||
... | @@ -2789,6 +2805,7 @@ def list_chains_in_dir(ld): | ... | @@ -2789,6 +2805,7 @@ def list_chains_in_dir(ld): |
2789 | dictionnaire[pdb_id] = liste_chaines | 2805 | dictionnaire[pdb_id] = liste_chaines |
2790 | return dictionnaire | 2806 | return dictionnaire |
2791 | 2807 | ||
2808 | +@trace_unhandled_exceptions | ||
2792 | def concat_dataframes(fpath, outfilename): | 2809 | def concat_dataframes(fpath, outfilename): |
2793 | """ | 2810 | """ |
2794 | Concatenates the dataframes containing measures | 2811 | Concatenates the dataframes containing measures |
... | @@ -2946,10 +2963,10 @@ if __name__ == "__main__": | ... | @@ -2946,10 +2963,10 @@ if __name__ == "__main__": |
2946 | # Define the tasks | 2963 | # Define the tasks |
2947 | joblist = [] | 2964 | joblist = [] |
2948 | 2965 | ||
2949 | - # # Do eta/theta plots | 2966 | + # Do eta/theta plots |
2950 | - # if n_unmapped_chains and DO_WADLEY_ANALYSIS: | 2967 | + if n_unmapped_chains and DO_WADLEY_ANALYSIS: |
2951 | - # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) | 2968 | + joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) |
2952 | - # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) | 2969 | + joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) |
2953 | 2970 | ||
2954 | # Do distance matrices for each family excl. LSU/SSU (will be processed later) | 2971 | # Do distance matrices for each family excl. LSU/SSU (will be processed later) |
2955 | if DO_AVG_DISTANCE_MATRIX: | 2972 | if DO_AVG_DISTANCE_MATRIX: |
... | @@ -2964,13 +2981,13 @@ if __name__ == "__main__": | ... | @@ -2964,13 +2981,13 @@ if __name__ == "__main__": |
2964 | joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) | 2981 | joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) |
2965 | joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) | 2982 | joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) |
2966 | 2983 | ||
2967 | - # # Do general family statistics | 2984 | + # Do general family statistics |
2968 | - # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths | 2985 | + joblist.append(Job(function=stats_len)) # Computes figures about chain lengths |
2969 | - # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) | 2986 | + joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) |
2970 | - # for f in famlist: | 2987 | + for f in famlist: |
2971 | - # joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) | 2988 | + joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family) |
2972 | - # if f not in ignored: | 2989 | + if f not in ignored: |
2973 | - # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) | 2990 | + joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) |
2974 | 2991 | ||
2975 | # Do geometric measures on all chains | 2992 | # Do geometric measures on all chains |
2976 | if n_unmapped_chains: | 2993 | if n_unmapped_chains: |
... | @@ -3008,23 +3025,23 @@ if __name__ == "__main__": | ... | @@ -3008,23 +3025,23 @@ if __name__ == "__main__": |
3008 | 3025 | ||
3009 | # finish the work after the parallel portions | 3026 | # finish the work after the parallel portions |
3010 | 3027 | ||
3011 | - # per_chain_stats() # per chain base frequencies and basepair types | 3028 | + per_chain_stats() # per chain base frequencies and basepair types |
3012 | - # seq_idty() # identity matrices from pre-computed .npy matrices | 3029 | + seq_idty() # identity matrices from pre-computed .npy matrices |
3013 | - # stats_pairs() | 3030 | + stats_pairs() |
3014 | if n_unmapped_chains: | 3031 | if n_unmapped_chains: |
3015 | - # general_stats() | 3032 | + general_stats() |
3016 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) | 3033 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) |
3017 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) | 3034 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) |
3018 | joblist = [] | 3035 | joblist = [] |
3019 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) | 3036 | + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) |
3020 | - # if DO_HIRE_RNA_MEASURES: | 3037 | + if DO_HIRE_RNA_MEASURES: |
3021 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) | 3038 | + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) |
3022 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) | 3039 | + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) |
3023 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) | 3040 | + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) |
3024 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) | 3041 | + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) |
3025 | if DO_WADLEY_ANALYSIS: | 3042 | if DO_WADLEY_ANALYSIS: |
3026 | joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) | 3043 | joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) |
3027 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv'))) | 3044 | + joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv'))) |
3028 | process_jobs(joblist) | 3045 | process_jobs(joblist) |
3029 | joblist = [] | 3046 | joblist = [] |
3030 | joblist.append(Job(function=gmm_aa_dists, args=())) | 3047 | joblist.append(Job(function=gmm_aa_dists, args=())) |
... | @@ -3036,4 +3053,3 @@ if __name__ == "__main__": | ... | @@ -3036,4 +3053,3 @@ if __name__ == "__main__": |
3036 | joblist.append(Job(function=gmm_wadley, args=())) | 3053 | joblist.append(Job(function=gmm_wadley, args=())) |
3037 | if len(joblist): | 3054 | if len(joblist): |
3038 | process_jobs(joblist) | 3055 | process_jobs(joblist) |
3039 | - | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or log in to post a comment