corrected basepairs angles

Louis BECQUEY
Commit b334af48c35d9afafbd9c575df7f55951ecec602 b334af48 1 parent ec43d66b
Showing 3 changed files with 147 additions and 43 deletions
known_issues.txt
known_issues_reasons.txt
statistics.py
--- a/known_issues.txt
View file @b334af4
+++ b/known_issues.txt
View file @b334af4
@@ -1710,3 +1710,25 @@
 6pmi_1_3
 6pmj_1_3
 5hjz_1_C
+7nrc_1_SM
+7nrc_1_SN
+7am2_1_R1
+7k5l_1_R
+7b5k_1_X
+7d8c_1_C
+7m4y_1_V
+7m4x_1_V
+7b5k_1_Z
+7m4u_1_A
+7n06_1_G
+7n06_1_H
+7n06_1_I
+7n06_1_J
+7n06_1_K
+7n06_1_L
+7n33_1_G
+7n33_1_H
+7n33_1_I
+7n33_1_J
+7n33_1_K
+7n33_1_L
--- a/known_issues_reasons.txt
View file @b334af4
+++ b/known_issues_reasons.txt
View file @b334af4
@@ -5134,3 +5134,69 @@ Sequence is too short. (< 5 resolved nts)
 5hjz_1_C
 Sequence is too short. (< 5 resolved nts)
+7nrc_1_SM
+Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7nrc_1_SN
+Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7am2_1_R1
+Sequence is too short. (< 5 resolved nts)
+
+7k5l_1_R
+Sequence is too short. (< 5 resolved nts)
+
+7b5k_1_X
+Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7d8c_1_C
+Sequence is too short. (< 5 resolved nts)
+
+7m4y_1_V
+Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7m4x_1_V
+Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7b5k_1_Z
+Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7m4u_1_A
+Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7n06_1_G
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_H
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_I
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_J
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_K
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_L
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_G
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_H
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_I
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_J
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_K
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_L
+Sequence is too short. (< 5 resolved nts)
+
--- a/statistics.py
View file @b334af4
+++ b/statistics.py
View file @b334af4
@@ -1403,29 +1403,45 @@ def basepair_apex_distance(res, pair):
 def basepair_flat_angle(res, pair):
     """
-    measurement of the plane angles formed by the vectors C1-> B1 of the paired nucleotides
+    Measure the planar angles formed by the C1'->B1 vectors of the two paired nucleotides.
     """
     if res.get_resname()=='A' or res.get_resname()=='G' or res.get_resname()=='C' or res.get_resname()=='U' :
         atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] 
         atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
-        atom_b1_res=pos_b1(res)
+        atom_b1_res = pos_b1(res)
-        c4_res=Vector(atom_c4_res[0])
+        a1_res = Vector(atom_c4_res[0])
-        c1_res=Vector(atom_c1p_res[0])
+        a2_res = Vector(atom_c1p_res[0])
-        b1_res=Vector(atom_b1_res)
+        a3_res = Vector(atom_b1_res[0])
-        if pair.get_resname()=='A' or pair.get_resname()=='G' or pair.get_resname()=='C' or pair.get_resname()=='U' :
+    if res.get_resname()=='C' or res.get_resname()=='U' :
-            atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ]
+        atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
-            atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
+        atom_b1_res = pos_b1(res)
-            atom_b1_pair=pos_b1(pair)
+        atom_b2_res = pos_b2(res)
-            c4_pair=Vector(atom_c4_pair[0])
+        a1_res = Vector(atom_c1p_res[0])
-            c1_pair=Vector(atom_c1p_pair[0])
+        a2_res = Vector(atom_b1_res[0])
-            b1_pair=Vector(atom_b1_pair)
+        a3_res = Vector(atom_b2_res[0])
-            #we calculate the 4 plane angles including these vectors
+        
-            
+    if pair.get_resname()=='A' or pair.get_resname()=='G' or pair.get_resname()=='C' or pair.get_resname()=='U' :
-            a=calc_angle(c4_res, c1_res, b1_res)*(180/np.pi)
+        atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ]
-            b=calc_angle(c1_res, b1_res, b1_pair)*(180/np.pi)
+        atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
-            c=calc_angle(b1_res, b1_pair, c1_pair)*(180/np.pi)
+        atom_b1_pair = pos_b1(pair)
-            d=calc_angle(b1_pair, c1_pair, c4_pair)*(180/np.pi)
+        a1_pair = Vector(atom_c4_pair[0])
-    angles=[a, b, c, d]
+        a2_pair = Vector(atom_c1p_pair[0])
+        a3_pair = Vector(atom_b1_pair)
+    if pair.get_resname()=='C' or pair.get_resname()=='U' :
+        atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
+        atom_b1_pair = pos_b1(pair)
+        atom_b2_pair = pos_b2(pair)
+        a1_pair = Vector(atom_c1p_pair[0])
+        a2_pair = Vector(atom_b1_pair[0])
+        a3_pair = Vector(atom_b2_pair[0])
+
+    # Compute the 4 planar angles along the chain a1_res - a2_res - a3_res - a3_pair - a2_pair - a1_pair, converted from radians to degrees
+    
+    a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi)
+    b = calc_angle(a2_res, a3_res, a3_pair)*(180/np.pi)
+    c = calc_angle(a3_res, a3_pair, a2_pair)*(180/np.pi)
+    d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi)
+    angles = [a, b, c, d]
     return angles
 @trace_unhandled_exceptions
@@ -1465,7 +1481,7 @@ def measures_wadley(name, s, thr_idx):
     """
     # do not recompute something already computed
-    if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley '+name+'.csv') and
+    if (path.isfile(runDir + '/results/geometry/Pyle/angles/angles_plans_wadley ' + name + '.csv') and
         path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley " + name + ".csv")):
         return
@@ -2789,6 +2805,7 @@ def list_chains_in_dir(ld):
             dictionnaire[pdb_id] = liste_chaines
     return dictionnaire
+@trace_unhandled_exceptions
 def concat_dataframes(fpath, outfilename):
     """
     Concatenates the dataframes containing measures 
@@ -2946,10 +2963,10 @@ if __name__ == "__main__":
     # Define the tasks
     joblist = []
-    # # Do eta/theta plots
+    # Do eta/theta plots
-    # if n_unmapped_chains and DO_WADLEY_ANALYSIS:    
+    if n_unmapped_chains and DO_WADLEY_ANALYSIS:    
-    #     joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr)))
+        joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr)))
-    #     joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr)))
+        joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr)))
     # Do distance matrices for each family excl. LSU/SSU (will be processed later)
     if DO_AVG_DISTANCE_MATRIX:  
@@ -2964,13 +2981,13 @@ if __name__ == "__main__":
             joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False)))
             joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False)))
-    # # Do general family statistics
+    # Do general family statistics
-    # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths
+    joblist.append(Job(function=stats_len)) # Computes figures about chain lengths
-    # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families)
+    joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families)
-    # for f in famlist:
+    for f in famlist:
-    #     joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family)
+        joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database (intra-chain basepair types within a family)
-    #     if f not in ignored:
+        if f not in ignored:
-    #         joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families)
+            joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families)
     # Do geometric measures on all chains
     if n_unmapped_chains:
@@ -3008,23 +3025,23 @@ if __name__ == "__main__":
     # finish the work after the parallel portions
-    # per_chain_stats()   # per chain base frequencies en basepair types
+    per_chain_stats()   # per chain base frequencies and basepair types
-    # seq_idty()          # identity matrices from pre-computed .npy matrices
+    seq_idty()          # identity matrices from pre-computed .npy matrices
-    # stats_pairs()
+    stats_pairs()
     if n_unmapped_chains:
-        # general_stats()
+        general_stats()
         os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True)
         os.makedirs(runDir+"/results/geometry/json/", exist_ok=True)
         joblist = []
-        # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')))
+        joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')))
-        # if DO_HIRE_RNA_MEASURES:
+        if DO_HIRE_RNA_MEASURES:
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
+            joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv')))
+            joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv')))
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv')))
+            joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv')))
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv')))
+            joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv')))
         if DO_WADLEY_ANALYSIS:
             joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv')))
-            # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv')))
+            joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'angles_plans_wadley.csv')))
         process_jobs(joblist)
         joblist = []
         joblist.append(Job(function=gmm_aa_dists, args=()))
@@ -3036,4 +3053,3 @@ if __name__ == "__main__":
             joblist.append(Job(function=gmm_wadley, args=()))
         if len(joblist):
             process_jobs(joblist)
-    
\ No newline at end of file