Fixed an issue occurring when duplicate nt_resnum values are at the end of the chain

Louis BECQUEY
Commit 3b5ed451d74419cfdc7bdaa4489364e8b1ae4d78 3b5ed451 1 parent a4b2c505
Showing 1 changed file with 32 additions and 25 deletions
RNAnet.py
--- a/RNAnet.py
View file @3b5ed45
+++ b/RNAnet.py
View file @3b5ed45
@@ -273,32 +273,39 @@ class Chain:
             if self.mapping is not None:
                 self.mapping.log(f"Shifting nt_resnum numbering because of {n_dup} duplicate residues {df.iloc[i,1]}")
 
-             if df.iloc[i,1] == df.iloc[i-1,1] and df.iloc[index_last_dup + 1, 1] - 1 > df.iloc[index_last_dup, 1]:
-                 # The redundant nts are consecutive in the chain (at the begining at least), and there is a gap at the end
- 
-                 if duplicates.iloc[n_dup-1, 0] - duplicates.iloc[0, 0] + 1 == n_dup:
-                     # They are all contiguous in the chain
-                     # 4v9n-DA case (and similar ones) : 610-611-611A-611B-611C-611D-611E-611F-611G-617-618...
-                     # there is a redundancy (611) followed by a gap (611-617). 
-                     # We want the redundancy to fill the gap.
-                     df.iloc[i:i+n_dup-1, 1] += 1
+             try:
+                 if i > 0 and index_last_dup +1 < len(df.index) and df.iloc[i,1] == df.iloc[i-1,1] and df.iloc[index_last_dup + 1, 1] - 1 > df.iloc[index_last_dup, 1]:
+                     # The redundant nts are consecutive in the chain (at the beginning at least), and there is a gap at the end
+ 
+                     if duplicates.iloc[n_dup-1, 0] - duplicates.iloc[0, 0] + 1 == n_dup:
+                         # They are all contiguous in the chain
+                         # 4v9n-DA case (and similar ones) : 610-611-611A-611B-611C-611D-611E-611F-611G-617-618...
+                         # there is a redundancy (611) followed by a gap (611-617). 
+                         # We want the redundancy to fill the gap.
+                         df.iloc[i:i+n_dup-1, 1] += 1
+                     else:
+                         # We solve the problem continuous component by continuous component
+                         for j in range(1, n_dup+1):
+                             if duplicates.iloc[j,0] == 1 + duplicates.iloc[j-1,0]: # continuous
+                                 df.iloc[i+j-1,1] += 1
+                             else:
+                                 break
+                 elif df.iloc[i,1] == df.iloc[i-1,1]:
+                     # Common 4v9q-DV case (and similar ones) : e.g. the chain contains 17 and 17A which are both read 17 by DSSR.
+                     # Solution : we shift the numbering of 17A (to 18) and the following residues.
+                     df.iloc[i:, 1] += 1
                 else:
-                     # We solve the problem continous component by continuous component
-                     for j in range(1, n_dup+1):
-                         if duplicates.iloc[j,0] == 1 + duplicates.iloc[j-1,0]: # continuous
-                             df.iloc[i+j-1,1] += 1
-                         else:
-                             break
-             elif df.iloc[i,1] == df.iloc[i-1,1]:
-                 # Common 4v9q-DV case (and similar ones) : e.g. chains contains 17 and 17A which are both read 17 by DSSR.
-                 # Solution : we shift the numbering of 17A (to 18) and the following residues.
-                 df.iloc[i:, 1] += 1
-             else:
-                 # 4v9k-DA case (and similar ones) : the nt_id is not the full nt_resnum: ... 1629 > 1630 > 163B > 1631 > ...
-                 # Here the 163B is read 163 by DSSR, but there already is a residue 163.
-                 # Solution : set nt_resnum[i] to nt_resnum[i-1] + 1, and shift the following by 1.
-                 df.iloc[i, 1] = 1 + df.iloc[i-1, 1]
-                 df.iloc[i+1:, 1] += 1
+                     # 4v9k-DA case (and similar ones) : the nt_id is not the full nt_resnum: ... 1629 > 1630 > 163B > 1631 > ...
+                     # Here the 163B is read 163 by DSSR, but there already is a residue 163.
+                     # Solution : set nt_resnum[i] to nt_resnum[i-1] + 1, and shift the following by 1.
+                     df.iloc[i, 1] = 1 + df.iloc[i-1, 1]
+                     df.iloc[i+1:, 1] += 1
+             except:
+                 warn(f"Error with parsing of {self.chain_label} duplicate residue numbers. Ignoring it.")
+                 self.delete_me = True
+                 self.error_messages = f"Error with parsing of duplicate residues numbers."
+                 return None
+ 
 
         # Search for ligands at the end of the selection
         # Drop ligands detected as residues by DSSR, by detecting several markers