Solved an issue occurring when a duplicate nt_resnum is located at the end of the chain

Louis BECQUEY
Commit 3b5ed451d74419cfdc7bdaa4489364e8b1ae4d78 3b5ed451 1 parent a4b2c505
Showing 1 changed file with 32 additions and 25 deletions
RNAnet.py
--- a/RNAnet.py
View file @3b5ed45
+++ b/RNAnet.py
View file @3b5ed45
@@ -273,32 +273,39 @@ class Chain:
             if self.mapping is not None:
                 self.mapping.log(f"Shifting nt_resnum numbering because of {n_dup} duplicate residues {df.iloc[i,1]}")
-            if df.iloc[i,1] == df.iloc[i-1,1] and df.iloc[index_last_dup + 1, 1] - 1 > df.iloc[index_last_dup, 1]:
+            try:
-                # The redundant nts are consecutive in the chain (at the begining at least), and there is a gap at the end
+                if i > 0 and index_last_dup +1 < len(df.index) and df.iloc[i,1] == df.iloc[i-1,1] and df.iloc[index_last_dup + 1, 1] - 1 > df.iloc[index_last_dup, 1]:
-
+                    # The redundant nts are consecutive in the chain (at the beginning at least), and there is a gap at the end
-                if duplicates.iloc[n_dup-1, 0] - duplicates.iloc[0, 0] + 1 == n_dup:
+
-                    # They are all contiguous in the chain
+                    if duplicates.iloc[n_dup-1, 0] - duplicates.iloc[0, 0] + 1 == n_dup:
-                    # 4v9n-DA case (and similar ones) : 610-611-611A-611B-611C-611D-611E-611F-611G-617-618...
+                        # They are all contiguous in the chain
-                    # there is a redundancy (611) followed by a gap (611-617). 
+                        # 4v9n-DA case (and similar ones) : 610-611-611A-611B-611C-611D-611E-611F-611G-617-618...
-                    # We want the redundancy to fill the gap.
+                        # there is a redundancy (611) followed by a gap (611-617). 
-                    df.iloc[i:i+n_dup-1, 1] += 1
+                        # We want the redundancy to fill the gap.
+                        df.iloc[i:i+n_dup-1, 1] += 1
+                    else:
+                        # We solve the problem continuous component by continuous component
+                        for j in range(1, n_dup+1):
+                            if duplicates.iloc[j,0] == 1 + duplicates.iloc[j-1,0]: # continuous
+                                df.iloc[i+j-1,1] += 1
+                            else:
+                                break
+                elif df.iloc[i,1] == df.iloc[i-1,1]:
+                    # Common 4v9q-DV case (and similar ones) : e.g. the chain contains 17 and 17A, which are both read as 17 by DSSR.
+                    # Solution : we shift the numbering of 17A (to 18) and the following residues.
+                    df.iloc[i:, 1] += 1
                 else:
-                    # We solve the problem continous component by continuous component
+                    # 4v9k-DA case (and similar ones) : the nt_id is not the full nt_resnum: ... 1629 > 1630 > 163B > 1631 > ...
-                    for j in range(1, n_dup+1):
+                    # Here the 163B is read 163 by DSSR, but there already is a residue 163.
-                        if duplicates.iloc[j,0] == 1 + duplicates.iloc[j-1,0]: # continuous
+                    # Solution : set nt_resnum[i] to nt_resnum[i-1] + 1, and shift the following by 1.
-                            df.iloc[i+j-1,1] += 1
+                    df.iloc[i, 1] = 1 + df.iloc[i-1, 1]
-                        else:
+                    df.iloc[i+1:, 1] += 1
-                            break
+            except:
-            elif df.iloc[i,1] == df.iloc[i-1,1]:
+                warn(f"Error with parsing of {self.chain_label} duplicate residue numbers. Ignoring it.")
-                # Common 4v9q-DV case (and similar ones) : e.g. chains contains 17 and 17A which are both read 17 by DSSR.
+                self.delete_me = True
-                # Solution : we shift the numbering of 17A (to 18) and the following residues.
+                self.error_messages = f"Error with parsing of duplicate residues numbers."
-                df.iloc[i:, 1] += 1
+                return None
-            else:
+
-                # 4v9k-DA case (and similar ones) : the nt_id is not the full nt_resnum: ... 1629 > 1630 > 163B > 1631 > ...
-                # Here the 163B is read 163 by DSSR, but there already is a residue 163.
-                # Solution : set nt_resnum[i] to nt_resnum[i-1] + 1, and shift the following by 1.
-                df.iloc[i, 1] = 1 + df.iloc[i-1, 1]
-                df.iloc[i+1:, 1] += 1
         # Search for ligands at the end of the selection
         # Drop ligands detected as residues by DSSR, by detecting several markers