Louis BECQUEY

Fix handling of duplicate nt_resnum values at the end of the chain

Showing 1 changed file with 32 additions and 25 deletions
...@@ -273,32 +273,39 @@ class Chain: ...@@ -273,32 +273,39 @@ class Chain:
273 if self.mapping is not None: 273 if self.mapping is not None:
274 self.mapping.log(f"Shifting nt_resnum numbering because of {n_dup} duplicate residues {df.iloc[i,1]}") 274 self.mapping.log(f"Shifting nt_resnum numbering because of {n_dup} duplicate residues {df.iloc[i,1]}")
275 275
276 - if df.iloc[i,1] == df.iloc[i-1,1] and df.iloc[index_last_dup + 1, 1] - 1 > df.iloc[index_last_dup, 1]: 276 + try:
277 - # The redundant nts are consecutive in the chain (at the begining at least), and there is a gap at the end 277 + if i > 0 and index_last_dup +1 < len(df.index) and df.iloc[i,1] == df.iloc[i-1,1] and df.iloc[index_last_dup + 1, 1] - 1 > df.iloc[index_last_dup, 1]:
278 - 278 + # The redundant nts are consecutive in the chain (at the beginning at least), and there is a gap at the end
279 - if duplicates.iloc[n_dup-1, 0] - duplicates.iloc[0, 0] + 1 == n_dup: 279 +
280 - # They are all contiguous in the chain 280 + if duplicates.iloc[n_dup-1, 0] - duplicates.iloc[0, 0] + 1 == n_dup:
281 - # 4v9n-DA case (and similar ones) : 610-611-611A-611B-611C-611D-611E-611F-611G-617-618... 281 + # They are all contiguous in the chain
282 - # there is a redundancy (611) followed by a gap (611-617). 282 + # 4v9n-DA case (and similar ones) : 610-611-611A-611B-611C-611D-611E-611F-611G-617-618...
283 - # We want the redundancy to fill the gap. 283 + # there is a redundancy (611) followed by a gap (611-617).
284 - df.iloc[i:i+n_dup-1, 1] += 1 284 + # We want the redundancy to fill the gap.
285 + df.iloc[i:i+n_dup-1, 1] += 1
286 + else:
287 + # We solve the problem continuous component by continuous component
288 + for j in range(1, n_dup+1):
289 + if duplicates.iloc[j,0] == 1 + duplicates.iloc[j-1,0]: # continuous
290 + df.iloc[i+j-1,1] += 1
291 + else:
292 + break
293 + elif df.iloc[i,1] == df.iloc[i-1,1]:
294 + # Common 4v9q-DV case (and similar ones) : e.g. chains contains 17 and 17A which are both read 17 by DSSR.
295 + # Solution : we shift the numbering of 17A (to 18) and the following residues.
296 + df.iloc[i:, 1] += 1
285 else: 297 else:
286 - # We solve the problem continous component by continuous component 298 + # 4v9k-DA case (and similar ones) : the nt_id is not the full nt_resnum: ... 1629 > 1630 > 163B > 1631 > ...
287 - for j in range(1, n_dup+1): 299 + # Here the 163B is read 163 by DSSR, but there already is a residue 163.
288 - if duplicates.iloc[j,0] == 1 + duplicates.iloc[j-1,0]: # continuous 300 + # Solution : set nt_resnum[i] to nt_resnum[i-1] + 1, and shift the following by 1.
289 - df.iloc[i+j-1,1] += 1 301 + df.iloc[i, 1] = 1 + df.iloc[i-1, 1]
290 - else: 302 + df.iloc[i+1:, 1] += 1
291 - break 303 + except:
292 - elif df.iloc[i,1] == df.iloc[i-1,1]: 304 + warn(f"Error with parsing of {self.chain_label} duplicate residue numbers. Ignoring it.")
293 - # Common 4v9q-DV case (and similar ones) : e.g. chains contains 17 and 17A which are both read 17 by DSSR. 305 + self.delete_me = True
294 - # Solution : we shift the numbering of 17A (to 18) and the following residues. 306 + self.error_messages = f"Error with parsing of duplicate residues numbers."
295 - df.iloc[i:, 1] += 1 307 + return None
296 - else: 308 +
297 - # 4v9k-DA case (and similar ones) : the nt_id is not the full nt_resnum: ... 1629 > 1630 > 163B > 1631 > ...
298 - # Here the 163B is read 163 by DSSR, but there already is a residue 163.
299 - # Solution : set nt_resnum[i] to nt_resnum[i-1] + 1, and shift the following by 1.
300 - df.iloc[i, 1] = 1 + df.iloc[i-1, 1]
301 - df.iloc[i+1:, 1] += 1
302 309
303 # Search for ligands at the end of the selection 310 # Search for ligands at the end of the selection
304 # Drop ligands detected as residues by DSSR, by detecting several markers 311 # Drop ligands detected as residues by DSSR, by detecting several markers
......