Louis BECQUEY

solved issue if duplicate nt_resnum at the end of the chain

Showing 1 changed file with 32 additions and 25 deletions
......@@ -273,32 +273,39 @@ class Chain:
if self.mapping is not None:
self.mapping.log(f"Shifting nt_resnum numbering because of {n_dup} duplicate residues {df.iloc[i,1]}")
if df.iloc[i,1] == df.iloc[i-1,1] and df.iloc[index_last_dup + 1, 1] - 1 > df.iloc[index_last_dup, 1]:
# The redundant nts are consecutive in the chain (at the beginning at least), and there is a gap at the end
if duplicates.iloc[n_dup-1, 0] - duplicates.iloc[0, 0] + 1 == n_dup:
# They are all contiguous in the chain
# 4v9n-DA case (and similar ones) : 610-611-611A-611B-611C-611D-611E-611F-611G-617-618...
# there is a redundancy (611) followed by a gap (611-617).
# We want the redundancy to fill the gap.
df.iloc[i:i+n_dup-1, 1] += 1
try:
if i > 0 and index_last_dup +1 < len(df.index) and df.iloc[i,1] == df.iloc[i-1,1] and df.iloc[index_last_dup + 1, 1] - 1 > df.iloc[index_last_dup, 1]:
# The redundant nts are consecutive in the chain (at the beginning at least), and there is a gap at the end
if duplicates.iloc[n_dup-1, 0] - duplicates.iloc[0, 0] + 1 == n_dup:
# They are all contiguous in the chain
# 4v9n-DA case (and similar ones) : 610-611-611A-611B-611C-611D-611E-611F-611G-617-618...
# there is a redundancy (611) followed by a gap (611-617).
# We want the redundancy to fill the gap.
df.iloc[i:i+n_dup-1, 1] += 1
else:
# We solve the problem continuous component by continuous component
for j in range(1, n_dup+1):
if duplicates.iloc[j,0] == 1 + duplicates.iloc[j-1,0]: # continuous
df.iloc[i+j-1,1] += 1
else:
break
elif df.iloc[i,1] == df.iloc[i-1,1]:
# Common 4v9q-DV case (and similar ones) : e.g. the chain contains 17 and 17A, which are both read as 17 by DSSR.
# Solution : we shift the numbering of 17A (to 18) and the following residues.
df.iloc[i:, 1] += 1
else:
# We solve the problem continuous component by continuous component
for j in range(1, n_dup+1):
if duplicates.iloc[j,0] == 1 + duplicates.iloc[j-1,0]: # continuous
df.iloc[i+j-1,1] += 1
else:
break
elif df.iloc[i,1] == df.iloc[i-1,1]:
# Common 4v9q-DV case (and similar ones) : e.g. the chain contains 17 and 17A, which are both read as 17 by DSSR.
# Solution : we shift the numbering of 17A (to 18) and the following residues.
df.iloc[i:, 1] += 1
else:
# 4v9k-DA case (and similar ones) : the nt_id is not the full nt_resnum: ... 1629 > 1630 > 163B > 1631 > ...
# Here the 163B is read 163 by DSSR, but there already is a residue 163.
# Solution : set nt_resnum[i] to nt_resnum[i-1] + 1, and shift the following by 1.
df.iloc[i, 1] = 1 + df.iloc[i-1, 1]
df.iloc[i+1:, 1] += 1
# 4v9k-DA case (and similar ones) : the nt_id is not the full nt_resnum: ... 1629 > 1630 > 163B > 1631 > ...
# Here the 163B is read 163 by DSSR, but there already is a residue 163.
# Solution : set nt_resnum[i] to nt_resnum[i-1] + 1, and shift the following by 1.
df.iloc[i, 1] = 1 + df.iloc[i-1, 1]
df.iloc[i+1:, 1] += 1
except:
warn(f"Error with parsing of {self.chain_label} duplicate residue numbers. Ignoring it.")
self.delete_me = True
self.error_messages = f"Error with parsing of duplicate residues numbers."
return None
# Search for ligands at the end of the selection
# Drop ligands detected as residues by DSSR, by detecting several markers
......