# Drop any nts whose index_chain is negative, i.e. located before the first residue (usually ligands)
df=df.drop(df[df.index_chain<0].index)
# Check that some nucleotides still exist after the drop (otherwise warn and give up on this chain)
try:
l=df.iloc[-1,1]-df.iloc[0,1]+1# length of chain from nt_resnum point of view
exceptIndexError:
warn(f"Could not find real nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. Ignoring chain {self.chain_label}.",error=True)
no_nts_set.add(self.pdb_id)
self.delete_me=True
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
returnNone
# If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one
ifdf.iloc[0,0]!=1:
st=df.iloc[0,0]-1
df.iloc[:,0]-=st
# Find missing index_chain values because of resolved nucleotides that have a strange nt_resnum value
# Duplicates in index_chain : drop, they are ligands
# e.g. 3iwn_1_B_1-91, ligand C2E has index_chain 1 (and nt_resnum 601)
l=df.iloc[-1,1]-df.iloc[0,1]+1# update length of chain from nt_resnum point of view
exceptIndexError:
warn(f"Could not find real nucleotides of chain {self.pdb_chain_id} between {self.pdb_start} and "
f"{self.pdb_end} ({'not ' if not self.inferred else ''}inferred). Ignoring chain {self.chain_label}.")
warn(f"Could not find real nucleotides of chain {self.pdb_chain_id} between {self.mapping.nt_start} and "
f"{self.mapping.nt_end} ({'not ' if not self.mapping.inferred else ''}inferred). Ignoring chain {self.chain_label}.")
no_nts_set.add(self.pdb_id)
self.delete_me=True
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. Either there is a problem with {self.pdb_id} mmCIF download, or the bases are not resolved in the structure. Delete it and retry."
...
...
@@ -464,7 +448,7 @@ class Chain:
df['nb_interact']=interacts
df=df.drop(['nt_id'],axis=1)# remove now useless descriptors
ifself.reversed:
ifself.mapping.reversed:
# The 3D structure is numbered from 3' to 5' instead of standard 5' to 3'
# or the sequence that matches the Rfam family is 3' to 5' instead of standard 5' to 3'.
# Either way, the angles need to be inverted.
...
...
@@ -507,6 +491,10 @@ class Chain:
self.error_messages="Sequence is too short. (< 5 resolved nts)"