Aglaé TABOT

code cleaning

Showing 1 changed file with 19 additions and 30 deletions
......@@ -261,7 +261,6 @@ class Chain:
# renumber this structure (portion of the original) with the index_chain and save it in a cif file
t=pdb.Structure.Structure(new_s.get_id())
#model=new_s[0]
for model in new_s:
new_model_t=pdb.Model.Model(model.get_id())
for chain in model:
......@@ -284,6 +283,7 @@ class Chain:
resseq=int(resseq)
index_chain=nums.at[i, "index_chain"]
nt=nums.at[i, "nt_name"]
if nt == 'A' or nt == 'G' or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' :
res=chain[(' ', resseq, icode_res)]
else : #modified nucleotides (e.g. chain 5l4o_1_A)
......@@ -310,12 +310,6 @@ class Chain:
ioobj.set_structure(t)
ioobj.save(self.file)
# Save that selection on the mmCIF object s to file
'''
ioobj = pdb.MMCIFIO()
ioobj.set_structure(s)
ioobj.save(self.file, sel)
'''
notify(status)
......@@ -369,7 +363,7 @@ class Chain:
"epsilon_zeta", "bb_type", "chi", "glyco_bond", "form", "ssZp", "Dp", "eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base",
"v0", "v1", "v2", "v3", "v4", "amplitude", "phase_angle", "puckering"]
df = df[cols_we_keep]
#print(df.iloc[0,:])
except KeyError as e:
warn(f"Error while parsing DSSR {self.pdb_id}.json output:{e}", error=True)
self.delete_me = True
......@@ -434,7 +428,7 @@ class Chain:
self.delete_me = True
self.error_messages = f"Error with parsing of duplicate residues numbers."
return None
#print(df.iloc[0,:])
# Search for ligands at the end of the selection
# Drop ligands detected as residues by DSSR, by detecting several markers
while (
......@@ -452,7 +446,7 @@ class Chain:
self.mapping.log("Droping ligand:")
self.mapping.log(df.tail(1))
df = df.head(-1)
#print(df.iloc[0,:])
# Duplicates in index_chain : drop, they are ligands
# e.g. 3iwn_1_B_1-91, ligand C2E has index_chain 1 (and nt_resnum 601)
duplicates = [ index for index, element in enumerate(df.duplicated(['index_chain']).values) if element ]
......@@ -462,7 +456,7 @@ class Chain:
if self.mapping is not None:
self.mapping.log(f"Found duplicated index_chain {df.iloc[i,0]}. Keeping only the first.")
df = df.drop_duplicates("index_chain", keep="first") # drop doublons in index_chain
#print(df.iloc[0,:])
# drop any nts with index_chain < the first residue,
# now negative because we renumber to 1 (usually, ligands)
ligands = df[df.index_chain < 0]
......@@ -472,7 +466,7 @@ class Chain:
self.mapping.log("Droping ligand:")
self.mapping.log(line)
df = df.drop(ligands.index)
#print(df.iloc[0,:])
# Find missing index_chain values
# This happens because of resolved nucleotides that have a
# strange nt_resnum value. Thanks, biologists ! :@ :(
......@@ -498,7 +492,7 @@ class Chain:
df.iloc[i+1:, 1] += 1
else:
warn(f"Missing index_chain {i} in {self.chain_label} !")
#print(df.iloc[0,:])
# Assert some nucleotides still exist
try:
# update length of chain from nt_resnum point of view
......@@ -528,13 +522,13 @@ class Chain:
# index_chain 1 |-------------|77 83|------------| 154
# expected data point 1 |--------------------------------| 154
#
#print(df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']])
if l != len(df['index_chain']): # if some residues are missing, len(df['index_chain']) < l
resnum_start = df.iloc[0, 1]
# the rowIDs the missing nucleotides would have (rowID = index_chain - 1 = nt_resnum - resnum_start)
diff = set(range(l)).difference(df['nt_resnum'] - resnum_start)
for i in sorted(diff):
#print(i)
# Add a row at position i
df = pd.concat([df.iloc[:i],
pd.DataFrame({"index_chain": i+1, "nt_resnum": i+resnum_start,
......@@ -542,17 +536,15 @@ class Chain:
df.iloc[i:]])
# Increase the index_chain of all following lines
df.iloc[i+1:, 0] += 1
#pairs=df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']]
#print(pairs.iloc[:40])
df = df.reset_index(drop=True)
#pairs=df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']]
#print(pairs.iloc[:40])
self.full_length = len(df.index_chain)
#print(df.iloc[0,:])
#######################################
# Compute new features
#######################################
#print(df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']])
# Convert angles
df.loc[:, ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'epsilon_zeta', 'chi', 'v0', 'v1', 'v2', 'v3', 'v4', # Conversion to radians
......@@ -630,10 +622,10 @@ class Chain:
df['pair_type_LW'] = pair_type_LW
df['pair_type_DSSR'] = pair_type_DSSR
df['nb_interact'] = interacts
#print(df.iloc[0,:])
# remove now useless descriptors
df = df.drop(['nt_id', 'nt_resnum'], axis=1)
#print(df.iloc[0,:])
self.seq = "".join(df.nt_code)
self.seq_to_align = "".join(df.nt_align_code)
self.length = len([x for x in self.seq_to_align if x != "-"])
......@@ -648,9 +640,7 @@ class Chain:
# Log chain info to file
if save_logs and self.mapping is not None:
self.mapping.to_file(self.chain_label+".log")
#print(df.iloc[0,:])
#pairs=df[['index_chain', 'old_nt_resnum', 'paired']]
#print(pairs.iloc[:40])
return df
def register_chain(self, df):
......@@ -988,7 +978,7 @@ class Mapping:
newdf = df.drop(df[(df.nt_resnum < self.nt_start) |
(df.nt_resnum > self.nt_end)].index)
#print(df.iloc[0,:])
if len(newdf.index_chain) > 0:
# everything's okay
df = newdf
......@@ -1001,14 +991,14 @@ class Mapping:
weird_mappings.add(self.chain_label + "." + self.rfam_acc)
df = df.drop(df[(df.index_chain < self.nt_start) |
(df.index_chain > self.nt_end)].index)
#print(df.iloc[0,:])
# If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one
self.st = 0
if len(df.index_chain) and df.iloc[0, 0] != 1:
self.st = df.iloc[0, 0] - 1
df.iloc[:, 0] -= self.st
self.log(f"Shifting index_chain of {self.st}")
#print(df.iloc[0,:])
# Check that some residues are not included by mistake:
# e.g. 4v4t-AA.RF00382-20-55 contains 4 residues numbered 30 but actually far beyond the mapped part,
# because the icodes are not read by DSSR.
......@@ -2346,7 +2336,6 @@ def work_build_chain(c, extract, khetatm, retrying=False, save_logs=True):
# extract the portion we want
if extract and not c.delete_me:
c.extract(df, khetatm)
#print(df.iloc[0,:])
return c
@trace_unhandled_exceptions
......