Aglaé TABOT

code cleaning


Former-commit-id: df20771e
Showing 1 changed file with 19 additions and 30 deletions
...@@ -261,7 +261,6 @@ class Chain: ...@@ -261,7 +261,6 @@ class Chain:
261 261
262 # renumber this structure (portion of the original) with the index_chain and save it in a cif file 262 # renumber this structure (portion of the original) with the index_chain and save it in a cif file
263 t=pdb.Structure.Structure(new_s.get_id()) 263 t=pdb.Structure.Structure(new_s.get_id())
264 - #model=new_s[0]
265 for model in new_s: 264 for model in new_s:
266 new_model_t=pdb.Model.Model(model.get_id()) 265 new_model_t=pdb.Model.Model(model.get_id())
267 for chain in model: 266 for chain in model:
...@@ -284,6 +283,7 @@ class Chain: ...@@ -284,6 +283,7 @@ class Chain:
284 resseq=int(resseq) 283 resseq=int(resseq)
285 index_chain=nums.at[i, "index_chain"] 284 index_chain=nums.at[i, "index_chain"]
286 nt=nums.at[i, "nt_name"] 285 nt=nums.at[i, "nt_name"]
286 +
287 if nt == 'A' or nt == 'G' or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : 287 if nt == 'A' or nt == 'G' or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' :
288 res=chain[(' ', resseq, icode_res)] 288 res=chain[(' ', resseq, icode_res)]
289 else : #modified nucleotides (e.g. chain 5l4o_1_A) 289 else : #modified nucleotides (e.g. chain 5l4o_1_A)
...@@ -310,12 +310,6 @@ class Chain: ...@@ -310,12 +310,6 @@ class Chain:
310 ioobj.set_structure(t) 310 ioobj.set_structure(t)
311 ioobj.save(self.file) 311 ioobj.save(self.file)
312 312
313 - # Save that selection on the mmCIF object s to file
314 - '''
315 - ioobj = pdb.MMCIFIO()
316 - ioobj.set_structure(s)
317 - ioobj.save(self.file, sel)
318 - '''
319 313
320 notify(status) 314 notify(status)
321 315
...@@ -369,7 +363,7 @@ class Chain: ...@@ -369,7 +363,7 @@ class Chain:
369 "epsilon_zeta", "bb_type", "chi", "glyco_bond", "form", "ssZp", "Dp", "eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base", 363 "epsilon_zeta", "bb_type", "chi", "glyco_bond", "form", "ssZp", "Dp", "eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base",
370 "v0", "v1", "v2", "v3", "v4", "amplitude", "phase_angle", "puckering"] 364 "v0", "v1", "v2", "v3", "v4", "amplitude", "phase_angle", "puckering"]
371 df = df[cols_we_keep] 365 df = df[cols_we_keep]
372 - #print(df.iloc[0,:]) 366 +
373 except KeyError as e: 367 except KeyError as e:
374 warn(f"Error while parsing DSSR {self.pdb_id}.json output:{e}", error=True) 368 warn(f"Error while parsing DSSR {self.pdb_id}.json output:{e}", error=True)
375 self.delete_me = True 369 self.delete_me = True
...@@ -434,7 +428,7 @@ class Chain: ...@@ -434,7 +428,7 @@ class Chain:
434 self.delete_me = True 428 self.delete_me = True
435 self.error_messages = f"Error with parsing of duplicate residues numbers." 429 self.error_messages = f"Error with parsing of duplicate residues numbers."
436 return None 430 return None
437 - #print(df.iloc[0,:]) 431 +
438 # Search for ligands at the end of the selection 432 # Search for ligands at the end of the selection
439 # Drop ligands detected as residues by DSSR, by detecting several markers 433 # Drop ligands detected as residues by DSSR, by detecting several markers
440 while ( 434 while (
...@@ -452,7 +446,7 @@ class Chain: ...@@ -452,7 +446,7 @@ class Chain:
452 self.mapping.log("Droping ligand:") 446 self.mapping.log("Droping ligand:")
453 self.mapping.log(df.tail(1)) 447 self.mapping.log(df.tail(1))
454 df = df.head(-1) 448 df = df.head(-1)
455 - #print(df.iloc[0,:]) 449 +
456 # Duplicates in index_chain : drop, they are ligands 450 # Duplicates in index_chain : drop, they are ligands
457 # e.g. 3iwn_1_B_1-91, ligand C2E has index_chain 1 (and nt_resnum 601) 451 # e.g. 3iwn_1_B_1-91, ligand C2E has index_chain 1 (and nt_resnum 601)
458 duplicates = [ index for index, element in enumerate(df.duplicated(['index_chain']).values) if element ] 452 duplicates = [ index for index, element in enumerate(df.duplicated(['index_chain']).values) if element ]
...@@ -462,7 +456,7 @@ class Chain: ...@@ -462,7 +456,7 @@ class Chain:
462 if self.mapping is not None: 456 if self.mapping is not None:
463 self.mapping.log(f"Found duplicated index_chain {df.iloc[i,0]}. Keeping only the first.") 457 self.mapping.log(f"Found duplicated index_chain {df.iloc[i,0]}. Keeping only the first.")
464 df = df.drop_duplicates("index_chain", keep="first") # drop doublons in index_chain 458 df = df.drop_duplicates("index_chain", keep="first") # drop doublons in index_chain
465 - #print(df.iloc[0,:]) 459 +
466 # drop eventual nts with index_chain < the first residue, 460 # drop eventual nts with index_chain < the first residue,
467 # now negative because we renumber to 1 (usually, ligands) 461 # now negative because we renumber to 1 (usually, ligands)
468 ligands = df[df.index_chain < 0] 462 ligands = df[df.index_chain < 0]
...@@ -472,7 +466,7 @@ class Chain: ...@@ -472,7 +466,7 @@ class Chain:
472 self.mapping.log("Droping ligand:") 466 self.mapping.log("Droping ligand:")
473 self.mapping.log(line) 467 self.mapping.log(line)
474 df = df.drop(ligands.index) 468 df = df.drop(ligands.index)
475 - #print(df.iloc[0,:]) 469 +
476 # Find missing index_chain values 470 # Find missing index_chain values
477 # This happens because of resolved nucleotides that have a 471 # This happens because of resolved nucleotides that have a
478 # strange nt_resnum value. Thanks, biologists ! :@ :( 472 # strange nt_resnum value. Thanks, biologists ! :@ :(
...@@ -498,7 +492,7 @@ class Chain: ...@@ -498,7 +492,7 @@ class Chain:
498 df.iloc[i+1:, 1] += 1 492 df.iloc[i+1:, 1] += 1
499 else: 493 else:
500 warn(f"Missing index_chain {i} in {self.chain_label} !") 494 warn(f"Missing index_chain {i} in {self.chain_label} !")
501 - #print(df.iloc[0,:]) 495 +
502 # Assert some nucleotides still exist 496 # Assert some nucleotides still exist
503 try: 497 try:
504 # update length of chain from nt_resnum point of view 498 # update length of chain from nt_resnum point of view
...@@ -528,13 +522,13 @@ class Chain: ...@@ -528,13 +522,13 @@ class Chain:
528 # index_chain 1 |-------------|77 83|------------| 154 522 # index_chain 1 |-------------|77 83|------------| 154
529 # expected data point 1 |--------------------------------| 154 523 # expected data point 1 |--------------------------------| 154
530 # 524 #
531 - #print(df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']]) 525 +
532 if l != len(df['index_chain']): # if some residues are missing, len(df['index_chain']) < l 526 if l != len(df['index_chain']): # if some residues are missing, len(df['index_chain']) < l
533 resnum_start = df.iloc[0, 1] 527 resnum_start = df.iloc[0, 1]
534 # the rowIDs the missing nucleotides would have (rowID = index_chain - 1 = nt_resnum - resnum_start) 528 # the rowIDs the missing nucleotides would have (rowID = index_chain - 1 = nt_resnum - resnum_start)
535 diff = set(range(l)).difference(df['nt_resnum'] - resnum_start) 529 diff = set(range(l)).difference(df['nt_resnum'] - resnum_start)
536 for i in sorted(diff): 530 for i in sorted(diff):
537 - #print(i) 531 +
538 # Add a row at position i 532 # Add a row at position i
539 df = pd.concat([df.iloc[:i], 533 df = pd.concat([df.iloc[:i],
540 pd.DataFrame({"index_chain": i+1, "nt_resnum": i+resnum_start, 534 pd.DataFrame({"index_chain": i+1, "nt_resnum": i+resnum_start,
...@@ -542,17 +536,15 @@ class Chain: ...@@ -542,17 +536,15 @@ class Chain:
542 df.iloc[i:]]) 536 df.iloc[i:]])
543 # Increase the index_chain of all following lines 537 # Increase the index_chain of all following lines
544 df.iloc[i+1:, 0] += 1 538 df.iloc[i+1:, 0] += 1
545 - #pairs=df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']] 539 +
546 - #print(pairs.iloc[:40])
547 df = df.reset_index(drop=True) 540 df = df.reset_index(drop=True)
548 - #pairs=df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']] 541 +
549 - #print(pairs.iloc[:40])
550 self.full_length = len(df.index_chain) 542 self.full_length = len(df.index_chain)
551 - #print(df.iloc[0,:]) 543 +
552 ####################################### 544 #######################################
553 # Compute new features 545 # Compute new features
554 ####################################### 546 #######################################
555 - #print(df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']]) 547 +
556 548
557 # Convert angles 549 # Convert angles
558 df.loc[:, ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'epsilon_zeta', 'chi', 'v0', 'v1', 'v2', 'v3', 'v4', # Conversion to radians 550 df.loc[:, ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'epsilon_zeta', 'chi', 'v0', 'v1', 'v2', 'v3', 'v4', # Conversion to radians
...@@ -630,10 +622,10 @@ class Chain: ...@@ -630,10 +622,10 @@ class Chain:
630 df['pair_type_LW'] = pair_type_LW 622 df['pair_type_LW'] = pair_type_LW
631 df['pair_type_DSSR'] = pair_type_DSSR 623 df['pair_type_DSSR'] = pair_type_DSSR
632 df['nb_interact'] = interacts 624 df['nb_interact'] = interacts
633 - #print(df.iloc[0,:]) 625 +
634 # remove now useless descriptors 626 # remove now useless descriptors
635 df = df.drop(['nt_id', 'nt_resnum'], axis=1) 627 df = df.drop(['nt_id', 'nt_resnum'], axis=1)
636 - #print(df.iloc[0,:]) 628 +
637 self.seq = "".join(df.nt_code) 629 self.seq = "".join(df.nt_code)
638 self.seq_to_align = "".join(df.nt_align_code) 630 self.seq_to_align = "".join(df.nt_align_code)
639 self.length = len([x for x in self.seq_to_align if x != "-"]) 631 self.length = len([x for x in self.seq_to_align if x != "-"])
...@@ -648,9 +640,7 @@ class Chain: ...@@ -648,9 +640,7 @@ class Chain:
648 # Log chain info to file 640 # Log chain info to file
649 if save_logs and self.mapping is not None: 641 if save_logs and self.mapping is not None:
650 self.mapping.to_file(self.chain_label+".log") 642 self.mapping.to_file(self.chain_label+".log")
651 - #print(df.iloc[0,:]) 643 +
652 - #pairs=df[['index_chain', 'old_nt_resnum', 'paired']]
653 - #print(pairs.iloc[:40])
654 return df 644 return df
655 645
656 def register_chain(self, df): 646 def register_chain(self, df):
...@@ -988,7 +978,7 @@ class Mapping: ...@@ -988,7 +978,7 @@ class Mapping:
988 978
989 newdf = df.drop(df[(df.nt_resnum < self.nt_start) | 979 newdf = df.drop(df[(df.nt_resnum < self.nt_start) |
990 (df.nt_resnum > self.nt_end)].index) 980 (df.nt_resnum > self.nt_end)].index)
991 - #print(df.iloc[0,:]) 981 +
992 if len(newdf.index_chain) > 0: 982 if len(newdf.index_chain) > 0:
993 # everything's okay 983 # everything's okay
994 df = newdf 984 df = newdf
...@@ -1001,14 +991,14 @@ class Mapping: ...@@ -1001,14 +991,14 @@ class Mapping:
1001 weird_mappings.add(self.chain_label + "." + self.rfam_acc) 991 weird_mappings.add(self.chain_label + "." + self.rfam_acc)
1002 df = df.drop(df[(df.index_chain < self.nt_start) | 992 df = df.drop(df[(df.index_chain < self.nt_start) |
1003 (df.index_chain > self.nt_end)].index) 993 (df.index_chain > self.nt_end)].index)
1004 - #print(df.iloc[0,:]) 994 +
1005 # If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one 995 # If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one
1006 self.st = 0 996 self.st = 0
1007 if len(df.index_chain) and df.iloc[0, 0] != 1: 997 if len(df.index_chain) and df.iloc[0, 0] != 1:
1008 self.st = df.iloc[0, 0] - 1 998 self.st = df.iloc[0, 0] - 1
1009 df.iloc[:, 0] -= self.st 999 df.iloc[:, 0] -= self.st
1010 self.log(f"Shifting index_chain of {self.st}") 1000 self.log(f"Shifting index_chain of {self.st}")
1011 - #print(df.iloc[0,:]) 1001 +
1012 # Check that some residues are not included by mistake: 1002 # Check that some residues are not included by mistake:
1013 # e.g. 4v4t-AA.RF00382-20-55 contains 4 residues numbered 30 but actually far beyond the mapped part, 1003 # e.g. 4v4t-AA.RF00382-20-55 contains 4 residues numbered 30 but actually far beyond the mapped part,
1014 # because the icode are not read by DSSR. 1004 # because the icode are not read by DSSR.
...@@ -2346,7 +2336,6 @@ def work_build_chain(c, extract, khetatm, retrying=False, save_logs=True): ...@@ -2346,7 +2336,6 @@ def work_build_chain(c, extract, khetatm, retrying=False, save_logs=True):
2346 # extract the portion we want 2336 # extract the portion we want
2347 if extract and not c.delete_me: 2337 if extract and not c.delete_me:
2348 c.extract(df, khetatm) 2338 c.extract(df, khetatm)
2349 - #print(df.iloc[0,:])
2350 return c 2339 return c
2351 2340
2352 @trace_unhandled_exceptions 2341 @trace_unhandled_exceptions
......