Showing
1 changed file
with
19 additions
and
30 deletions
... | @@ -261,7 +261,6 @@ class Chain: | ... | @@ -261,7 +261,6 @@ class Chain: |
261 | 261 | ||
262 | # renumber this structure (portion of the original) with the index_chain and save it in a cif file | 262 | # renumber this structure (portion of the original) with the index_chain and save it in a cif file |
263 | t=pdb.Structure.Structure(new_s.get_id()) | 263 | t=pdb.Structure.Structure(new_s.get_id()) |
264 | - #model=new_s[0] | ||
265 | for model in new_s: | 264 | for model in new_s: |
266 | new_model_t=pdb.Model.Model(model.get_id()) | 265 | new_model_t=pdb.Model.Model(model.get_id()) |
267 | for chain in model: | 266 | for chain in model: |
... | @@ -284,6 +283,7 @@ class Chain: | ... | @@ -284,6 +283,7 @@ class Chain: |
284 | resseq=int(resseq) | 283 | resseq=int(resseq) |
285 | index_chain=nums.at[i, "index_chain"] | 284 | index_chain=nums.at[i, "index_chain"] |
286 | nt=nums.at[i, "nt_name"] | 285 | nt=nums.at[i, "nt_name"] |
286 | + | ||
287 | if nt == 'A' or nt == 'G' or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : | 287 | if nt == 'A' or nt == 'G' or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' : |
288 | res=chain[(' ', resseq, icode_res)] | 288 | res=chain[(' ', resseq, icode_res)] |
289 | else : #modified nucleotides (e.g. chain 5l4o_1_A) | 289 | else : #modified nucleotides (e.g. chain 5l4o_1_A) |
... | @@ -310,12 +310,6 @@ class Chain: | ... | @@ -310,12 +310,6 @@ class Chain: |
310 | ioobj.set_structure(t) | 310 | ioobj.set_structure(t) |
311 | ioobj.save(self.file) | 311 | ioobj.save(self.file) |
312 | 312 | ||
313 | - # Save that selection on the mmCIF object s to file | ||
314 | - ''' | ||
315 | - ioobj = pdb.MMCIFIO() | ||
316 | - ioobj.set_structure(s) | ||
317 | - ioobj.save(self.file, sel) | ||
318 | - ''' | ||
319 | 313 | ||
320 | notify(status) | 314 | notify(status) |
321 | 315 | ||
... | @@ -369,7 +363,7 @@ class Chain: | ... | @@ -369,7 +363,7 @@ class Chain: |
369 | "epsilon_zeta", "bb_type", "chi", "glyco_bond", "form", "ssZp", "Dp", "eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base", | 363 | "epsilon_zeta", "bb_type", "chi", "glyco_bond", "form", "ssZp", "Dp", "eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base", |
370 | "v0", "v1", "v2", "v3", "v4", "amplitude", "phase_angle", "puckering"] | 364 | "v0", "v1", "v2", "v3", "v4", "amplitude", "phase_angle", "puckering"] |
371 | df = df[cols_we_keep] | 365 | df = df[cols_we_keep] |
372 | - #print(df.iloc[0,:]) | 366 | + |
373 | except KeyError as e: | 367 | except KeyError as e: |
374 | warn(f"Error while parsing DSSR {self.pdb_id}.json output:{e}", error=True) | 368 | warn(f"Error while parsing DSSR {self.pdb_id}.json output:{e}", error=True) |
375 | self.delete_me = True | 369 | self.delete_me = True |
... | @@ -434,7 +428,7 @@ class Chain: | ... | @@ -434,7 +428,7 @@ class Chain: |
434 | self.delete_me = True | 428 | self.delete_me = True |
435 | self.error_messages = f"Error with parsing of duplicate residues numbers." | 429 | self.error_messages = f"Error with parsing of duplicate residues numbers." |
436 | return None | 430 | return None |
437 | - #print(df.iloc[0,:]) | 431 | + |
438 | # Search for ligands at the end of the selection | 432 | # Search for ligands at the end of the selection |
439 | # Drop ligands detected as residues by DSSR, by detecting several markers | 433 | # Drop ligands detected as residues by DSSR, by detecting several markers |
440 | while ( | 434 | while ( |
... | @@ -452,7 +446,7 @@ class Chain: | ... | @@ -452,7 +446,7 @@ class Chain: |
452 | self.mapping.log("Droping ligand:") | 446 | self.mapping.log("Droping ligand:") |
453 | self.mapping.log(df.tail(1)) | 447 | self.mapping.log(df.tail(1)) |
454 | df = df.head(-1) | 448 | df = df.head(-1) |
455 | - #print(df.iloc[0,:]) | 449 | + |
456 | # Duplicates in index_chain : drop, they are ligands | 450 | # Duplicates in index_chain : drop, they are ligands |
457 | # e.g. 3iwn_1_B_1-91, ligand C2E has index_chain 1 (and nt_resnum 601) | 451 | # e.g. 3iwn_1_B_1-91, ligand C2E has index_chain 1 (and nt_resnum 601) |
458 | duplicates = [ index for index, element in enumerate(df.duplicated(['index_chain']).values) if element ] | 452 | duplicates = [ index for index, element in enumerate(df.duplicated(['index_chain']).values) if element ] |
... | @@ -462,7 +456,7 @@ class Chain: | ... | @@ -462,7 +456,7 @@ class Chain: |
462 | if self.mapping is not None: | 456 | if self.mapping is not None: |
463 | self.mapping.log(f"Found duplicated index_chain {df.iloc[i,0]}. Keeping only the first.") | 457 | self.mapping.log(f"Found duplicated index_chain {df.iloc[i,0]}. Keeping only the first.") |
464 | df = df.drop_duplicates("index_chain", keep="first") # drop duplicates in index_chain | 458 | df = df.drop_duplicates("index_chain", keep="first") # drop duplicates in index_chain |
465 | - #print(df.iloc[0,:]) | 459 | + |
466 | # drop any nts with index_chain < the first residue, | 460 | # drop any nts with index_chain < the first residue, |
467 | # now negative because we renumber to 1 (usually, ligands) | 461 | # now negative because we renumber to 1 (usually, ligands) |
468 | ligands = df[df.index_chain < 0] | 462 | ligands = df[df.index_chain < 0] |
... | @@ -472,7 +466,7 @@ class Chain: | ... | @@ -472,7 +466,7 @@ class Chain: |
472 | self.mapping.log("Droping ligand:") | 466 | self.mapping.log("Droping ligand:") |
473 | self.mapping.log(line) | 467 | self.mapping.log(line) |
474 | df = df.drop(ligands.index) | 468 | df = df.drop(ligands.index) |
475 | - #print(df.iloc[0,:]) | 469 | + |
476 | # Find missing index_chain values | 470 | # Find missing index_chain values |
477 | # This happens because of resolved nucleotides that have a | 471 | # This happens because of resolved nucleotides that have a |
478 | # strange nt_resnum value. Thanks, biologists ! :@ :( | 472 | # strange nt_resnum value. Thanks, biologists ! :@ :( |
... | @@ -498,7 +492,7 @@ class Chain: | ... | @@ -498,7 +492,7 @@ class Chain: |
498 | df.iloc[i+1:, 1] += 1 | 492 | df.iloc[i+1:, 1] += 1 |
499 | else: | 493 | else: |
500 | warn(f"Missing index_chain {i} in {self.chain_label} !") | 494 | warn(f"Missing index_chain {i} in {self.chain_label} !") |
501 | - #print(df.iloc[0,:]) | 495 | + |
502 | # Assert some nucleotides still exist | 496 | # Assert some nucleotides still exist |
503 | try: | 497 | try: |
504 | # update length of chain from nt_resnum point of view | 498 | # update length of chain from nt_resnum point of view |
... | @@ -528,13 +522,13 @@ class Chain: | ... | @@ -528,13 +522,13 @@ class Chain: |
528 | # index_chain 1 |-------------|77 83|------------| 154 | 522 | # index_chain 1 |-------------|77 83|------------| 154 |
529 | # expected data point 1 |--------------------------------| 154 | 523 | # expected data point 1 |--------------------------------| 154 |
530 | # | 524 | # |
531 | - #print(df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']]) | 525 | + |
532 | if l != len(df['index_chain']): # if some residues are missing, len(df['index_chain']) < l | 526 | if l != len(df['index_chain']): # if some residues are missing, len(df['index_chain']) < l |
533 | resnum_start = df.iloc[0, 1] | 527 | resnum_start = df.iloc[0, 1] |
534 | # the rowIDs the missing nucleotides would have (rowID = index_chain - 1 = nt_resnum - resnum_start) | 528 | # the rowIDs the missing nucleotides would have (rowID = index_chain - 1 = nt_resnum - resnum_start) |
535 | diff = set(range(l)).difference(df['nt_resnum'] - resnum_start) | 529 | diff = set(range(l)).difference(df['nt_resnum'] - resnum_start) |
536 | for i in sorted(diff): | 530 | for i in sorted(diff): |
537 | - #print(i) | 531 | + |
538 | # Add a row at position i | 532 | # Add a row at position i |
539 | df = pd.concat([df.iloc[:i], | 533 | df = pd.concat([df.iloc[:i], |
540 | pd.DataFrame({"index_chain": i+1, "nt_resnum": i+resnum_start, | 534 | pd.DataFrame({"index_chain": i+1, "nt_resnum": i+resnum_start, |
... | @@ -542,17 +536,15 @@ class Chain: | ... | @@ -542,17 +536,15 @@ class Chain: |
542 | df.iloc[i:]]) | 536 | df.iloc[i:]]) |
543 | # Increase the index_chain of all following lines | 537 | # Increase the index_chain of all following lines |
544 | df.iloc[i+1:, 0] += 1 | 538 | df.iloc[i+1:, 0] += 1 |
545 | - #pairs=df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']] | 539 | + |
546 | - #print(pairs.iloc[:40]) | ||
547 | df = df.reset_index(drop=True) | 540 | df = df.reset_index(drop=True) |
548 | - #pairs=df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']] | 541 | + |
549 | - #print(pairs.iloc[:40]) | ||
550 | self.full_length = len(df.index_chain) | 542 | self.full_length = len(df.index_chain) |
551 | - #print(df.iloc[0,:]) | 543 | + |
552 | ####################################### | 544 | ####################################### |
553 | # Compute new features | 545 | # Compute new features |
554 | ####################################### | 546 | ####################################### |
555 | - #print(df[['index_chain', 'nt_resnum', 'nt_id', 'nt_code']]) | 547 | + |
556 | 548 | ||
557 | # Convert angles | 549 | # Convert angles |
558 | df.loc[:, ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'epsilon_zeta', 'chi', 'v0', 'v1', 'v2', 'v3', 'v4', # Conversion to radians | 550 | df.loc[:, ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'epsilon_zeta', 'chi', 'v0', 'v1', 'v2', 'v3', 'v4', # Conversion to radians |
... | @@ -630,10 +622,10 @@ class Chain: | ... | @@ -630,10 +622,10 @@ class Chain: |
630 | df['pair_type_LW'] = pair_type_LW | 622 | df['pair_type_LW'] = pair_type_LW |
631 | df['pair_type_DSSR'] = pair_type_DSSR | 623 | df['pair_type_DSSR'] = pair_type_DSSR |
632 | df['nb_interact'] = interacts | 624 | df['nb_interact'] = interacts |
633 | - #print(df.iloc[0,:]) | 625 | + |
634 | # remove now useless descriptors | 626 | # remove now useless descriptors |
635 | df = df.drop(['nt_id', 'nt_resnum'], axis=1) | 627 | df = df.drop(['nt_id', 'nt_resnum'], axis=1) |
636 | - #print(df.iloc[0,:]) | 628 | + |
637 | self.seq = "".join(df.nt_code) | 629 | self.seq = "".join(df.nt_code) |
638 | self.seq_to_align = "".join(df.nt_align_code) | 630 | self.seq_to_align = "".join(df.nt_align_code) |
639 | self.length = len([x for x in self.seq_to_align if x != "-"]) | 631 | self.length = len([x for x in self.seq_to_align if x != "-"]) |
... | @@ -648,9 +640,7 @@ class Chain: | ... | @@ -648,9 +640,7 @@ class Chain: |
648 | # Log chain info to file | 640 | # Log chain info to file |
649 | if save_logs and self.mapping is not None: | 641 | if save_logs and self.mapping is not None: |
650 | self.mapping.to_file(self.chain_label+".log") | 642 | self.mapping.to_file(self.chain_label+".log") |
651 | - #print(df.iloc[0,:]) | 643 | + |
652 | - #pairs=df[['index_chain', 'old_nt_resnum', 'paired']] | ||
653 | - #print(pairs.iloc[:40]) | ||
654 | return df | 644 | return df |
655 | 645 | ||
656 | def register_chain(self, df): | 646 | def register_chain(self, df): |
... | @@ -988,7 +978,7 @@ class Mapping: | ... | @@ -988,7 +978,7 @@ class Mapping: |
988 | 978 | ||
989 | newdf = df.drop(df[(df.nt_resnum < self.nt_start) | | 979 | newdf = df.drop(df[(df.nt_resnum < self.nt_start) | |
990 | (df.nt_resnum > self.nt_end)].index) | 980 | (df.nt_resnum > self.nt_end)].index) |
991 | - #print(df.iloc[0,:]) | 981 | + |
992 | if len(newdf.index_chain) > 0: | 982 | if len(newdf.index_chain) > 0: |
993 | # everything's okay | 983 | # everything's okay |
994 | df = newdf | 984 | df = newdf |
... | @@ -1001,14 +991,14 @@ class Mapping: | ... | @@ -1001,14 +991,14 @@ class Mapping: |
1001 | weird_mappings.add(self.chain_label + "." + self.rfam_acc) | 991 | weird_mappings.add(self.chain_label + "." + self.rfam_acc) |
1002 | df = df.drop(df[(df.index_chain < self.nt_start) | | 992 | df = df.drop(df[(df.index_chain < self.nt_start) | |
1003 | (df.index_chain > self.nt_end)].index) | 993 | (df.index_chain > self.nt_end)].index) |
1004 | - #print(df.iloc[0,:]) | 994 | + |
1005 | # If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one | 995 | # If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one |
1006 | self.st = 0 | 996 | self.st = 0 |
1007 | if len(df.index_chain) and df.iloc[0, 0] != 1: | 997 | if len(df.index_chain) and df.iloc[0, 0] != 1: |
1008 | self.st = df.iloc[0, 0] - 1 | 998 | self.st = df.iloc[0, 0] - 1 |
1009 | df.iloc[:, 0] -= self.st | 999 | df.iloc[:, 0] -= self.st |
1010 | self.log(f"Shifting index_chain of {self.st}") | 1000 | self.log(f"Shifting index_chain of {self.st}") |
1011 | - #print(df.iloc[0,:]) | 1001 | + |
1012 | # Check that some residues are not included by mistake: | 1002 | # Check that some residues are not included by mistake: |
1013 | # e.g. 4v4t-AA.RF00382-20-55 contains 4 residues numbered 30 but actually far beyond the mapped part, | 1003 | # e.g. 4v4t-AA.RF00382-20-55 contains 4 residues numbered 30 but actually far beyond the mapped part, |
1014 | # because the insertion codes (icode) are not read by DSSR. | 1004 | # because the insertion codes (icode) are not read by DSSR. |
... | @@ -2346,7 +2336,6 @@ def work_build_chain(c, extract, khetatm, retrying=False, save_logs=True): | ... | @@ -2346,7 +2336,6 @@ def work_build_chain(c, extract, khetatm, retrying=False, save_logs=True): |
2346 | # extract the portion we want | 2336 | # extract the portion we want |
2347 | if extract and not c.delete_me: | 2337 | if extract and not c.delete_me: |
2348 | c.extract(df, khetatm) | 2338 | c.extract(df, khetatm) |
2349 | - #print(df.iloc[0,:]) | ||
2350 | return c | 2339 | return c |
2351 | 2340 | ||
2352 | @trace_unhandled_exceptions | 2341 | @trace_unhandled_exceptions | ... | ... |
-
Please register or login to post a comment