Showing 2 changed files with 16 additions and 7 deletions
... | @@ -540,6 +540,7 @@ class Chain: | ... | @@ -540,6 +540,7 @@ class Chain: |
540 | self.seq_to_align = ''.join(c_seq_to_align) | 540 | self.seq_to_align = ''.join(c_seq_to_align) |
541 | self.seq = ''.join(c_seq) | 541 | self.seq = ''.join(c_seq) |
542 | 542 | ||
543 | + | ||
543 | class Job: | 544 | class Job: |
544 | """ This class contains information about a task to run later. | 545 | """ This class contains information about a task to run later. |
545 | 546 | ||
... | @@ -574,6 +575,7 @@ class Job: | ... | @@ -574,6 +575,7 @@ class Job: |
574 | s = f"{self.priority_}({self.nthreads}) [{self.comp_time}]\t{self.label:25}{self.func_.__name__}(" + " ".join([str(a) for a in self.args_]) + ")" | 575 | s = f"{self.priority_}({self.nthreads}) [{self.comp_time}]\t{self.label:25}{self.func_.__name__}(" + " ".join([str(a) for a in self.args_]) + ")" |
575 | return s | 576 | return s |
576 | 577 | ||
578 | + | ||
577 | class Monitor: | 579 | class Monitor: |
578 | """ A job that simply watches the memory usage of another process. | 580 | """ A job that simply watches the memory usage of another process. |
579 | 581 | ||
... | @@ -1681,10 +1683,16 @@ def work_infer_mappings(update_only, allmappings, codelist): | ... | @@ -1681,10 +1683,16 @@ def work_infer_mappings(update_only, allmappings, codelist): |
1681 | for rfam in families: | 1683 | for rfam in families: |
1682 | # if a known mapping of this chain on this family exists, apply it | 1684 | # if a known mapping of this chain on this family exists, apply it |
1683 | m = known_mappings.loc[ (known_mappings.pdb_id + "|1|" + known_mappings.chain == c[:4].lower()+c[4:]) & (known_mappings['rfam_acc'] == rfam ) ] | 1685 | m = known_mappings.loc[ (known_mappings.pdb_id + "|1|" + known_mappings.chain == c[:4].lower()+c[4:]) & (known_mappings['rfam_acc'] == rfam ) ] |
1684 | - if len(m): | 1686 | + if len(m) and len(m) < 2: |
1685 | pdb_start = int(m.pdb_start) | 1687 | pdb_start = int(m.pdb_start) |
1686 | pdb_end = int(m.pdb_end) | 1688 | pdb_end = int(m.pdb_end) |
1687 | inferred = False | 1689 | inferred = False |
1690 | + elif len(m): | ||
1691 | + # two different parts of the same chain are mapped to the same family... (ex: 6ek0-L5) | ||
1692 | + # ==> map the whole chain to that family, not the parts | ||
1693 | + pdb_start = int(m.pdb_start.min()) | ||
1694 | + pdb_end = int(m.pdb_end.max()) | ||
1695 | + inferred = False | ||
1688 | else: # otherwise, use the inferred mapping | 1696 | else: # otherwise, use the inferred mapping |
1689 | pdb_start = int(inferred_mappings.loc[ (inferred_mappings['rfam_acc'] == rfam) ].pdb_start) | 1697 | pdb_start = int(inferred_mappings.loc[ (inferred_mappings['rfam_acc'] == rfam) ].pdb_start) |
1690 | pdb_end = int(inferred_mappings.loc[ (inferred_mappings['rfam_acc'] == rfam) ].pdb_end) | 1698 | pdb_end = int(inferred_mappings.loc[ (inferred_mappings['rfam_acc'] == rfam) ].pdb_end) |
... | @@ -2114,7 +2122,7 @@ if __name__ == "__main__": | ... | @@ -2114,7 +2122,7 @@ if __name__ == "__main__": |
2114 | 2122 | ||
2115 | # At this point, the structure table is up to date | 2123 | # At this point, the structure table is up to date |
2116 | 2124 | ||
2117 | - pp.build_chains(coeff_ncores=2.0) | 2125 | + pp.build_chains(coeff_ncores=1.0) |
2118 | if len(pp.to_retry): | 2126 | if len(pp.to_retry): |
2119 | # Redownload and re-annotate | 2127 | # Redownload and re-annotate |
2120 | print("> Retrying to annotate some structures which just failed.", flush=True) | 2128 | print("> Retrying to annotate some structures which just failed.", flush=True) | ... | ... |
... | @@ -87,6 +87,7 @@ def reproduce_wadley_results(show=False, carbon=4, sd_range=(1,4)): | ... | @@ -87,6 +87,7 @@ def reproduce_wadley_results(show=False, carbon=4, sd_range=(1,4)): |
87 | kernel_c2 = st.gaussian_kde(values_c2) | 87 | kernel_c2 = st.gaussian_kde(values_c2) |
88 | f_c2 = np.reshape(kernel_c2(positions).T, xx.shape) | 88 | f_c2 = np.reshape(kernel_c2(positions).T, xx.shape) |
89 | 89 | ||
90 | + | ||
90 | # Uncomment to save the data to an archive for later use without the need to recompute | 91 | # Uncomment to save the data to an archive for later use without the need to recompute |
91 | np.savez(f"data/wadley_kernel_{angle}.npz", | 92 | np.savez(f"data/wadley_kernel_{angle}.npz", |
92 | c3_endo_e=c3_endo_etas, c3_endo_t=c3_endo_thetas, | 93 | c3_endo_e=c3_endo_etas, c3_endo_t=c3_endo_thetas, |
... | @@ -516,16 +517,16 @@ if __name__ == "__main__": | ... | @@ -516,16 +517,16 @@ if __name__ == "__main__": |
516 | mappings_list[k] = [ x[0] for x in sql_ask_database(conn, f"SELECT chain_id from chain WHERE rfam_acc='{k}';") ] | 517 | mappings_list[k] = [ x[0] for x in sql_ask_database(conn, f"SELECT chain_id from chain WHERE rfam_acc='{k}';") ] |
517 | conn.close() | 518 | conn.close() |
518 | 519 | ||
519 | - stats_pairs() | 520 | + # stats_pairs() |
520 | 521 | ||
521 | # Define threads for the tasks | 522 | # Define threads for the tasks |
522 | threads = [ | 523 | threads = [ |
523 | th.Thread(target=reproduce_wadley_results, kwargs={'carbon': 1}), | 524 | th.Thread(target=reproduce_wadley_results, kwargs={'carbon': 1}), |
524 | th.Thread(target=reproduce_wadley_results, kwargs={'carbon': 4}), | 525 | th.Thread(target=reproduce_wadley_results, kwargs={'carbon': 4}), |
525 | - th.Thread(target=stats_len), | 526 | + # th.Thread(target=stats_len), |
526 | - th.Thread(target=stats_freq), | 527 | + # th.Thread(target=stats_freq), |
527 | - th.Thread(target=seq_idty), | 528 | + # th.Thread(target=seq_idty), |
528 | - th.Thread(target=per_chain_stats) | 529 | + # th.Thread(target=per_chain_stats) |
529 | ] | 530 | ] |
530 | 531 | ||
531 | # Start the threads | 532 | # Start the threads | ... | ... |
-
Please register or log in to post a comment