Showing
1 changed file
with
15 additions
and
8 deletions
... | @@ -1164,10 +1164,10 @@ class Pipeline: | ... | @@ -1164,10 +1164,10 @@ class Pipeline: |
1164 | # Compute the list of mappable structures using NR-list and Rfam-PDB mappings | 1164 | # Compute the list of mappable structures using NR-list and Rfam-PDB mappings |
1165 | # And get Chain() objects | 1165 | # And get Chain() objects |
1166 | print("> Building list of structures...", flush=True) | 1166 | print("> Building list of structures...", flush=True) |
1167 | - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores, maxtasksperchild=1) | 1167 | + p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores) |
1168 | try: | 1168 | try: |
1169 | 1169 | ||
1170 | - pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, desc="Eq. classes", bar_format="{percentage:3.0f}%|{bar}|") | 1170 | + pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, desc="Eq. classes", bar_format="{desc}:{percentage:3.0f}%|{bar}|") |
1171 | for _, newchains in enumerate(p.imap_unordered(partial(work_infer_mappings, not self.REUSE_ALL, allmappings), full_structures_list, chunksize=1)): | 1171 | for _, newchains in enumerate(p.imap_unordered(partial(work_infer_mappings, not self.REUSE_ALL, allmappings), full_structures_list, chunksize=1)): |
1172 | self.update += newchains | 1172 | self.update += newchains |
1173 | pbar.update(1) # Everytime the iteration finishes, update the global progress bar | 1173 | pbar.update(1) # Everytime the iteration finishes, update the global progress bar |
... | @@ -1183,7 +1183,7 @@ class Pipeline: | ... | @@ -1183,7 +1183,7 @@ class Pipeline: |
1183 | exit(1) | 1183 | exit(1) |
1184 | else: | 1184 | else: |
1185 | conn = sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0) | 1185 | conn = sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0) |
1186 | - for eq_class, codelist in tqdm(full_structures_list): | 1186 | + for eq_class, codelist in tqdm(full_structures_list, desc="Eq. classes"): |
1187 | codes = codelist.replace('+',',').split(',') | 1187 | codes = codelist.replace('+',',').split(',') |
1188 | 1188 | ||
1189 | # Simply convert the list of codes to Chain() objects | 1189 | # Simply convert the list of codes to Chain() objects |
... | @@ -1204,6 +1204,7 @@ class Pipeline: | ... | @@ -1204,6 +1204,7 @@ class Pipeline: |
1204 | self.n_chains = len(self.update) | 1204 | self.n_chains = len(self.update) |
1205 | print(str(self.n_chains) + " RNA chains of interest.") | 1205 | print(str(self.n_chains) + " RNA chains of interest.") |
1206 | 1206 | ||
1207 | + @trace_unhandled_exceptions | ||
1207 | def dl_and_annotate(self, retry=False, coeff_ncores = 0.75): | 1208 | def dl_and_annotate(self, retry=False, coeff_ncores = 0.75): |
1208 | """ | 1209 | """ |
1209 | Gets mmCIF files from the PDB, and runs DSSR on them. | 1210 | Gets mmCIF files from the PDB, and runs DSSR on them. |
... | @@ -1212,7 +1213,7 @@ class Pipeline: | ... | @@ -1212,7 +1213,7 @@ class Pipeline: |
1212 | REQUIRES the previous definition of self.update, so call list_available_mappings() before. | 1213 | REQUIRES the previous definition of self.update, so call list_available_mappings() before. |
1213 | SETS table structure""" | 1214 | SETS table structure""" |
1214 | 1215 | ||
1215 | - setproctitle(f"RNANet.py dl_and_annotate(retry={retry})") | 1216 | + # setproctitle(f"RNANet.py dl_and_annotate(retry={retry})") |
1216 | 1217 | ||
1217 | # Prepare the results folders | 1218 | # Prepare the results folders |
1218 | if not path.isdir(path_to_3D_data + "RNAcifs"): | 1219 | if not path.isdir(path_to_3D_data + "RNAcifs"): |
... | @@ -1293,7 +1294,7 @@ class Pipeline: | ... | @@ -1293,7 +1294,7 @@ class Pipeline: |
1293 | ki.close() | 1294 | ki.close() |
1294 | kir.close() | 1295 | kir.close() |
1295 | if issues: | 1296 | if issues: |
1296 | - warn("Added newly discovered issues to known issues:") | 1297 | + warn(f"Added {issues} newly discovered issues to known issues:") |
1297 | print("\033[33m"+ " ".join(issues_names) + "\033[0m", flush=True) | 1298 | print("\033[33m"+ " ".join(issues_names) + "\033[0m", flush=True) |
1298 | 1299 | ||
1299 | # Add successfully built chains to list | 1300 | # Add successfully built chains to list |
... | @@ -1438,7 +1439,7 @@ class Pipeline: | ... | @@ -1438,7 +1439,7 @@ class Pipeline: |
1438 | os.makedirs(runDir + "/results/archive/") | 1439 | os.makedirs(runDir + "/results/archive/") |
1439 | 1440 | ||
1440 | # Save to by-chain CSV files | 1441 | # Save to by-chain CSV files |
1441 | - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3, maxtasksperchild=1) | 1442 | + p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3) |
1442 | try: | 1443 | try: |
1443 | pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True) | 1444 | pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True) |
1444 | for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains, chunksize=2)): | 1445 | for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains, chunksize=2)): |
... | @@ -1991,7 +1992,7 @@ def work_mmcif(pdb_id): | ... | @@ -1991,7 +1992,7 @@ def work_mmcif(pdb_id): |
1991 | # Attempt to download it if not present | 1992 | # Attempt to download it if not present |
1992 | try: | 1993 | try: |
1993 | if not path.isfile(final_filepath): | 1994 | if not path.isfile(final_filepath): |
1994 | - subprocess.run(["wget", f'http://files.rcsb.org/download/{pdb_id}.cif', "-O", final_filepath], stdout=subprocess.DEVNULL) | 1995 | + subprocess.run(["wget", f'http://files.rcsb.org/download/{pdb_id}.cif', "-O", final_filepath], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) |
1995 | except: | 1996 | except: |
1996 | warn(f"Unable to download {pdb_id}.cif. Ignoring it.", error=True) | 1997 | warn(f"Unable to download {pdb_id}.cif. Ignoring it.", error=True) |
1997 | return | 1998 | return |
... | @@ -2006,7 +2007,12 @@ def work_mmcif(pdb_id): | ... | @@ -2006,7 +2007,12 @@ def work_mmcif(pdb_id): |
2006 | mmCif_info = MMCIF2Dict(final_filepath) | 2007 | mmCif_info = MMCIF2Dict(final_filepath) |
2007 | 2008 | ||
2008 | # Get info about that structure | 2009 | # Get info about that structure |
2009 | - exp_meth = mmCif_info["_exptl.method"][0] | 2010 | + try: |
2011 | + exp_meth = mmCif_info["_exptl.method"][0] | ||
2012 | + except KeyError: | ||
2013 | + warn(f"Wtf, {pdb_id}.cif has no _exptl.method ? Assuming X-ray.") | ||
2014 | + warn(f"Check https://files.rcsb.org/header/{pdb_id}.cif to figure it out.") | ||
2015 | + exp_meth = "X-RAY DIFFRACTION" | ||
2010 | date = mmCif_info["_pdbx_database_status.recvd_initial_deposition_date"][0] | 2016 | date = mmCif_info["_pdbx_database_status.recvd_initial_deposition_date"][0] |
2011 | if "_refine.ls_d_res_high" in mmCif_info.keys() and mmCif_info["_refine.ls_d_res_high"][0] not in ['.', '?']: | 2017 | if "_refine.ls_d_res_high" in mmCif_info.keys() and mmCif_info["_refine.ls_d_res_high"][0] not in ['.', '?']: |
2012 | reso = float(mmCif_info["_refine.ls_d_res_high"][0]) | 2018 | reso = float(mmCif_info["_refine.ls_d_res_high"][0]) |
... | @@ -2463,6 +2469,7 @@ if __name__ == "__main__": | ... | @@ -2463,6 +2469,7 @@ if __name__ == "__main__": |
2463 | 2469 | ||
2464 | # Download and annotate new RNA 3D chains (Chain objects in pp.update) | 2470 | # Download and annotate new RNA 3D chains (Chain objects in pp.update) |
2465 | pp.dl_and_annotate(coeff_ncores=0.5) | 2471 | pp.dl_and_annotate(coeff_ncores=0.5) |
2472 | + print("Here we go.") | ||
2466 | 2473 | ||
2467 | # At this point, the structure table is up to date | 2474 | # At this point, the structure table is up to date |
2468 | pp.build_chains(coeff_ncores=1.0) | 2475 | pp.build_chains(coeff_ncores=1.0) | ... | ... |
-
Please register or login to post a comment