Louis BECQUEY

Do not use maxtasksperchild=1

Showing 1 changed file with 15 additions and 8 deletions
...@@ -1164,10 +1164,10 @@ class Pipeline: ...@@ -1164,10 +1164,10 @@ class Pipeline:
1164 # Compute the list of mappable structures using NR-list and Rfam-PDB mappings 1164 # Compute the list of mappable structures using NR-list and Rfam-PDB mappings
1165 # And get Chain() objects 1165 # And get Chain() objects
1166 print("> Building list of structures...", flush=True) 1166 print("> Building list of structures...", flush=True)
1167 - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores, maxtasksperchild=1) 1167 + p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores)
1168 try: 1168 try:
1169 1169
1170 - pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, desc="Eq. classes", bar_format="{percentage:3.0f}%|{bar}|") 1170 + pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, desc="Eq. classes", bar_format="{desc}:{percentage:3.0f}%|{bar}|")
1171 for _, newchains in enumerate(p.imap_unordered(partial(work_infer_mappings, not self.REUSE_ALL, allmappings), full_structures_list, chunksize=1)): 1171 for _, newchains in enumerate(p.imap_unordered(partial(work_infer_mappings, not self.REUSE_ALL, allmappings), full_structures_list, chunksize=1)):
1172 self.update += newchains 1172 self.update += newchains
1173 pbar.update(1) # Everytime the iteration finishes, update the global progress bar 1173 pbar.update(1) # Everytime the iteration finishes, update the global progress bar
...@@ -1183,7 +1183,7 @@ class Pipeline: ...@@ -1183,7 +1183,7 @@ class Pipeline:
1183 exit(1) 1183 exit(1)
1184 else: 1184 else:
1185 conn = sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0) 1185 conn = sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0)
1186 - for eq_class, codelist in tqdm(full_structures_list): 1186 + for eq_class, codelist in tqdm(full_structures_list, desc="Eq. classes"):
1187 codes = codelist.replace('+',',').split(',') 1187 codes = codelist.replace('+',',').split(',')
1188 1188
1189 # Simply convert the list of codes to Chain() objects 1189 # Simply convert the list of codes to Chain() objects
...@@ -1204,6 +1204,7 @@ class Pipeline: ...@@ -1204,6 +1204,7 @@ class Pipeline:
1204 self.n_chains = len(self.update) 1204 self.n_chains = len(self.update)
1205 print(str(self.n_chains) + " RNA chains of interest.") 1205 print(str(self.n_chains) + " RNA chains of interest.")
1206 1206
1207 + @trace_unhandled_exceptions
1207 def dl_and_annotate(self, retry=False, coeff_ncores = 0.75): 1208 def dl_and_annotate(self, retry=False, coeff_ncores = 0.75):
1208 """ 1209 """
1209 Gets mmCIF files from the PDB, and runs DSSR on them. 1210 Gets mmCIF files from the PDB, and runs DSSR on them.
...@@ -1212,7 +1213,7 @@ class Pipeline: ...@@ -1212,7 +1213,7 @@ class Pipeline:
1212 REQUIRES the previous definition of self.update, so call list_available_mappings() before. 1213 REQUIRES the previous definition of self.update, so call list_available_mappings() before.
1213 SETS table structure""" 1214 SETS table structure"""
1214 1215
1215 - setproctitle(f"RNANet.py dl_and_annotate(retry={retry})") 1216 + # setproctitle(f"RNANet.py dl_and_annotate(retry={retry})")
1216 1217
1217 # Prepare the results folders 1218 # Prepare the results folders
1218 if not path.isdir(path_to_3D_data + "RNAcifs"): 1219 if not path.isdir(path_to_3D_data + "RNAcifs"):
...@@ -1293,7 +1294,7 @@ class Pipeline: ...@@ -1293,7 +1294,7 @@ class Pipeline:
1293 ki.close() 1294 ki.close()
1294 kir.close() 1295 kir.close()
1295 if issues: 1296 if issues:
1296 - warn("Added newly discovered issues to known issues:") 1297 + warn(f"Added {issues} newly discovered issues to known issues:")
1297 print("\033[33m"+ " ".join(issues_names) + "\033[0m", flush=True) 1298 print("\033[33m"+ " ".join(issues_names) + "\033[0m", flush=True)
1298 1299
1299 # Add successfully built chains to list 1300 # Add successfully built chains to list
...@@ -1438,7 +1439,7 @@ class Pipeline: ...@@ -1438,7 +1439,7 @@ class Pipeline:
1438 os.makedirs(runDir + "/results/archive/") 1439 os.makedirs(runDir + "/results/archive/")
1439 1440
1440 # Save to by-chain CSV files 1441 # Save to by-chain CSV files
1441 - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3, maxtasksperchild=1) 1442 + p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3)
1442 try: 1443 try:
1443 pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True) 1444 pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True)
1444 for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains, chunksize=2)): 1445 for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains, chunksize=2)):
...@@ -1991,7 +1992,7 @@ def work_mmcif(pdb_id): ...@@ -1991,7 +1992,7 @@ def work_mmcif(pdb_id):
1991 # Attempt to download it if not present 1992 # Attempt to download it if not present
1992 try: 1993 try:
1993 if not path.isfile(final_filepath): 1994 if not path.isfile(final_filepath):
1994 - subprocess.run(["wget", f'http://files.rcsb.org/download/{pdb_id}.cif', "-O", final_filepath], stdout=subprocess.DEVNULL) 1995 + subprocess.run(["wget", f'http://files.rcsb.org/download/{pdb_id}.cif', "-O", final_filepath], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
1995 except: 1996 except:
1996 warn(f"Unable to download {pdb_id}.cif. Ignoring it.", error=True) 1997 warn(f"Unable to download {pdb_id}.cif. Ignoring it.", error=True)
1997 return 1998 return
...@@ -2006,7 +2007,12 @@ def work_mmcif(pdb_id): ...@@ -2006,7 +2007,12 @@ def work_mmcif(pdb_id):
2006 mmCif_info = MMCIF2Dict(final_filepath) 2007 mmCif_info = MMCIF2Dict(final_filepath)
2007 2008
2008 # Get info about that structure 2009 # Get info about that structure
2009 - exp_meth = mmCif_info["_exptl.method"][0] 2010 + try:
2011 + exp_meth = mmCif_info["_exptl.method"][0]
2012 + except KeyError:
2013 + warn(f"Wtf, {pdb_id}.cif has no _exptl.method ? Assuming X-ray.")
2014 + warn(f"Check https://files.rcsb.org/header/{pdb_id}.cif to figure it out.")
2015 + exp_meth = "X-RAY DIFFRACTION"
2010 date = mmCif_info["_pdbx_database_status.recvd_initial_deposition_date"][0] 2016 date = mmCif_info["_pdbx_database_status.recvd_initial_deposition_date"][0]
2011 if "_refine.ls_d_res_high" in mmCif_info.keys() and mmCif_info["_refine.ls_d_res_high"][0] not in ['.', '?']: 2017 if "_refine.ls_d_res_high" in mmCif_info.keys() and mmCif_info["_refine.ls_d_res_high"][0] not in ['.', '?']:
2012 reso = float(mmCif_info["_refine.ls_d_res_high"][0]) 2018 reso = float(mmCif_info["_refine.ls_d_res_high"][0])
...@@ -2463,6 +2469,7 @@ if __name__ == "__main__": ...@@ -2463,6 +2469,7 @@ if __name__ == "__main__":
2463 2469
2464 # Download and annotate new RNA 3D chains (Chain objects in pp.update) 2470 # Download and annotate new RNA 3D chains (Chain objects in pp.update)
2465 pp.dl_and_annotate(coeff_ncores=0.5) 2471 pp.dl_and_annotate(coeff_ncores=0.5)
2472 + print("Here we go.")
2466 2473
2467 # At this point, the structure table is up to date 2474 # At this point, the structure table is up to date
2468 pp.build_chains(coeff_ncores=1.0) 2475 pp.build_chains(coeff_ncores=1.0)
......