Do not use maxtasksperchild=1

Louis BECQUEY
Commit 28c116cee00d870fd4618870f9e18882b342664e 28c116ce 1 parent b484b3f6
Showing 1 changed file with 15 additions and 8 deletions
RNAnet.py
--- a/RNAnet.py
View file @28c116c
+++ b/RNAnet.py
View file @28c116c
@@ -1164,10 +1164,10 @@ class Pipeline:
             # Compute the list of mappable structures using NR-list and Rfam-PDB mappings
             # And get Chain() objects
             print("> Building list of structures...", flush=True)
-            p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores, maxtasksperchild=1)
+            p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores)
             try:
-                pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, desc="Eq. classes", bar_format="{percentage:3.0f}%|{bar}|")
+                pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, desc="Eq. classes", bar_format="{desc}:{percentage:3.0f}%|{bar}|")
                 for _, newchains in enumerate(p.imap_unordered(partial(work_infer_mappings, not self.REUSE_ALL, allmappings), full_structures_list, chunksize=1)): 
                     self.update += newchains
                     pbar.update(1) # Everytime the iteration finishes, update the global progress bar
@@ -1183,7 +1183,7 @@ class Pipeline:
                 exit(1)
         else:
             conn = sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0)
-            for eq_class, codelist in tqdm(full_structures_list):
+            for eq_class, codelist in tqdm(full_structures_list, desc="Eq. classes"):
                 codes = codelist.replace('+',',').split(',')
                 # Simply convert the list of codes to Chain() objects
@@ -1204,6 +1204,7 @@ class Pipeline:
         self.n_chains = len(self.update)
         print(str(self.n_chains) + " RNA chains of interest.")
+    @trace_unhandled_exceptions
     def dl_and_annotate(self, retry=False, coeff_ncores = 0.75):
         """
         Gets mmCIF files from the PDB, and runs DSSR on them.
@@ -1212,7 +1213,7 @@ class Pipeline:
         REQUIRES the previous definition of self.update, so call list_available_mappings() before.
         SETS table structure"""
-        setproctitle(f"RNANet.py dl_and_annotate(retry={retry})")
+        # setproctitle(f"RNANet.py dl_and_annotate(retry={retry})")
         # Prepare the results folders
         if not path.isdir(path_to_3D_data + "RNAcifs"):
@@ -1293,7 +1294,7 @@ class Pipeline:
         ki.close()
         kir.close()
         if issues:
-            warn("Added newly discovered issues to known issues:")
+            warn(f"Added {issues} newly discovered issues to known issues:")
             print("\033[33m"+ " ".join(issues_names) + "\033[0m", flush=True)
         # Add successfully built chains to list
@@ -1438,7 +1439,7 @@ class Pipeline:
             os.makedirs(runDir + "/results/archive/")
         # Save to by-chain CSV files
-        p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3, maxtasksperchild=1)
+        p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3)
         try:
             pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True) 
             for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains, chunksize=2)):
@@ -1991,7 +1992,7 @@ def work_mmcif(pdb_id):
     # Attempt to download it if not present
     try:
         if not path.isfile(final_filepath):
-            subprocess.run(["wget", f'http://files.rcsb.org/download/{pdb_id}.cif', "-O", final_filepath], stdout=subprocess.DEVNULL)
+            subprocess.run(["wget", f'http://files.rcsb.org/download/{pdb_id}.cif', "-O", final_filepath], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except:
         warn(f"Unable to download {pdb_id}.cif. Ignoring it.", error=True)
         return
@@ -2006,7 +2007,12 @@ def work_mmcif(pdb_id):
         mmCif_info = MMCIF2Dict(final_filepath)
         # Get info about that structure
-        exp_meth = mmCif_info["_exptl.method"][0]
+        try:
+            exp_meth = mmCif_info["_exptl.method"][0]
+        except KeyError:
+            warn(f"Wtf, {pdb_id}.cif has no _exptl.method ? Assuming X-ray.")
+            warn(f"Check https://files.rcsb.org/header/{pdb_id}.cif to figure it out.")
+            exp_meth = "X-RAY DIFFRACTION"
         date = mmCif_info["_pdbx_database_status.recvd_initial_deposition_date"][0]
         if "_refine.ls_d_res_high" in mmCif_info.keys() and mmCif_info["_refine.ls_d_res_high"][0] not in ['.', '?']:
             reso = float(mmCif_info["_refine.ls_d_res_high"][0])
@@ -2463,6 +2469,7 @@ if __name__ == "__main__":
     # Download and annotate new RNA 3D chains (Chain objects in pp.update)
     pp.dl_and_annotate(coeff_ncores=0.5) 
+    print("Here we go.")
     # At this point, the structure table is up to date
     pp.build_chains(coeff_ncores=1.0)