Louis BECQUEY

tqdm progress bars

...@@ -18,6 +18,7 @@ from functools import partial ...@@ -18,6 +18,7 @@ from functools import partial
18 from os import path, makedirs 18 from os import path, makedirs
19 from multiprocessing import Pool, cpu_count, Manager 19 from multiprocessing import Pool, cpu_count, Manager
20 from time import sleep 20 from time import sleep
21 +from tqdm import tqdm
21 22
22 if path.isdir("/home/ubuntu/"): # this is the IFB-core cloud 23 if path.isdir("/home/ubuntu/"): # this is the IFB-core cloud
23 path_to_3D_data = "/mnt/Data/RNA/3D/" 24 path_to_3D_data = "/mnt/Data/RNA/3D/"
...@@ -267,9 +268,9 @@ class Job: ...@@ -267,9 +268,9 @@ class Job:
267 self.max_mem = -1 # not executed yet 268 self.max_mem = -1 # not executed yet
268 self.label = label 269 self.label = label
269 if not how_many_in_parallel: 270 if not how_many_in_parallel:
270 - self.nthreads = cpu_count() 271 + self.nthreads = read_cpu_number()
271 elif how_many_in_parallel == -1: 272 elif how_many_in_parallel == -1:
272 - self.nthreads = cpu_count() - 1 273 + self.nthreads = read_cpu_number() - 1
273 else: 274 else:
274 self.nthreads = how_many_in_parallel 275 self.nthreads = how_many_in_parallel
275 self.useless_bool = False 276 self.useless_bool = False
...@@ -472,6 +473,13 @@ class Monitor: ...@@ -472,6 +473,13 @@ class Monitor:
472 sleep(0.1) 473 sleep(0.1)
473 return max_mem 474 return max_mem
474 475
476 +def read_cpu_number():
477 + # do not use os.cpu_count() on LXC containers
478 + # it reads info from /sys which is not the VM resources but the host resources.
479 + # Read from /proc/cpuinfo instead.
480 + p = subprocess.run(['grep', '-c', 'Intel(', '/proc/cpuinfo'], stdout=subprocess.PIPE)
481 + return int(p.stdout.decode('utf-8')[:-1])
482 +
475 def warn(message, error=False): 483 def warn(message, error=False):
476 if error: 484 if error:
477 print(f"\t> \033[31mERR: {message}\033[0m{errsymb}", flush=True) 485 print(f"\t> \033[31mERR: {message}\033[0m{errsymb}", flush=True)
...@@ -499,11 +507,10 @@ def execute_job(j, jobcount): ...@@ -499,11 +507,10 @@ def execute_job(j, jobcount):
499 507
500 monitor.keep_watching = False 508 monitor.keep_watching = False
501 m = assistant_future.result() 509 m = assistant_future.result()
502 -
503 510
504 elif j.func_ is not None: 511 elif j.func_ is not None:
505 512
506 - print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if not ((type(a) == list) and len(a)>3)])})") 513 + #print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if not ((type(a) == list) and len(a)>3)])})")
507 514
508 m = -1 515 m = -1
509 monitor = Monitor(os.getpid()) 516 monitor = Monitor(os.getpid())
...@@ -558,7 +565,7 @@ def execute_joblist(fulljoblist, printstats=False): ...@@ -558,7 +565,7 @@ def execute_joblist(fulljoblist, printstats=False):
558 print("using", n, "processes:") 565 print("using", n, "processes:")
559 566
560 # execute jobs of priority i that should be processed n by n: 567 # execute jobs of priority i that should be processed n by n:
561 - p = Pool(processes=n, maxtasksperchild=10) 568 + p = Pool(processes=n)
562 raw_results = p.map(partial(execute_job, jobcount=jobcount), bunch) 569 raw_results = p.map(partial(execute_job, jobcount=jobcount), bunch)
563 p.close() 570 p.close()
564 p.join() 571 p.join()
...@@ -833,23 +840,26 @@ def summarize_position(col): ...@@ -833,23 +840,26 @@ def summarize_position(col):
833 else: 840 else:
834 return (0, 0, 0, 0, 0) 841 return (0, 0, 0, 0, 0)
835 842
836 -def alignment_nt_stats(f, list_of_chains): 843 +def alignment_nt_stats(f, list_of_chains) :
837 - print("\t>",f,"... ", flush=True) 844 + global idxQueue
845 + #print("\t>",f,"... ", flush=True)
838 chains_ids = [ str(c) for c in list_of_chains ] 846 chains_ids = [ str(c) for c in list_of_chains ]
839 - 847 + thr_idx = idxQueue.get()
848 + print(thr_idx, flush=True)
849 +
840 # Open the alignment 850 # Open the alignment
841 align = AlignIO.read(path_to_seq_data + f"realigned/{f}++.afa", "fasta") 851 align = AlignIO.read(path_to_seq_data + f"realigned/{f}++.afa", "fasta")
842 alilen = align.get_alignment_length() 852 alilen = align.get_alignment_length()
843 - print("\t>",f,"... loaded", flush=True) 853 + #print("\t>",f,"... loaded", flush=True)
844 - 854 +
845 # Compute statistics per column 855 # Compute statistics per column
846 - results = [ summarize_position(align[:,i]) for i in range(alilen) ] 856 + results = [ summarize_position(align[:,i]) for i in tqdm(range(alilen), position=thr_idx) ]
847 frequencies = np.array(results).T 857 frequencies = np.array(results).T
848 - print("\t>",f,"... loaded, computed", flush=True) 858 + #print("\t>",f,"... loaded, computed", flush=True)
849 859
850 for s in align: 860 for s in align:
851 if not '[' in s.id: # this is a Rfamseq entry, not PDB 861 if not '[' in s.id: # this is a Rfamseq entry, not PDB
852 - continue 862 + continue
853 863
854 # get the right 3D chain: 864 # get the right 3D chain:
855 idx = chains_ids.index(s.id) 865 idx = chains_ids.index(s.id)
...@@ -868,16 +878,16 @@ def alignment_nt_stats(f, list_of_chains): ...@@ -868,16 +878,16 @@ def alignment_nt_stats(f, list_of_chains):
868 i += 1 878 i += 1
869 j += 1 879 j += 1
870 elif c.seq[i] == '-': # gap in the chain, but not in the aligned sequence 880 elif c.seq[i] == '-': # gap in the chain, but not in the aligned sequence
871 - 881 +
872 # search for a gap to the consensus nearby 882 # search for a gap to the consensus nearby
873 k = 0 883 k = 0
874 while j+k<alilen and s.seq[j+k] in ['.','-']: 884 while j+k<alilen and s.seq[j+k] in ['.','-']:
875 if s.seq[j+k] == '-': 885 if s.seq[j+k] == '-':
876 break 886 break
877 k += 1 887 k += 1
878 - 888 +
879 # if found, set j to that position 889 # if found, set j to that position
880 - if j+k<alilen and s.seq[j+k] == '-': 890 + if j+k<alilen and s.seq[j+k] == '-':
881 j = j + k 891 j = j + k
882 continue 892 continue
883 893
...@@ -897,8 +907,8 @@ def alignment_nt_stats(f, list_of_chains): ...@@ -897,8 +907,8 @@ def alignment_nt_stats(f, list_of_chains):
897 else: 907 else:
898 print(f"You are never supposed to reach this. Comparing {c.chain_label} in {i} ({c.seq[i-1:i+2]}) with seq[{j}] ({s.seq[j-3:j+4]}).\n", c.seq, '\n', s.seq, sep='', flush=True) 908 print(f"You are never supposed to reach this. Comparing {c.chain_label} in {i} ({c.seq[i-1:i+2]}) with seq[{j}] ({s.seq[j-3:j+4]}).\n", c.seq, '\n', s.seq, sep='', flush=True)
899 exit(1) 909 exit(1)
900 - if warn_gaps: 910 + #if warn_gaps:
901 - warn(f"Some gap(s) in {c.chain_label} were not re-found in the aligned sequence... Ignoring them.") 911 + #warn(f"Some gap(s) in {c.chain_label} were not re-found in the aligned sequence... Ignoring them.")
902 912
903 # Replace masked positions by the consensus sequence: 913 # Replace masked positions by the consensus sequence:
904 c_seq = c.seq.split() 914 c_seq = c.seq.split()
...@@ -934,11 +944,11 @@ def alignment_nt_stats(f, list_of_chains): ...@@ -934,11 +944,11 @@ def alignment_nt_stats(f, list_of_chains):
934 line = [str(x) for x in list(point[i,:]) ] 944 line = [str(x) for x in list(point[i,:]) ]
935 file.write(','.join(line)+'\n') 945 file.write(','.join(line)+'\n')
936 file.close() 946 file.close()
937 - print("\t\tWritten", c.chain_label, f"to file\t{validsymb}", flush=True) 947 + #print("\t\tWritten", c.chain_label, f"to file\t{validsymb}", flush=True)
948 +
949 + #print("\t>", f, f"... loaded, computed, saved\t{validsymb}", flush=True)
950 + return 0
938 951
939 - print("\t>", f, f"... loaded, computed, saved\t{validsymb}", flush=True)
940 - return None
941 -
942 if __name__ == "__main__": 952 if __name__ == "__main__":
943 print("Main process running. (PID", os.getpid(), ")") 953 print("Main process running. (PID", os.getpid(), ")")
944 954
...@@ -991,15 +1001,15 @@ if __name__ == "__main__": ...@@ -991,15 +1001,15 @@ if __name__ == "__main__":
991 os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam") 1001 os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam")
992 if not path.isdir(path_to_3D_data + "RNAcifs"): 1002 if not path.isdir(path_to_3D_data + "RNAcifs"):
993 os.makedirs(path_to_3D_data + "RNAcifs") 1003 os.makedirs(path_to_3D_data + "RNAcifs")
994 - 1004 +
995 - results = execute_joblist(joblist)[1] 1005 + results = execute_joblist(joblist)[1]
996 loaded_chains = [ c for c in results if not c.delete_me ] 1006 loaded_chains = [ c for c in results if not c.delete_me ]
997 print(f"> Loaded {len(loaded_chains)} RNA chains ({len(chains_with_mapping) - len(loaded_chains)} errors).") 1007 print(f"> Loaded {len(loaded_chains)} RNA chains ({len(chains_with_mapping) - len(loaded_chains)} errors).")
998 1008
999 # =========================================================================== 1009 # ===========================================================================
1000 # Download RNA sequences of the corresponding Rfam families 1010 # Download RNA sequences of the corresponding Rfam families
1001 # =========================================================================== 1011 # ===========================================================================
1002 - 1012 +
1003 # Get the list of Rfam families found 1013 # Get the list of Rfam families found
1004 rfam_acc_to_download = {} 1014 rfam_acc_to_download = {}
1005 for c in loaded_chains: 1015 for c in loaded_chains:
...@@ -1041,18 +1051,18 @@ if __name__ == "__main__": ...@@ -1041,18 +1051,18 @@ if __name__ == "__main__":
1041 if not path.isdir(path_to_3D_data + "datapoints/"): 1051 if not path.isdir(path_to_3D_data + "datapoints/"):
1042 os.makedirs(path_to_3D_data + "datapoints/") 1052 os.makedirs(path_to_3D_data + "datapoints/")
1043 print("Computing nucleotide frequencies in alignments...") 1053 print("Computing nucleotide frequencies in alignments...")
1044 - families = sorted([f for f in rfam_acc_to_download.keys() if f not in ["RF01960", "RF02540"]]) 1054 + families = sorted([f for f in rfam_acc_to_download.keys() ])
1045 - # pool = Pool(processes=cpu_count(), maxtasksperchild=10)
1046 - # results = pool.map(alignment_nt_stats, families)
1047 - # pool.close()
1048 - # pool.join()
1049 - # loaded_chains = list(itertools.chain.from_iterable(results))
1050 1055
1051 # Build job list 1056 # Build job list
1057 + thr_idx_mgr = multiprocessing.Manager()
1058 + idxQueue = thr_idx_mgr.Queue()
1059 + for i in range(10):
1060 + idxQueue.put(i)
1052 fulljoblist = [] 1061 fulljoblist = []
1053 for f in families: 1062 for f in families:
1054 label = f"Save {f} PSSMs" 1063 label = f"Save {f} PSSMs"
1055 list_of_chains = rfam_acc_to_download[f] 1064 list_of_chains = rfam_acc_to_download[f]
1056 - fulljoblist.append(Job(function=alignment_nt_stats, args=[f, list_of_chains, label], how_many_in_parallel=10, priority=1, label=label)) 1065 + fulljoblist.append(Job(function=alignment_nt_stats, args=[f, list_of_chains], how_many_in_parallel=10, priority=1, label=label))
1057 execute_joblist(fulljoblist, printstats=False) 1066 execute_joblist(fulljoblist, printstats=False)
1067 +
1058 print("Completed.") 1068 print("Completed.")
......
1 +#!/bin/bash
2 +PROCESS_TO_KILL="RNAnet.py"
3 +PROCESS_LIST=`ps ax | grep -Ei ${PROCESS_TO_KILL} | grep -Eiv '(grep|vi RNAnet.py)' | awk ' { print $1;}'`
4 +KILLED=
5 +for KILLPID in $PROCESS_LIST; do
6 + if [ ! -z $KILLPID ];then
7 + kill -9 $KILLPID
8 + echo "Killed PID ${KILLPID}"
9 + KILLED=yes
10 + fi
11 +done
12 +
13 +if [ -z $KILLED ];then
14 + echo "Didn't kill anything"
15 +fi