Showing 5 changed files with 48 additions and 19 deletions
... | @@ -1389,7 +1389,7 @@ class Pipeline: | ... | @@ -1389,7 +1389,7 @@ class Pipeline: |
1389 | # Remove previous precomputed data | 1389 | # Remove previous precomputed data |
1390 | subprocess.run(["rm","-f", "data/wadley_kernel_eta.npz", "data/wadley_kernel_eta_prime.npz", "data/pair_counts.csv"]) | 1390 | subprocess.run(["rm","-f", "data/wadley_kernel_eta.npz", "data/wadley_kernel_eta_prime.npz", "data/pair_counts.csv"]) |
1391 | for f in self.fam_list: | 1391 | for f in self.fam_list: |
1392 | - subprocess.run(["rm","-f", f"data/{f}.npy"]) | 1392 | + subprocess.run(["rm","-f", f"data/{f}.npy", f"data/{f}_pairs.csv", f"data/{f}_counts.csv"]) |
1393 | 1393 | ||
1394 | # Run statistics files | 1394 | # Run statistics files |
1395 | os.chdir(runDir) | 1395 | os.chdir(runDir) |
... | @@ -1397,13 +1397,12 @@ class Pipeline: | ... | @@ -1397,13 +1397,12 @@ class Pipeline: |
1397 | subprocess.run(["python3.8", "statistics.py", path_to_3D_data, path_to_seq_data]) | 1397 | subprocess.run(["python3.8", "statistics.py", path_to_3D_data, path_to_seq_data]) |
1398 | 1398 | ||
1399 | # Save additional informations | 1399 | # Save additional informations |
1400 | - conn = sqlite3.connect(runDir+"/results/RNANet.db") | 1400 | + with sqlite3.connect(runDir+"/results/RNANet.db") as conn: |
1401 | - pd.read_sql_query("SELECT rfam_acc, description, idty_percent, nb_homologs, nb_3d_chains, nb_total_homol, max_len, comput_time, comput_peak_mem from family ORDER BY nb_3d_chains DESC;", | 1401 | + pd.read_sql_query("SELECT rfam_acc, description, idty_percent, nb_homologs, nb_3d_chains, nb_total_homol, max_len, comput_time, comput_peak_mem from family ORDER BY nb_3d_chains DESC;", |
1402 | - conn).to_csv(runDir + f"/results/archive/families_{time_str}.csv", float_format="%.2f", index=False) | 1402 | + conn).to_csv(runDir + f"/results/archive/families_{time_str}.csv", float_format="%.2f", index=False) |
1403 | - pd.read_sql_query("""SELECT structure_id, chain_name, pdb_start, pdb_end, rfam_acc, inferred, date, exp_method, resolution, issue FROM structure | 1403 | + pd.read_sql_query("""SELECT structure_id, chain_name, pdb_start, pdb_end, rfam_acc, inferred, date, exp_method, resolution, issue FROM structure |
1404 | - JOIN chain ON structure.pdb_id = chain.structure_id | 1404 | + JOIN chain ON structure.pdb_id = chain.structure_id |
1405 | - ORDER BY structure_id, chain_name, rfam_acc ASC;""", conn).to_csv(runDir + f"/results/archive/summary_{time_str}.csv", float_format="%.2f", index=False) | 1405 | + ORDER BY structure_id, chain_name, rfam_acc ASC;""", conn).to_csv(runDir + f"/results/archive/summary_{time_str}.csv", float_format="%.2f", index=False) |
1406 | - conn.close() | ||
1407 | 1406 | ||
1408 | # Archive the results | 1407 | # Archive the results |
1409 | if self.SELECT_ONLY is None: | 1408 | if self.SELECT_ONLY is None: |
... | @@ -2408,6 +2407,7 @@ if __name__ == "__main__": | ... | @@ -2408,6 +2407,7 @@ if __name__ == "__main__": |
2408 | rfam_acc_to_download[c.mapping.rfam_acc] = [ c ] | 2407 | rfam_acc_to_download[c.mapping.rfam_acc] = [ c ] |
2409 | else: | 2408 | else: |
2410 | rfam_acc_to_download[c.mapping.rfam_acc].append(c) | 2409 | rfam_acc_to_download[c.mapping.rfam_acc].append(c) |
2410 | + | ||
2411 | print(f"> Identified {len(rfam_acc_to_download.keys())} families to update and re-align with the crystals' sequences") | 2411 | print(f"> Identified {len(rfam_acc_to_download.keys())} families to update and re-align with the crystals' sequences") |
2412 | pp.fam_list = sorted(rfam_acc_to_download.keys()) | 2412 | pp.fam_list = sorted(rfam_acc_to_download.keys()) |
2413 | 2413 | ... | ... |
1 | # This is a script supposed to be run periodically as a cron job | 1 | # This is a script supposed to be run periodically as a cron job |
2 | 2 | ||
3 | -cd /home/lbecquey/Projects/RNANet; | 3 | +cd /home/lbecquey/Projects/RNANet |
4 | -rm -f nohup.out errors.txt; | 4 | +rm -f latest_run.log errors.txt |
5 | 5 | ||
6 | # Run RNANet | 6 | # Run RNANet |
7 | -nohup bash -c 'time ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ -s -r 20.0 --archive'; | 7 | +bash -c 'time ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ -r 20.0 -s --archive' &> latest_run.log |
8 | +touch results/RNANet.db # update last modification date | ||
9 | +rm -f results/RNANet.db-wal results/RNANet.db-shm # SQLite temporary files | ||
8 | 10 | ||
9 | # Compress | 11 | # Compress |
10 | -rm -f results/RNANet.db.gz | 12 | +rm -f /home/lbecquey/Projects/RNANet/results/RNANet.db.gz |
11 | -gzip -k results/RNANet.db | 13 | +echo 'Deleted results/RNANet.db.gz (if existed)' >> latest_run.log |
14 | +gzip -k /home/lbecquey/Projects/RNANet/results/RNANet.db | ||
15 | +echo 'Recreated it.' >> latest_run.log | ||
12 | 16 | ||
13 | # Sync in Seafile | 17 | # Sync in Seafile |
14 | -seaf-cli start; | 18 | +seaf-cli start >> latest_run.log 2>&1 |
15 | -sleep 30m; | 19 | +echo 'Waiting 10m for SeaFile synchronization...' >> latest_run.log |
16 | -seaf-cli stop; | 20 | +sleep 10m |
21 | +echo `seaf-cli status` >> latest_run.log | ||
22 | +seaf-cli stop >> latest_run.log 2>&1 | ||
23 | +echo 'We are '`date`', update completed.' >> latest_run.log | ||
17 | 24 | ... | ... |
... | @@ -487,3 +487,15 @@ DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AA_1-1562. | ... | @@ -487,3 +487,15 @@ DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AA_1-1562. |
487 | 4v5z_1_AA_1-1563 | 487 | 4v5z_1_AA_1-1563 |
488 | DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AA_1-1563. | 488 | DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AA_1-1563. |
489 | 489 | ||
490 | +6lqm_1_8_1267-4755 | ||
491 | +Could not find nucleotides of chain 8 in annotation 6lqm.json. Either there is a problem with 6lqm mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
492 | + | ||
493 | +6lu8_1_8_1267-4755 | ||
494 | +Could not find nucleotides of chain 8 in annotation 6lu8.json. Either there is a problem with 6lu8 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
495 | + | ||
496 | +6lsr_1_8_1267-4755 | ||
497 | +Could not find nucleotides of chain 8 in annotation 6lsr.json. Either there is a problem with 6lsr mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
498 | + | ||
499 | +6lss_1_8_1267-4755 | ||
500 | +Could not find nucleotides of chain 8 in annotation 6lss.json. Either there is a problem with 6lss mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
501 | + | ... | ... |
... | @@ -288,6 +288,9 @@ def parallel_stats_pairs(f): | ... | @@ -288,6 +288,9 @@ def parallel_stats_pairs(f): |
288 | 288 | ||
289 | REQUIRES tables chain, nucleotide up-to-date.""" | 289 | REQUIRES tables chain, nucleotide up-to-date.""" |
290 | 290 | ||
291 | + if path.isfile("data/"+f+"_pairs.csv") and path.isfile("data/"+f+"_counts.csv"): | ||
292 | + return | ||
293 | + | ||
291 | # Get a worker number to position the progress bar | 294 | # Get a worker number to position the progress bar |
292 | global idxQueue | 295 | global idxQueue |
293 | thr_idx = idxQueue.get() | 296 | thr_idx = idxQueue.get() |
... | @@ -363,7 +366,7 @@ def parallel_stats_pairs(f): | ... | @@ -363,7 +366,7 @@ def parallel_stats_pairs(f): |
363 | data.append(expanded_list) | 366 | data.append(expanded_list) |
364 | 367 | ||
365 | # Update the database | 368 | # Update the database |
366 | - with sqlite3.connect("results/RNANet.db") as conn: | 369 | + with sqlite3.connect("results/RNANet.db", isolation_level=None) as conn: |
367 | conn.execute('pragma journal_mode=wal') # Allow multiple other readers to ask things while we execute this writing query | 370 | conn.execute('pragma journal_mode=wal') # Allow multiple other readers to ask things while we execute this writing query |
368 | sql_execute(conn, """UPDATE chain SET pair_count_cWW = ?, pair_count_cWH = ?, pair_count_cWS = ?, pair_count_cHH = ?, | 371 | sql_execute(conn, """UPDATE chain SET pair_count_cWW = ?, pair_count_cWH = ?, pair_count_cWS = ?, pair_count_cHH = ?, |
369 | pair_count_cHS = ?, pair_count_cSS = ?, pair_count_tWW = ?, pair_count_tWH = ?, pair_count_tWS = ?, | 372 | pair_count_cHS = ?, pair_count_cSS = ?, pair_count_tWW = ?, pair_count_tWH = ?, pair_count_tWS = ?, |
... | @@ -554,7 +557,7 @@ def per_chain_stats(): | ... | @@ -554,7 +557,7 @@ def per_chain_stats(): |
554 | 557 | ||
555 | REQUIRES tables chain, nucleotide up to date. """ | 558 | REQUIRES tables chain, nucleotide up to date. """ |
556 | 559 | ||
557 | - with sqlite3.connect("results/RNANet.db") as conn: | 560 | + with sqlite3.connect("results/RNANet.db", isolation_level=None) as conn: |
558 | # Compute per-chain nucleotide frequencies | 561 | # Compute per-chain nucleotide frequencies |
559 | df = pd.read_sql("SELECT SUM(is_A) as A, SUM(is_C) AS C, SUM(is_G) AS G, SUM(is_U) AS U, SUM(is_other) AS O, chain_id FROM nucleotide GROUP BY chain_id;", conn) | 562 | df = pd.read_sql("SELECT SUM(is_A) as A, SUM(is_C) AS C, SUM(is_G) AS G, SUM(is_U) AS U, SUM(is_other) AS O, chain_id FROM nucleotide GROUP BY chain_id;", conn) |
560 | df["total"] = pd.Series(df.A + df.C + df.G + df.U + df.O, dtype=np.float64) | 563 | df["total"] = pd.Series(df.A + df.C + df.G + df.U + df.O, dtype=np.float64) |
... | @@ -610,7 +613,7 @@ if __name__ == "__main__": | ... | @@ -610,7 +613,7 @@ if __name__ == "__main__": |
610 | 613 | ||
611 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | 614 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) |
612 | pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, leave=True) | 615 | pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, leave=True) |
613 | - | 616 | +sqlite3 |
614 | try: | 617 | try: |
615 | for j in joblist: | 618 | for j in joblist: |
616 | p.apply_async(j.func_, args=j.args_, callback=log_to_pbar(pbar)) | 619 | p.apply_async(j.func_, args=j.args_, callback=log_to_pbar(pbar)) |
... | @@ -626,6 +629,9 @@ if __name__ == "__main__": | ... | @@ -626,6 +629,9 @@ if __name__ == "__main__": |
626 | except: | 629 | except: |
627 | print("Something went wrong") | 630 | print("Something went wrong") |
628 | 631 | ||
632 | + print() | ||
633 | + print() | ||
634 | + | ||
629 | # finish the work after the parallel portions | 635 | # finish the work after the parallel portions |
630 | per_chain_stats() | 636 | per_chain_stats() |
631 | seq_idty() | 637 | seq_idty() | ... | ... |
-
Please register or log in to post a comment