Louis BECQUEY

scripts/ folder

...@@ -10,9 +10,9 @@ __pycache__/ ...@@ -10,9 +10,9 @@ __pycache__/
10 errors.txt 10 errors.txt
11 known_issues.txt 11 known_issues.txt
12 known_issues_reasons.txt 12 known_issues_reasons.txt
13 -kill_rnanet.sh
14 Dockerfile 13 Dockerfile
15 LICENSE 14 LICENSE
16 README.md 15 README.md
17 -automate.sh
18 -build_docker_image.sh
...\ No newline at end of file ...\ No newline at end of file
16 +scripts/automate.sh
17 +scripts/kill_rnanet.sh
18 +scripts/build_docker_image.sh
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -23,7 +23,7 @@ RUN apk update && apk add --no-cache \ ...@@ -23,7 +23,7 @@ RUN apk update && apk add --no-cache \
23 \ 23 \
24 mkdir /3D && mkdir /sequences && \ 24 mkdir /3D && mkdir /sequences && \
25 \ 25 \
26 - mv /RNANet/x3dna-dssr /usr/local/bin/x3dna-dssr && chmod +x /usr/local/bin/x3dna-dssr && \ 26 + mv /RNANet/scripts/x3dna-dssr /usr/local/bin/x3dna-dssr && chmod +x /usr/local/bin/x3dna-dssr && \
27 \ 27 \
28 curl -SL http://eddylab.org/infernal/infernal-1.1.3.tar.gz | tar xz && cd infernal-1.1.3 && \ 28 curl -SL http://eddylab.org/infernal/infernal-1.1.3.tar.gz | tar xz && cd infernal-1.1.3 && \
29 ./configure && make -j 16 && make install && cd easel && make install && cd / && \ 29 ./configure && make -j 16 && make install && cd easel && make install && cd / && \
......
...@@ -86,7 +86,7 @@ $ docker image import rnanet_v1.2_docker.tar rnanet ...@@ -86,7 +86,7 @@ $ docker image import rnanet_v1.2_docker.tar rnanet
86 ``` 86 ```
87 * Step 3 : Run the container, giving it 3 folders to mount as volumes: a first to store the 3D data, a second to store the sequence data and alignments, and a third to output the results, data and logs: 87 * Step 3 : Run the container, giving it 3 folders to mount as volumes: a first to store the 3D data, a second to store the sequence data and alignments, and a third to output the results, data and logs:
88 ``` 88 ```
89 -$ docker run -v path/to/3D/data/folder:/3D -v path/to/sequence/data/folder:/sequences -v path/to/experiment/results/folder:/runDir rnanet [ - other options ] 89 +$ docker run --rm -v path/to/3D/data/folder:/3D -v path/to/sequence/data/folder:/sequences -v path/to/experiment/results/folder:/runDir rnanet [ - other options ]
90 ``` 90 ```
91 91
92 The detailed list of options is below: 92 The detailed list of options is below:
...@@ -124,6 +124,11 @@ The detailed list of options is below: ...@@ -124,6 +124,11 @@ The detailed list of options is below:
124 ``` 124 ```
125 You may not use the --3d-folder and --seq-folder options; they are set by default to the paths you provide with the -v options when running Docker. 125 You may not use the --3d-folder and --seq-folder options; they are set by default to the paths you provide with the -v options when running Docker.
126 126
127 +Typical usage:
128 +```
129 +nohup bash -c 'time docker run --rm -v /path/to/3D/data/folder:/3D -v /path/to/sequence/data/folder:/sequences -v /path/to/experiment/folder:/runDir rnanet -s --no-logs ' &
130 +```
131 +
127 ## Using classical command line installation 132 ## Using classical command line installation
128 133
129 You need to install the dependencies: 134 You need to install the dependencies:
......
...@@ -1546,35 +1546,33 @@ class Pipeline: ...@@ -1546,35 +1546,33 @@ class Pipeline:
1546 runDir + f"/data/{f}_counts.csv"]) 1546 runDir + f"/data/{f}_counts.csv"])
1547 1547
1548 # Run statistics files 1548 # Run statistics files
1549 - subprocess.run(["python3.8", fileDir+"/regression.py"]) 1549 + subprocess.run(["python3.8", fileDir+"/scripts/regression.py", runDir + "/results/RNANet.db"])
1550 subprocess.run(["python3.8", fileDir+"/statistics.py", "--3d-folder", path_to_3D_data, 1550 subprocess.run(["python3.8", fileDir+"/statistics.py", "--3d-folder", path_to_3D_data,
1551 "--seq-folder", path_to_seq_data, "-r", str(self.CRYSTAL_RES)]) 1551 "--seq-folder", path_to_seq_data, "-r", str(self.CRYSTAL_RES)])
1552 1552
1553 # Save additional information 1553 # Save additional information
1554 + os.makedirs(runDir + "/archive", exist_ok=True)
1554 with sqlite3.connect(runDir+"/results/RNANet.db") as conn: 1555 with sqlite3.connect(runDir+"/results/RNANet.db") as conn:
1555 pd.read_sql_query("""SELECT rfam_acc, description, idty_percent, nb_homologs, nb_3d_chains, nb_total_homol, max_len, comput_time, comput_peak_mem 1556 pd.read_sql_query("""SELECT rfam_acc, description, idty_percent, nb_homologs, nb_3d_chains, nb_total_homol, max_len, comput_time, comput_peak_mem
1556 FROM family ORDER BY nb_3d_chains DESC;""", 1557 FROM family ORDER BY nb_3d_chains DESC;""",
1557 - conn).to_csv(runDir + f"/results/archive/families_{time_str}.csv", float_format="%.2f", index=False) 1558 + conn).to_csv(runDir + f"/archive/families_{time_str}.csv", float_format="%.2f", index=False)
1558 pd.read_sql_query("""SELECT eq_class, structure_id, chain_name, pdb_start, pdb_end, rfam_acc, inferred, date, exp_method, resolution, issue 1559 pd.read_sql_query("""SELECT eq_class, structure_id, chain_name, pdb_start, pdb_end, rfam_acc, inferred, date, exp_method, resolution, issue
1559 FROM structure 1560 FROM structure
1560 JOIN chain ON structure.pdb_id = chain.structure_id 1561 JOIN chain ON structure.pdb_id = chain.structure_id
1561 ORDER BY structure_id, chain_name, rfam_acc ASC;""", 1562 ORDER BY structure_id, chain_name, rfam_acc ASC;""",
1562 - conn).to_csv(runDir + f"/results/archive/summary_{time_str}.csv", float_format="%.2f", index=False) 1563 + conn).to_csv(runDir + f"/archive/summary_{time_str}.csv", float_format="%.2f", index=False)
1563 1564
1564 - # Archive the results 1565 + # Update shortcuts to latest versions
1566 + subprocess.run(["rm", "-f", runDir + "/results/RNANET_datapoints_latest.tar.gz",
1567 + runDir + "/results/summary_latest.csv",
1568 + runDir + "/results/families_latest.csv"
1569 + ])
1565 if self.ARCHIVE: 1570 if self.ARCHIVE:
1566 - os.makedirs(runDir + "/results/archive", exist_ok=True)
1567 subprocess.run(["tar", "-C", path_to_3D_data + "/datapoints", "-czf", 1571 subprocess.run(["tar", "-C", path_to_3D_data + "/datapoints", "-czf",
1568 - runDir + f"/results/archive/RNANET_datapoints_{time_str}.tar.gz", "."]) 1572 + runDir + f"/archive/RNANET_datapoints_{time_str}.tar.gz", "."])
1569 - 1573 + subprocess.run(['ln', "-s", runDir + f"/archive/RNANET_datapoints_{time_str}.tar.gz", runDir + "/results/RNANET_datapoints_latest.tar.gz"])
1570 - # Update shortcuts to latest versions 1574 + subprocess.run(['ln', "-s", runDir + f"/archive/summary_{time_str}.csv", runDir + "/results/summary_latest.csv"])
1571 - subprocess.run(["rm", "-f", runDir + "/results/RNANET_datapoints_latest.tar.gz", 1575 + subprocess.run(['ln', "-s", runDir + f"/archive/families_{time_str}.csv", runDir + "/results/families_latest.csv"])
1572 - runDir + "/results/summary_latest.csv",
1573 - runDir + "/results/families_latest.csv"
1574 - ])
1575 - subprocess.run(['ln', "-s", runDir + f"/results/archive/RNANET_datapoints_{time_str}.tar.gz", runDir + "/results/RNANET_datapoints_latest.tar.gz"])
1576 - subprocess.run(['ln', "-s", runDir + f"/results/archive/summary_{time_str}.csv", runDir + "/results/summary_latest.csv"])
1577 - subprocess.run(['ln', "-s", runDir + f"/results/archive/families_{time_str}.csv", runDir + "/results/families_latest.csv"])
1578 1576
1579 def sanitize_database(self): 1577 def sanitize_database(self):
1580 """Searches for issues in the database and correct them""" 1578 """Searches for issues in the database and correct them"""
......
...@@ -15,7 +15,7 @@ cp `which x3dna-dssr` $THISDIR ...@@ -15,7 +15,7 @@ cp `which x3dna-dssr` $THISDIR
15 15
16 ######################################################## Build Docker image ###################################################### 16 ######################################################## Build Docker image ######################################################
17 # Execute the Dockerfile and build the image 17 # Execute the Dockerfile and build the image
18 -docker build -t persalteas/rnanet . 18 +docker build -t persalteas/rnanet ..
19 19
20 ############################################################## Cleaning ########################################################## 20 ############################################################## Cleaning ##########################################################
21 rm x3dna-dssr 21 rm x3dna-dssr
......
...@@ -7,15 +7,18 @@ problems = [ ...@@ -7,15 +7,18 @@ problems = [
7 "1k73_1_B" 7 "1k73_1_B"
8 ] 8 ]
9 9
10 +# provide the path to your data folders, the RNANet.db file, and the RNANet.py file as arguments to this script
10 path_to_3D_data = sys.argv[1] 11 path_to_3D_data = sys.argv[1]
11 path_to_seq_data = sys.argv[2] 12 path_to_seq_data = sys.argv[2]
13 +path_to_db = sys.argv[3]
14 +path_to_RNANet = sys.argv[4]
12 15
13 for p in problems: 16 for p in problems:
14 print() 17 print()
15 print() 18 print()
16 print() 19 print()
17 print() 20 print()
18 - homology = ('-' in p) 21 + homology = ('-' in p) # H4cky l1f3
19 22
20 # Remove the datapoints files and 3D files 23 # Remove the datapoints files and 3D files
21 subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_mapped_to_Rfam/{p}.cif"]) 24 subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_mapped_to_Rfam/{p}.cif"])
...@@ -31,21 +34,20 @@ for p in problems: ...@@ -31,21 +34,20 @@ for p in problems:
31 34
32 # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys 35 # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys
33 for fam in families: 36 for fam in families:
34 - command = ["sqlite3", "results/RNANet.db", f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc=\"{fam}\";"] 37 + command = ["sqlite3", path_to_db, f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc=\"{fam}\";"]
35 print(' '.join(command)) 38 print(' '.join(command))
36 subprocess.run(command) 39 subprocess.run(command)
37 40
38 - command = ["python3.8", "RNAnet.py", "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--extract", "--only", p] 41 + command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--extract", "--only", p]
39 else: 42 else:
40 # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys 43 # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys
41 - command = ["sqlite3", "results/RNANet.db", f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc is null;"] 44 + command = ["sqlite3", path_to_db, f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc is null;"]
42 print(' '.join(command)) 45 print(' '.join(command))
43 subprocess.run(command) 46 subprocess.run(command)
44 47
45 - command = ["python3.8", "RNAnet.py", "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--no-homology", "--extract", "--only", p] 48 + command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--no-homology", "--extract", "--only", p]
46 49
47 # Re-run RNANet 50 # Re-run RNANet
51 + os.chdir(os.path.dirname(os.path.realpath(path_to_db)) + '/../')
48 print('\n',' '.join(command),'\n') 52 print('\n',' '.join(command),'\n')
49 subprocess.run(command) 53 subprocess.run(command)
50 -
51 -# run statistics
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
20 import matplotlib.pyplot as plt 20 import matplotlib.pyplot as plt
21 import pandas as pd 21 import pandas as pd
22 import numpy as np 22 import numpy as np
23 -import scipy, os, sqlite3 23 +import scipy, os, sqlite3, sys
24 # from sklearn.linear_model import LinearRegression 24 # from sklearn.linear_model import LinearRegression
25 from mpl_toolkits.mplot3d import Axes3D 25 from mpl_toolkits.mplot3d import Axes3D
26 pd.set_option('display.max_rows', None) 26 pd.set_option('display.max_rows', None)
...@@ -28,7 +28,10 @@ pd.set_option('display.max_rows', None) ...@@ -28,7 +28,10 @@ pd.set_option('display.max_rows', None)
28 LSU_set = ["RF00002", "RF02540", "RF02541", "RF02543", "RF02546"] # From Rfam CLAN 00112 28 LSU_set = ["RF00002", "RF02540", "RF02541", "RF02543", "RF02546"] # From Rfam CLAN 00112
29 SSU_set = ["RF00177", "RF02542", "RF02545", "RF01959", "RF01960"] # From Rfam CLAN 00111 29 SSU_set = ["RF00177", "RF02542", "RF02545", "RF01959", "RF01960"] # From Rfam CLAN 00111
30 30
31 -with sqlite3.connect(os.getcwd()+"/results/RNANet.db") as conn: 31 +path_to_db = sys.argv[1] # The RNANet.db file
32 +runDir = os.path.dirname(os.path.realpath(path_to_db)) + '/../'
33 +
34 +with sqlite3.connect(path_to_db) as conn:
32 df = pd.read_sql("SELECT rfam_acc, max_len, nb_total_homol, comput_time, comput_peak_mem FROM family;", conn) 35 df = pd.read_sql("SELECT rfam_acc, max_len, nb_total_homol, comput_time, comput_peak_mem FROM family;", conn)
33 36
34 to_remove = [ f for f in df.rfam_acc if f in LSU_set+SSU_set ] 37 to_remove = [ f for f in df.rfam_acc if f in LSU_set+SSU_set ]
...@@ -74,7 +77,7 @@ ax.set_ylabel("Maximum length of sequences ") ...@@ -74,7 +77,7 @@ ax.set_ylabel("Maximum length of sequences ")
74 ax.set_zlabel("Computation time (s)") 77 ax.set_zlabel("Computation time (s)")
75 78
76 plt.subplots_adjust(wspace=0.4) 79 plt.subplots_adjust(wspace=0.4)
77 -plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png") 80 +plt.savefig(runDir+"/results/cmalign_jobs_performance.png")
78 81
79 # # ======================================================== 82 # # ========================================================
80 # # Linear Regression of max_mem as function of max_length 83 # # Linear Regression of max_mem as function of max_length
...@@ -110,7 +113,7 @@ plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png") ...@@ -110,7 +113,7 @@ plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png")
110 # plt.plot(x, coeffs[0] + np.exp(coeffs[1]*x), "-g", label="expo fit") 113 # plt.plot(x, coeffs[0] + np.exp(coeffs[1]*x), "-g", label="expo fit")
111 # plt.plot(x, coeffs_log[0] + coeffs_log[1]*np.log(x), "-b", label="log fit") 114 # plt.plot(x, coeffs_log[0] + coeffs_log[1]*np.log(x), "-b", label="log fit")
112 # plt.legend() 115 # plt.legend()
113 -# plt.savefig("results/regression/memory_linear_model.png") 116 +# plt.savefig(runDir + "/results/regression/memory_linear_model.png")
114 117
115 # # ======================================================== 118 # # ========================================================
116 # # Linear Regression of comp_time as function of n_chains 119 # # Linear Regression of comp_time as function of n_chains
...@@ -131,4 +134,4 @@ plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png") ...@@ -131,4 +134,4 @@ plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png")
131 # plt.ylabel("Computation time (s)") 134 # plt.ylabel("Computation time (s)")
132 # plt.plot(x, b0 + b1*x, "-r", label="linear fit") 135 # plt.plot(x, b0 + b1*x, "-r", label="linear fit")
133 # plt.legend() 136 # plt.legend()
134 -# plt.savefig("results/regression/comp_time_linear_model.png") 137 +# plt.savefig(runDir + "/results/regression/comp_time_linear_model.png")
......