Showing
9 changed files
with
41 additions
and
33 deletions
... | @@ -10,9 +10,9 @@ __pycache__/ | ... | @@ -10,9 +10,9 @@ __pycache__/ |
10 | errors.txt | 10 | errors.txt |
11 | known_issues.txt | 11 | known_issues.txt |
12 | known_issues_reasons.txt | 12 | known_issues_reasons.txt |
13 | -kill_rnanet.sh | ||
14 | Dockerfile | 13 | Dockerfile |
15 | LICENSE | 14 | LICENSE |
16 | README.md | 15 | README.md |
17 | -automate.sh | ||
18 | -build_docker_image.sh | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
16 | +scripts/automate.sh | ||
17 | +scripts/kill_rnanet.sh | ||
18 | +scripts/build_docker_image.sh | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -23,7 +23,7 @@ RUN apk update && apk add --no-cache \ | ... | @@ -23,7 +23,7 @@ RUN apk update && apk add --no-cache \ |
23 | \ | 23 | \ |
24 | mkdir /3D && mkdir /sequences && \ | 24 | mkdir /3D && mkdir /sequences && \ |
25 | \ | 25 | \ |
26 | - mv /RNANet/x3dna-dssr /usr/local/bin/x3dna-dssr && chmod +x /usr/local/bin/x3dna-dssr && \ | 26 | + mv /RNANet/scripts/x3dna-dssr /usr/local/bin/x3dna-dssr && chmod +x /usr/local/bin/x3dna-dssr && \ |
27 | \ | 27 | \ |
28 | curl -SL http://eddylab.org/infernal/infernal-1.1.3.tar.gz | tar xz && cd infernal-1.1.3 && \ | 28 | curl -SL http://eddylab.org/infernal/infernal-1.1.3.tar.gz | tar xz && cd infernal-1.1.3 && \ |
29 | ./configure && make -j 16 && make install && cd easel && make install && cd / && \ | 29 | ./configure && make -j 16 && make install && cd easel && make install && cd / && \ | ... | ... |
... | @@ -86,7 +86,7 @@ $ docker image import rnanet_v1.2_docker.tar rnanet | ... | @@ -86,7 +86,7 @@ $ docker image import rnanet_v1.2_docker.tar rnanet |
86 | ``` | 86 | ``` |
87 | * Step 3 : Run the container, giving it 3 folders to mount as volumes: a first to store the 3D data, a second to store the sequence data and alignments, and a third to output the results, data and logs: | 87 | * Step 3 : Run the container, giving it 3 folders to mount as volumes: a first to store the 3D data, a second to store the sequence data and alignments, and a third to output the results, data and logs: |
88 | ``` | 88 | ``` |
89 | -$ docker run -v path/to/3D/data/folder:/3D -v path/to/sequence/data/folder:/sequences -v path/to/experiment/results/folder:/runDir rnanet [ - other options ] | 89 | +$ docker run --rm -v path/to/3D/data/folder:/3D -v path/to/sequence/data/folder:/sequences -v path/to/experiment/results/folder:/runDir rnanet [ - other options ] |
90 | ``` | 90 | ``` |
91 | 91 | ||
92 | The detailed list of options is below: | 92 | The detailed list of options is below: |
... | @@ -124,6 +124,11 @@ The detailed list of options is below: | ... | @@ -124,6 +124,11 @@ The detailed list of options is below: |
124 | ``` | 124 | ``` |
125 | You may not use the --3d-folder and --seq-folder options; they are set by default to the paths you provide with the -v options when running Docker. | 125 | You may not use the --3d-folder and --seq-folder options; they are set by default to the paths you provide with the -v options when running Docker. |
126 | 126 | ||
127 | +Typical usage: | ||
128 | +``` | ||
129 | +nohup bash -c 'time docker run --rm -v /path/to/3D/data/folder:/3D -v /path/to/sequence/data/folder:/sequences -v /path/to/experiment/folder:/runDir rnanet -s --no-logs ' & | ||
130 | +``` | ||
131 | + | ||
127 | ## Using classical command line installation | 132 | ## Using classical command line installation |
128 | 133 | ||
129 | You need to install the dependencies: | 134 | You need to install the dependencies: | ... | ... |
... | @@ -1546,35 +1546,33 @@ class Pipeline: | ... | @@ -1546,35 +1546,33 @@ class Pipeline: |
1546 | runDir + f"/data/{f}_counts.csv"]) | 1546 | runDir + f"/data/{f}_counts.csv"]) |
1547 | 1547 | ||
1548 | # Run statistics files | 1548 | # Run statistics files |
1549 | - subprocess.run(["python3.8", fileDir+"/regression.py"]) | 1549 | + subprocess.run(["python3.8", fileDir+"/scripts/regression.py", runDir + "/results/RNANet.db"]) |
1550 | subprocess.run(["python3.8", fileDir+"/statistics.py", "--3d-folder", path_to_3D_data, | 1550 | subprocess.run(["python3.8", fileDir+"/statistics.py", "--3d-folder", path_to_3D_data, |
1551 | "--seq-folder", path_to_seq_data, "-r", str(self.CRYSTAL_RES)]) | 1551 | "--seq-folder", path_to_seq_data, "-r", str(self.CRYSTAL_RES)]) |
1552 | 1552 | ||
1553 | # Save additional information | 1553 | # Save additional information |
1554 | + os.makedirs(runDir + "/archive", exist_ok=True) | ||
1554 | with sqlite3.connect(runDir+"/results/RNANet.db") as conn: | 1555 | with sqlite3.connect(runDir+"/results/RNANet.db") as conn: |
1555 | pd.read_sql_query("""SELECT rfam_acc, description, idty_percent, nb_homologs, nb_3d_chains, nb_total_homol, max_len, comput_time, comput_peak_mem | 1556 | pd.read_sql_query("""SELECT rfam_acc, description, idty_percent, nb_homologs, nb_3d_chains, nb_total_homol, max_len, comput_time, comput_peak_mem |
1556 | FROM family ORDER BY nb_3d_chains DESC;""", | 1557 | FROM family ORDER BY nb_3d_chains DESC;""", |
1557 | - conn).to_csv(runDir + f"/results/archive/families_{time_str}.csv", float_format="%.2f", index=False) | 1558 | + conn).to_csv(runDir + f"/archive/families_{time_str}.csv", float_format="%.2f", index=False) |
1558 | pd.read_sql_query("""SELECT eq_class, structure_id, chain_name, pdb_start, pdb_end, rfam_acc, inferred, date, exp_method, resolution, issue | 1559 | pd.read_sql_query("""SELECT eq_class, structure_id, chain_name, pdb_start, pdb_end, rfam_acc, inferred, date, exp_method, resolution, issue |
1559 | FROM structure | 1560 | FROM structure |
1560 | JOIN chain ON structure.pdb_id = chain.structure_id | 1561 | JOIN chain ON structure.pdb_id = chain.structure_id |
1561 | ORDER BY structure_id, chain_name, rfam_acc ASC;""", | 1562 | ORDER BY structure_id, chain_name, rfam_acc ASC;""", |
1562 | - conn).to_csv(runDir + f"/results/archive/summary_{time_str}.csv", float_format="%.2f", index=False) | 1563 | + conn).to_csv(runDir + f"/archive/summary_{time_str}.csv", float_format="%.2f", index=False) |
1563 | 1564 | ||
1564 | - # Archive the results | 1565 | + # Update shortcuts to latest versions |
1566 | + subprocess.run(["rm", "-f", runDir + "/results/RNANET_datapoints_latest.tar.gz", | ||
1567 | + runDir + "/results/summary_latest.csv", | ||
1568 | + runDir + "/results/families_latest.csv" | ||
1569 | + ]) | ||
1565 | if self.ARCHIVE: | 1570 | if self.ARCHIVE: |
1566 | - os.makedirs(runDir + "/results/archive", exist_ok=True) | ||
1567 | subprocess.run(["tar", "-C", path_to_3D_data + "/datapoints", "-czf", | 1571 | subprocess.run(["tar", "-C", path_to_3D_data + "/datapoints", "-czf", |
1568 | - runDir + f"/results/archive/RNANET_datapoints_{time_str}.tar.gz", "."]) | 1572 | + runDir + f"/archive/RNANET_datapoints_{time_str}.tar.gz", "."]) |
1569 | - | 1573 | + subprocess.run(['ln', "-s", runDir + f"/archive/RNANET_datapoints_{time_str}.tar.gz", runDir + "/results/RNANET_datapoints_latest.tar.gz"]) |
1570 | - # Update shortcuts to latest versions | 1574 | + subprocess.run(['ln', "-s", runDir + f"/archive/summary_{time_str}.csv", runDir + "/results/summary_latest.csv"]) |
1571 | - subprocess.run(["rm", "-f", runDir + "/results/RNANET_datapoints_latest.tar.gz", | 1575 | + subprocess.run(['ln', "-s", runDir + f"/archive/families_{time_str}.csv", runDir + "/results/families_latest.csv"]) |
1572 | - runDir + "/results/summary_latest.csv", | ||
1573 | - runDir + "/results/families_latest.csv" | ||
1574 | - ]) | ||
1575 | - subprocess.run(['ln', "-s", runDir + f"/results/archive/RNANET_datapoints_{time_str}.tar.gz", runDir + "/results/RNANET_datapoints_latest.tar.gz"]) | ||
1576 | - subprocess.run(['ln', "-s", runDir + f"/results/archive/summary_{time_str}.csv", runDir + "/results/summary_latest.csv"]) | ||
1577 | - subprocess.run(['ln', "-s", runDir + f"/results/archive/families_{time_str}.csv", runDir + "/results/families_latest.csv"]) | ||
1578 | 1576 | ||
1579 | def sanitize_database(self): | 1577 | def sanitize_database(self): |
1580 | """Searches for issues in the database and correct them""" | 1578 | """Searches for issues in the database and correct them""" | ... | ... |
File moved
... | @@ -15,7 +15,7 @@ cp `which x3dna-dssr` $THISDIR | ... | @@ -15,7 +15,7 @@ cp `which x3dna-dssr` $THISDIR |
15 | 15 | ||
16 | ######################################################## Build Docker image ###################################################### | 16 | ######################################################## Build Docker image ###################################################### |
17 | # Execute the Dockerfile and build the image | 17 | # Execute the Dockerfile and build the image |
18 | -docker build -t persalteas/rnanet . | 18 | +docker build -t persalteas/rnanet .. |
19 | 19 | ||
20 | ############################################################## Cleaning ########################################################## | 20 | ############################################################## Cleaning ########################################################## |
21 | rm x3dna-dssr | 21 | rm x3dna-dssr | ... | ... |
File moved
... | @@ -7,15 +7,18 @@ problems = [ | ... | @@ -7,15 +7,18 @@ problems = [ |
7 | "1k73_1_B" | 7 | "1k73_1_B" |
8 | ] | 8 | ] |
9 | 9 | ||
10 | +# provide the path to your data folders, the RNANet.db file, and the RNANet.py file as arguments to this script | ||
10 | path_to_3D_data = sys.argv[1] | 11 | path_to_3D_data = sys.argv[1] |
11 | path_to_seq_data = sys.argv[2] | 12 | path_to_seq_data = sys.argv[2] |
13 | +path_to_db = sys.argv[3] | ||
14 | +path_to_RNANet = sys.argv[4] | ||
12 | 15 | ||
13 | for p in problems: | 16 | for p in problems: |
14 | print() | 17 | print() |
15 | print() | 18 | print() |
16 | print() | 19 | print() |
17 | print() | 20 | print() |
18 | - homology = ('-' in p) | 21 | + homology = ('-' in p) # H4cky l1f3 |
19 | 22 | ||
20 | # Remove the datapoints files and 3D files | 23 | # Remove the datapoints files and 3D files |
21 | subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_mapped_to_Rfam/{p}.cif"]) | 24 | subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_mapped_to_Rfam/{p}.cif"]) |
... | @@ -31,21 +34,20 @@ for p in problems: | ... | @@ -31,21 +34,20 @@ for p in problems: |
31 | 34 | ||
32 | # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys | 35 | # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys |
33 | for fam in families: | 36 | for fam in families: |
34 | - command = ["sqlite3", "results/RNANet.db", f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc=\"{fam}\";"] | 37 | + command = ["sqlite3", path_to_db, f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc=\"{fam}\";"] |
35 | print(' '.join(command)) | 38 | print(' '.join(command)) |
36 | subprocess.run(command) | 39 | subprocess.run(command) |
37 | 40 | ||
38 | - command = ["python3.8", "RNAnet.py", "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--extract", "--only", p] | 41 | + command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--extract", "--only", p] |
39 | else: | 42 | else: |
40 | # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys | 43 | # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys |
41 | - command = ["sqlite3", "results/RNANet.db", f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc is null;"] | 44 | + command = ["sqlite3", path_to_db, f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc is null;"] |
42 | print(' '.join(command)) | 45 | print(' '.join(command)) |
43 | subprocess.run(command) | 46 | subprocess.run(command) |
44 | 47 | ||
45 | - command = ["python3.8", "RNAnet.py", "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--no-homology", "--extract", "--only", p] | 48 | + command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--no-homology", "--extract", "--only", p] |
46 | 49 | ||
47 | # Re-run RNANet | 50 | # Re-run RNANet |
51 | + os.chdir(os.path.dirname(os.path.realpath(path_to_db)) + '/../') | ||
48 | print('\n',' '.join(command),'\n') | 52 | print('\n',' '.join(command),'\n') |
49 | subprocess.run(command) | 53 | subprocess.run(command) |
50 | - | ||
51 | -# run statistics | ... | ... |
... | @@ -20,7 +20,7 @@ | ... | @@ -20,7 +20,7 @@ |
20 | import matplotlib.pyplot as plt | 20 | import matplotlib.pyplot as plt |
21 | import pandas as pd | 21 | import pandas as pd |
22 | import numpy as np | 22 | import numpy as np |
23 | -import scipy, os, sqlite3 | 23 | +import scipy, os, sqlite3, sys |
24 | # from sklearn.linear_model import LinearRegression | 24 | # from sklearn.linear_model import LinearRegression |
25 | from mpl_toolkits.mplot3d import Axes3D | 25 | from mpl_toolkits.mplot3d import Axes3D |
26 | pd.set_option('display.max_rows', None) | 26 | pd.set_option('display.max_rows', None) |
... | @@ -28,7 +28,10 @@ pd.set_option('display.max_rows', None) | ... | @@ -28,7 +28,10 @@ pd.set_option('display.max_rows', None) |
28 | LSU_set = ["RF00002", "RF02540", "RF02541", "RF02543", "RF02546"] # From Rfam CLAN 00112 | 28 | LSU_set = ["RF00002", "RF02540", "RF02541", "RF02543", "RF02546"] # From Rfam CLAN 00112 |
29 | SSU_set = ["RF00177", "RF02542", "RF02545", "RF01959", "RF01960"] # From Rfam CLAN 00111 | 29 | SSU_set = ["RF00177", "RF02542", "RF02545", "RF01959", "RF01960"] # From Rfam CLAN 00111 |
30 | 30 | ||
31 | -with sqlite3.connect(os.getcwd()+"/results/RNANet.db") as conn: | 31 | +path_to_db = sys.argv[1] # The RNANet.db file |
32 | +runDir = os.path.dirname(os.path.realpath(path_to_db)) + '/../' | ||
33 | + | ||
34 | +with sqlite3.connect(path_to_db) as conn: | ||
32 | df = pd.read_sql("SELECT rfam_acc, max_len, nb_total_homol, comput_time, comput_peak_mem FROM family;", conn) | 35 | df = pd.read_sql("SELECT rfam_acc, max_len, nb_total_homol, comput_time, comput_peak_mem FROM family;", conn) |
33 | 36 | ||
34 | to_remove = [ f for f in df.rfam_acc if f in LSU_set+SSU_set ] | 37 | to_remove = [ f for f in df.rfam_acc if f in LSU_set+SSU_set ] |
... | @@ -74,7 +77,7 @@ ax.set_ylabel("Maximum length of sequences ") | ... | @@ -74,7 +77,7 @@ ax.set_ylabel("Maximum length of sequences ") |
74 | ax.set_zlabel("Computation time (s)") | 77 | ax.set_zlabel("Computation time (s)") |
75 | 78 | ||
76 | plt.subplots_adjust(wspace=0.4) | 79 | plt.subplots_adjust(wspace=0.4) |
77 | -plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png") | 80 | +plt.savefig(runDir+"/results/cmalign_jobs_performance.png") |
78 | 81 | ||
79 | # # ======================================================== | 82 | # # ======================================================== |
80 | # # Linear Regression of max_mem as function of max_length | 83 | # # Linear Regression of max_mem as function of max_length |
... | @@ -110,7 +113,7 @@ plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png") | ... | @@ -110,7 +113,7 @@ plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png") |
110 | # plt.plot(x, coeffs[0] + np.exp(coeffs[1]*x), "-g", label="expo fit") | 113 | # plt.plot(x, coeffs[0] + np.exp(coeffs[1]*x), "-g", label="expo fit") |
111 | # plt.plot(x, coeffs_log[0] + coeffs_log[1]*np.log(x), "-b", label="log fit") | 114 | # plt.plot(x, coeffs_log[0] + coeffs_log[1]*np.log(x), "-b", label="log fit") |
112 | # plt.legend() | 115 | # plt.legend() |
113 | -# plt.savefig("results/regression/memory_linear_model.png") | 116 | +# plt.savefig(runDir + "/results/regression/memory_linear_model.png") |
114 | 117 | ||
115 | # # ======================================================== | 118 | # # ======================================================== |
116 | # # Linear Regression of comp_time as function of n_chains | 119 | # # Linear Regression of comp_time as function of n_chains |
... | @@ -131,4 +134,4 @@ plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png") | ... | @@ -131,4 +134,4 @@ plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png") |
131 | # plt.ylabel("Computation time (s)") | 134 | # plt.ylabel("Computation time (s)") |
132 | # plt.plot(x, b0 + b1*x, "-r", label="linear fit") | 135 | # plt.plot(x, b0 + b1*x, "-r", label="linear fit") |
133 | # plt.legend() | 136 | # plt.legend() |
134 | -# plt.savefig("results/regression/comp_time_linear_model.png") | 137 | +# plt.savefig(runDir + "/results/regression/comp_time_linear_model.png") | ... | ... |
-
Please register or login to post a comment