Showing 1 changed file with 17 additions and 4 deletions
... | @@ -887,6 +887,7 @@ class Pipeline: | ... | @@ -887,6 +887,7 @@ class Pipeline: |
887 | self.RUN_STATS = False | 887 | self.RUN_STATS = False |
888 | self.EXTRACT_CHAINS = False | 888 | self.EXTRACT_CHAINS = False |
889 | self.REUSE_ALL = False | 889 | self.REUSE_ALL = False |
890 | + self.SELECT_ONLY = None | ||
890 | 891 | ||
891 | def process_options(self): | 892 | def process_options(self): |
892 | """Sets the paths and options of the pipeline""" | 893 | """Sets the paths and options of the pipeline""" |
... | @@ -897,7 +898,7 @@ class Pipeline: | ... | @@ -897,7 +898,7 @@ class Pipeline: |
897 | opts, _ = getopt.getopt( sys.argv[1:], "r:hs", | 898 | opts, _ = getopt.getopt( sys.argv[1:], "r:hs", |
898 | [ "help", "resolution=", "keep-hetatm=", "from-scratch", | 899 | [ "help", "resolution=", "keep-hetatm=", "from-scratch", |
899 | "fill-gaps=", "3d-folder=", "seq-folder=", | 900 | "fill-gaps=", "3d-folder=", "seq-folder=", |
900 | - "no-homology", "ignore-issues", "extract", | 901 | + "no-homology", "ignore-issues", "extract", "only=", "all", |
901 | "update-homologous" ]) | 902 | "update-homologous" ]) |
902 | except getopt.GetoptError as err: | 903 | except getopt.GetoptError as err: |
903 | print(err) | 904 | print(err) |
... | @@ -934,6 +935,8 @@ class Pipeline: | ... | @@ -934,6 +935,8 @@ class Pipeline: |
934 | print("--no-homology\t\t\tDo not try to compute PSSMs and do not align sequences." | 935 | print("--no-homology\t\t\tDo not try to compute PSSMs and do not align sequences." |
935 | "\n\t\t\t\tAllows to yield more 3D data (consider chains without a Rfam mapping).") | 936 | "\n\t\t\t\tAllows to yield more 3D data (consider chains without a Rfam mapping).") |
936 | print() | 937 | print() |
938 | + print("--all\t\t\t\tBuild chains even if they already are in the database.") | ||
939 | + print("--only\t\t\t\tAsk to process a specific chain label only") | ||
937 | print("--ignore-issues\t\t\tDo not ignore already known issues and attempt to compute them") | 940 | print("--ignore-issues\t\t\tDo not ignore already known issues and attempt to compute them") |
938 | print("--update-homologous\t\tRe-download Rfam and SILVA databases, realign all families, and recompute all CSV files") | 941 | print("--update-homologous\t\tRe-download Rfam and SILVA databases, realign all families, and recompute all CSV files") |
939 | print("--from-scratch\t\t\tDelete database, local 3D and sequence files, and known issues, and recompute.") | 942 | print("--from-scratch\t\t\tDelete database, local 3D and sequence files, and known issues, and recompute.") |
... | @@ -969,6 +972,10 @@ class Pipeline: | ... | @@ -969,6 +972,10 @@ class Pipeline: |
969 | print("> Storing sequences into", path_to_seq_data) | 972 | print("> Storing sequences into", path_to_seq_data) |
970 | elif opt == "--ignore-issues": | 973 | elif opt == "--ignore-issues": |
971 | self.USE_KNOWN_ISSUES = False | 974 | self.USE_KNOWN_ISSUES = False |
975 | + elif opt == "--only": | ||
976 | + self.USE_KNOWN_ISSUES = False | ||
977 | + self.REUSE_ALL = True | ||
978 | + self.SELECT_ONLY = arg | ||
972 | elif opt == "--from-scratch": | 979 | elif opt == "--from-scratch": |
973 | warn("Deleting previous database and recomputing from scratch.") | 980 | warn("Deleting previous database and recomputing from scratch.") |
974 | subprocess.run(["rm", "-rf", | 981 | subprocess.run(["rm", "-rf", |
... | @@ -987,6 +994,9 @@ class Pipeline: | ... | @@ -987,6 +994,9 @@ class Pipeline: |
987 | path_to_seq_data + "realigned", | 994 | path_to_seq_data + "realigned", |
988 | path_to_seq_data + "rfam_sequences"]) | 995 | path_to_seq_data + "rfam_sequences"]) |
989 | self.REUSE_ALL = True | 996 | self.REUSE_ALL = True |
997 | + elif opt == "--all": | ||
998 | + self.REUSE_ALL = True | ||
999 | + self.USE_KNOWN_ISSUES = False | ||
990 | elif opt == "--extract": | 1000 | elif opt == "--extract": |
991 | self.EXTRACT_CHAINS = True | 1001 | self.EXTRACT_CHAINS = True |
992 | 1002 | ||
... | @@ -1054,6 +1064,9 @@ class Pipeline: | ... | @@ -1054,6 +1064,9 @@ class Pipeline: |
1054 | self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label)) | 1064 | self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label)) |
1055 | conn.close() | 1065 | conn.close() |
1056 | 1066 | ||
1067 | + if self.SELECT_ONLY is not None: | ||
1068 | + self.update = [ c for c in self.update if c.chain_label == self.SELECT_ONLY ] | ||
1069 | + | ||
1057 | self.n_chains = len(self.update) | 1070 | self.n_chains = len(self.update) |
1058 | print(str(self.n_chains) + " RNA chains of interest.") | 1071 | print(str(self.n_chains) + " RNA chains of interest.") |
1059 | 1072 | ||
... | @@ -1325,13 +1338,13 @@ class Pipeline: | ... | @@ -1325,13 +1338,13 @@ class Pipeline: |
1325 | conn = sqlite3.connect(runDir + "/results/RNANet.db") | 1338 | conn = sqlite3.connect(runDir + "/results/RNANet.db") |
1326 | 1339 | ||
1327 | # Assert every structure is used | 1340 | # Assert every structure is used |
1328 | - r = sql_ask_database(conn, """SELECT DISTINCT pdb_id FROM structure WHERE pdb_id NOT IN (SELECT DISTINCT structure_id FROM chain);""") | 1341 | + r = sql_ask_database(conn, """SELECT DISTINCT pdb_id FROM structure WHERE pdb_id NOT IN (SELECT DISTINCT structure_id FROM chain WHERE issue = 0);""") |
1329 | if len(r) and r[0][0] is not None: | 1342 | if len(r) and r[0][0] is not None: |
1330 | - warn("Structures without referenced chains have been detected. This happens if we have known issues, for example.") | 1343 | + warn("Structures without referenced chains have been detected.") |
1331 | print(" ".join([x[0] for x in r])) | 1344 | print(" ".join([x[0] for x in r])) |
1332 | 1345 | ||
1333 | # Assert every chain is attached to a structure | 1346 | # Assert every chain is attached to a structure |
1334 | - r = sql_ask_database(conn, """SELECT DISTINCT chain_id, structure_id FROM chain WHERE structure_id NOT IN (SELECT DISTINCT pdb_id FROM structure);""") | 1347 | + r = sql_ask_database(conn, """SELECT DISTINCT chain_id, structure_id FROM chain WHERE structure_id NOT IN (SELECT DISTINCT pdb_id FROM structure) AND issue = 0;""") |
1335 | if len(r) and r[0][0] is not None: | 1348 | if len(r) and r[0][0] is not None: |
1336 | warn("Chains without referenced structures have been detected") | 1349 | warn("Chains without referenced structures have been detected") |
1337 | print(" ".join([str(x[1])+'-'+str(x[0]) for x in r])) | 1350 | print(" ".join([str(x[1])+'-'+str(x[0]) for x in r])) | ... | ... |
-
Please register or log in to post a comment