Louis BECQUEY

New debug command-line options: --only and --all

Showing 1 changed file with 17 additions and 4 deletions
...@@ -887,6 +887,7 @@ class Pipeline: ...@@ -887,6 +887,7 @@ class Pipeline:
887 self.RUN_STATS = False 887 self.RUN_STATS = False
888 self.EXTRACT_CHAINS = False 888 self.EXTRACT_CHAINS = False
889 self.REUSE_ALL = False 889 self.REUSE_ALL = False
890 + self.SELECT_ONLY = None
890 891
891 def process_options(self): 892 def process_options(self):
892 """Sets the paths and options of the pipeline""" 893 """Sets the paths and options of the pipeline"""
...@@ -897,7 +898,7 @@ class Pipeline: ...@@ -897,7 +898,7 @@ class Pipeline:
897 opts, _ = getopt.getopt( sys.argv[1:], "r:hs", 898 opts, _ = getopt.getopt( sys.argv[1:], "r:hs",
898 [ "help", "resolution=", "keep-hetatm=", "from-scratch", 899 [ "help", "resolution=", "keep-hetatm=", "from-scratch",
899 "fill-gaps=", "3d-folder=", "seq-folder=", 900 "fill-gaps=", "3d-folder=", "seq-folder=",
900 - "no-homology", "ignore-issues", "extract", 901 + "no-homology", "ignore-issues", "extract", "only=", "all",
901 "update-homologous" ]) 902 "update-homologous" ])
902 except getopt.GetoptError as err: 903 except getopt.GetoptError as err:
903 print(err) 904 print(err)
...@@ -934,6 +935,8 @@ class Pipeline: ...@@ -934,6 +935,8 @@ class Pipeline:
934 print("--no-homology\t\t\tDo not try to compute PSSMs and do not align sequences." 935 print("--no-homology\t\t\tDo not try to compute PSSMs and do not align sequences."
935 "\n\t\t\t\tAllows to yield more 3D data (consider chains without a Rfam mapping).") 936 "\n\t\t\t\tAllows to yield more 3D data (consider chains without a Rfam mapping).")
936 print() 937 print()
938 + print("--all\t\t\t\tBuild chains even if they already are in the database.")
939 + print("--only\t\t\t\tAsk to process a specific chain label only")
937 print("--ignore-issues\t\t\tDo not ignore already known issues and attempt to compute them") 940 print("--ignore-issues\t\t\tDo not ignore already known issues and attempt to compute them")
938 print("--update-homologous\t\tRe-download Rfam and SILVA databases, realign all families, and recompute all CSV files") 941 print("--update-homologous\t\tRe-download Rfam and SILVA databases, realign all families, and recompute all CSV files")
939 print("--from-scratch\t\t\tDelete database, local 3D and sequence files, and known issues, and recompute.") 942 print("--from-scratch\t\t\tDelete database, local 3D and sequence files, and known issues, and recompute.")
...@@ -969,6 +972,10 @@ class Pipeline: ...@@ -969,6 +972,10 @@ class Pipeline:
969 print("> Storing sequences into", path_to_seq_data) 972 print("> Storing sequences into", path_to_seq_data)
970 elif opt == "--ignore-issues": 973 elif opt == "--ignore-issues":
971 self.USE_KNOWN_ISSUES = False 974 self.USE_KNOWN_ISSUES = False
975 + elif opt == "--only":
976 + self.USE_KNOWN_ISSUES = False
977 + self.REUSE_ALL = True
978 + self.SELECT_ONLY = arg
972 elif opt == "--from-scratch": 979 elif opt == "--from-scratch":
973 warn("Deleting previous database and recomputing from scratch.") 980 warn("Deleting previous database and recomputing from scratch.")
974 subprocess.run(["rm", "-rf", 981 subprocess.run(["rm", "-rf",
...@@ -987,6 +994,9 @@ class Pipeline: ...@@ -987,6 +994,9 @@ class Pipeline:
987 path_to_seq_data + "realigned", 994 path_to_seq_data + "realigned",
988 path_to_seq_data + "rfam_sequences"]) 995 path_to_seq_data + "rfam_sequences"])
989 self.REUSE_ALL = True 996 self.REUSE_ALL = True
997 + elif opt == "--all":
998 + self.REUSE_ALL = True
999 + self.USE_KNOWN_ISSUES = False
990 elif opt == "--extract": 1000 elif opt == "--extract":
991 self.EXTRACT_CHAINS = True 1001 self.EXTRACT_CHAINS = True
992 1002
...@@ -1054,6 +1064,9 @@ class Pipeline: ...@@ -1054,6 +1064,9 @@ class Pipeline:
1054 self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label)) 1064 self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label))
1055 conn.close() 1065 conn.close()
1056 1066
1067 + if self.SELECT_ONLY is not None:
1068 + self.update = [ c for c in self.update if c.chain_label == self.SELECT_ONLY ]
1069 +
1057 self.n_chains = len(self.update) 1070 self.n_chains = len(self.update)
1058 print(str(self.n_chains) + " RNA chains of interest.") 1071 print(str(self.n_chains) + " RNA chains of interest.")
1059 1072
...@@ -1325,13 +1338,13 @@ class Pipeline: ...@@ -1325,13 +1338,13 @@ class Pipeline:
1325 conn = sqlite3.connect(runDir + "/results/RNANet.db") 1338 conn = sqlite3.connect(runDir + "/results/RNANet.db")
1326 1339
1327 # Assert every structure is used 1340 # Assert every structure is used
1328 - r = sql_ask_database(conn, """SELECT DISTINCT pdb_id FROM structure WHERE pdb_id NOT IN (SELECT DISTINCT structure_id FROM chain);""") 1341 + r = sql_ask_database(conn, """SELECT DISTINCT pdb_id FROM structure WHERE pdb_id NOT IN (SELECT DISTINCT structure_id FROM chain WHERE issue = 0);""")
1329 if len(r) and r[0][0] is not None: 1342 if len(r) and r[0][0] is not None:
1330 - warn("Structures without referenced chains have been detected. This happens if we have known issues, for example.") 1343 + warn("Structures without referenced chains have been detected.")
1331 print(" ".join([x[0] for x in r])) 1344 print(" ".join([x[0] for x in r]))
1332 1345
1333 # Assert every chain is attached to a structure 1346 # Assert every chain is attached to a structure
1334 - r = sql_ask_database(conn, """SELECT DISTINCT chain_id, structure_id FROM chain WHERE structure_id NOT IN (SELECT DISTINCT pdb_id FROM structure);""") 1347 + r = sql_ask_database(conn, """SELECT DISTINCT chain_id, structure_id FROM chain WHERE structure_id NOT IN (SELECT DISTINCT pdb_id FROM structure) AND issue = 0;""")
1335 if len(r) and r[0][0] is not None: 1348 if len(r) and r[0][0] is not None:
1336 warn("Chains without referenced structures have been detected") 1349 warn("Chains without referenced structures have been detected")
1337 print(" ".join([str(x[1])+'-'+str(x[0]) for x in r])) 1350 print(" ".join([str(x[1])+'-'+str(x[0]) for x in r]))
......