Louis BECQUEY

Better error reporting for structures at 20 Å resolution

Showing 1 changed file with 24 additions and 21 deletions
...@@ -252,14 +252,23 @@ class Chain: ...@@ -252,14 +252,23 @@ class Chain:
252 while True in df.duplicated(['nt_resnum']).values: 252 while True in df.duplicated(['nt_resnum']).values:
253 i = df.duplicated(['nt_resnum']).values.tolist().index(True) 253 i = df.duplicated(['nt_resnum']).values.tolist().index(True)
254 df.iloc[i:, 1] += 1 254 df.iloc[i:, 1] += 1
255 - 255 +
256 # Drop ligands detected as residues by DSSR, by detecting several markers 256 # Drop ligands detected as residues by DSSR, by detecting several markers
257 df = df.drop_duplicates("index_chain", keep="first") # drop doublons in index_chain 257 df = df.drop_duplicates("index_chain", keep="first") # drop doublons in index_chain
258 - while (df.iloc[[-1]].nt_name.tolist()[0] not in ["A", "C", "G", "U"] and 258 + while (len(df.index_chain) and df.iloc[[-1]].nt_name.tolist()[0] not in ["A", "C", "G", "U"] and
259 - (df.iloc[[-1]][["alpha", "beta", "gamma", "delta", "epsilon", "zeta", "v0", "v1", "v2", "v3", "v4"]].isna().values).all() 259 + ((df.iloc[[-1]][["alpha", "beta", "gamma", "delta", "epsilon", "zeta", "v0", "v1", "v2", "v3", "v4"]].isna().values).all()
260 - or (df.iloc[[-1]].puckering=='').any()): 260 + or (df.iloc[[-1]].puckering=='').any())):
261 df = df.head(-1) 261 df = df.head(-1)
262 262
263 + # Assert some nucleotides exist
264 + try:
265 + l = df.iloc[-1,1] - df.iloc[0,1] + 1 # length of chain from nt_resnum point of view
266 + except IndexError:
267 + warn(f"Error while parsing DSSR's annotation: No nucleotides are part of {self.chain_label}!", error=True)
268 + self.delete_me = True
269 + self.error_messages = f"Error while parsing DSSR's json output: No nucleotides from {self.chain_label}. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
270 + return 1
271 +
263 # If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one 272 # If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one
264 if df.iloc[0,0] != 1: 273 if df.iloc[0,0] != 1:
265 st = df.iloc[0,0] -1 274 st = df.iloc[0,0] -1
...@@ -292,13 +301,6 @@ class Chain: ...@@ -292,13 +301,6 @@ class Chain:
292 # index_chain 1 |-------------|77 83|------------| 149 301 # index_chain 1 |-------------|77 83|------------| 149
293 # expected data point 1 |--------------------------------| 154 302 # expected data point 1 |--------------------------------| 154
294 # 303 #
295 - try:
296 - l = df.iloc[-1,1] - df.iloc[0,1] + 1 # length of chain from nt_resnum point of view
297 - except IndexError:
298 - warn(f"Error while parsing DSSR's annotation: No nucleotides are part of {self.chain_label}!", error=True)
299 - self.delete_me = True
300 - self.error_messages = f"Error while parsing DSSR's json output: No nucleotides from {self.chain_label}. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
301 - return 1
302 304
303 if l != len(df['index_chain']): # if some residues are missing, len(df['index_chain']) < l 305 if l != len(df['index_chain']): # if some residues are missing, len(df['index_chain']) < l
304 resnum_start = df.iloc[0,1] 306 resnum_start = df.iloc[0,1]
...@@ -851,7 +853,7 @@ class Pipeline: ...@@ -851,7 +853,7 @@ class Pipeline:
851 print("RNANet 1.0 alpha ") 853 print("RNANet 1.0 alpha ")
852 sys.exit() 854 sys.exit()
853 elif opt == "-r" or opt == "--resolution": 855 elif opt == "-r" or opt == "--resolution":
854 - assert float(arg) > 0.0 and float(arg) < 20.0 856 + assert float(arg) > 0.0 and float(arg) <= 20.0
855 self.CRYSTAL_RES = float(arg) 857 self.CRYSTAL_RES = float(arg)
856 elif opt == "-s": 858 elif opt == "-s":
857 self.RUN_STATS = True 859 self.RUN_STATS = True
...@@ -1770,14 +1772,15 @@ def work_build_chain(c, extract, khetatm, retrying=False): ...@@ -1770,14 +1772,15 @@ def work_build_chain(c, extract, khetatm, retrying=False):
1770 c.extract_3D_data() 1772 c.extract_3D_data()
1771 1773
1772 # Small check 1774 # Small check
1773 - with sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0) as conn: 1775 + if not c.delete_me:
1774 - nnts = sql_ask_database(conn, f"SELECT COUNT(nt_id) FROM nucleotide WHERE chain_id={c.db_chain_id};", warn_every=10)[0][0] 1776 + with sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0) as conn:
1775 - if not(nnts): 1777 + nnts = sql_ask_database(conn, f"SELECT COUNT(nt_id) FROM nucleotide WHERE chain_id={c.db_chain_id};", warn_every=10)[0][0]
1776 - warn(f"Nucleotides not inserted: {c.error_messages}") 1778 + if not(nnts):
1777 - c.delete_me = True 1779 + warn(f"Nucleotides not inserted: {c.error_messages}")
1778 - c.error_messages = "Nucleotides not inserted !" 1780 + c.delete_me = True
1779 - else: 1781 + c.error_messages = "Nucleotides not inserted !"
1780 - notify(f"Inserted {nnts} nucleotides to chain {c.chain_label}") 1782 + else:
1783 + notify(f"Inserted {nnts} nucleotides to chain {c.chain_label}")
1781 1784
1782 # extract the portion we want 1785 # extract the portion we want
1783 if extract and not c.delete_me: 1786 if extract and not c.delete_me:
...@@ -2095,7 +2098,7 @@ if __name__ == "__main__": ...@@ -2095,7 +2098,7 @@ if __name__ == "__main__":
2095 2098
2096 # compute an update compared to what is in the table "chain" 2099 # compute an update compared to what is in the table "chain"
2097 #DEBUG: list everything 2100 #DEBUG: list everything
2098 - # pp.REUSE_ALL = True 2101 + pp.REUSE_ALL = True
2099 pp.list_available_mappings() 2102 pp.list_available_mappings()
2100 2103
2101 # =========================================================================== 2104 # ===========================================================================
......