warn(f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. Ignoring chain {self.chain_label}.",error=True)
warn(f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. Ignoring chain {self.chain_label}.")
no_nts_set.add(self.pdb_id)
self.delete_me=True
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
return1
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. Either there is a problem with {self.pdb_id} mmCIF download, or the bases are not resolved in the structure. Delete it and retry."
returnNone
# Remove low-relevance or undocumented descriptors, and convert angle values
warn(f"Error while parsing DSSR {self.pdb_id}.json output:{e}",error=True)
self.delete_me=True
self.error_messages=f"Error while parsing DSSR's json output:\n{e}"
return1
returnNone
#############################################
# Solve some common issues and drop ligands
...
...
@@ -303,7 +303,7 @@ class Chain:
no_nts_set.add(self.pdb_id)
self.delete_me=True
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
return1
returnNone
# If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one
ifdf.iloc[0,0]!=1:
...
...
@@ -355,11 +355,11 @@ class Chain:
l=df.iloc[-1,1]-df.iloc[0,1]+1# update length of chain from nt_resnum point of view
exceptIndexError:
warn(f"Could not find real nucleotides of chain {self.pdb_chain_id} between {self.pdb_start} and "
f"{self.pdb_end} ({'not' if not self.inferred else ''} inferred). Ignoring chain {self.chain_label}.",error=True)
f"{self.pdb_end} ({'not ' if not self.inferred else ''}inferred). Ignoring chain {self.chain_label}.")
no_nts_set.add(self.pdb_id)
self.delete_me=True
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
return1
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. Either there is a problem with {self.pdb_id} mmCIF download, or the bases are not resolved in the structure. Delete it and retry."
returnNone
# Add any rows that are missing because of unresolved residues in the chain.
# Sometimes, the 3D structure is REALLY shorter than the family it's mapped to,
warn(f"{self.chain_label} sequence is too short, let's ignore it.\t",error=True)
self.delete_me=True
self.error_messages="Sequence is too short. (< 5 resolved nts)"
return1
return0
defremap(self,columns_to_save,s_seq):
"""Maps the object's sequence to its version in a MSA, to compute nucleotide frequencies at every position.
...
...
@@ -1355,13 +1357,13 @@ class Pipeline:
conn=sqlite3.connect(runDir+"/results/RNANet.db")
# Assert every structure is used
r=sql_ask_database(conn,"""SELECT DISTINCT pdb_id FROM structure WHERE pdb_id NOT IN (SELECT DISTINCT structure_id FROM chain WHERE issue = 0);""")
r=sql_ask_database(conn,"""SELECT DISTINCT pdb_id FROM structure WHERE pdb_id NOT IN (SELECT DISTINCT structure_id FROM chain);""")
iflen(r)andr[0][0]isnotNone:
warn("Structures without referenced chains have been detected.")
print(" ".join([x[0]forxinr]))
# Assert every chain is attached to a structure
r=sql_ask_database(conn,"""SELECT DISTINCT chain_id, structure_id FROM chain WHERE structure_id NOT IN (SELECT DISTINCT pdb_id FROM structure) AND issue = 0;""")
r=sql_ask_database(conn,"""SELECT DISTINCT chain_id, structure_id FROM chain WHERE structure_id NOT IN (SELECT DISTINCT pdb_id FROM structure);""")
iflen(r)andr[0][0]isnotNone:
warn("Chains without referenced structures have been detected")
print(" ".join([str(x[1])+'-'+str(x[0])forxinr]))
...
...
@@ -1371,11 +1373,16 @@ class Pipeline:
r=sql_ask_database(conn,"""SELECT COUNT(DISTINCT chain_id) AS Count, rfam_acc FROM chain
WHERE issue = 0 AND chain_id NOT IN (SELECT DISTINCT chain_id FROM re_mapping)
GROUP BY rfam_acc;""")
iflen(r)andr[0][0]isnotNone:
warn("Chains were not remapped:")
forxinr:
print(str(x[0])+" chains of family "+x[1])
try:
iflen(r)andr[0][0]isnotNone:
warn("Chains were not remapped:")
forxinr:
print(str(x[0])+" chains of family "+x[1])
exceptTypeErrorase:
print(r)
print(next(r))
print(e)
exit()
# # TODO : Optimize this (too slow)
# # check if some columns are missing in the remappings: