df=df[df.chain_name==self.pdb_chain_id]# keeping only this chain's nucleotides
ifdf.empty:
warn(f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. Ignoring chain {self.chain_label}.",error=True)
self.delete_me=True
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
return1
# remove low-relevance or undocumented descriptors
warn(f"Error while parsing DSSR's {self.chain_label} json output:{e}",error=True)
warn(f"Error while parsing DSSR {self.pdb_id}.json output:{e}",error=True)
self.delete_me=True
self.error_messages=f"Error while parsing DSSR's json output:\n{e}"
return1
...
...
@@ -265,9 +270,9 @@ class Chain:
try:
l=df.iloc[-1,1]-df.iloc[0,1]+1# length of chain from nt_resnum point of view
exceptIndexError:
warn(f"Error while parsing DSSR's annotation: No nucleotides are part of {self.chain_label}!",error=True)
warn(f"Could not find real nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. Ignoring chain {self.chain_label}.",error=True)
self.delete_me=True
self.error_messages=f"Error while parsing DSSR's json output: No nucleotides from {self.chain_label}. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
self.error_messages=f"Could not find nucleotides of chain {self.pdb_chain_id} in annotation {self.pdb_id}.json. We expect a problem with {self.pdb_id} mmCIF download. Delete it and retry."
return1
# If, for some reason, index_chain does not start at one (e.g. 6boh, chain GB), make it start at one
...
...
@@ -846,7 +851,7 @@ class Pipeline:
"\n\t\t\t\tAllows to yield more 3D data (consider chains without a Rfam mapping).")
print()
print("--ignore-issues\t\t\tDo not ignore already known issues and attempt to compute them")
print("--update-homologous\t\tRe-download Rfam sequences and SILVA arb databases, and realign all families")
print("--update-homologous\t\tRe-download Rfam and SILVA databases, realign all families, and recompute all CSV files")
print("--from-scratch\t\t\tDelete database, local 3D and sequence files, and known issues, and recompute.")
print()
print("Typical usage:")
...
...
@@ -893,6 +898,9 @@ class Pipeline:
runDir+"/known_issues_reasons.txt",
runDir+"/results/RNANet.db"])
elifopt=="--update-homologous":
if"tobedefinedbyoptions"==path_to_seq_data:
warn("Please provide --seq-folder before --update-homologous in the list of options.",error=True)
exit(1)
warn("Deleting previous sequence files and recomputing alignments.")
subprocess.run(["rm","-rf",
path_to_seq_data+"realigned",
...
...
@@ -1136,7 +1144,7 @@ class Pipeline:
WHERE rfam_acc = ?;""",many=True,data=data)
defremap(self):
"""Compute nucleotide frequencies of the previous alignments and save them in the database
"""Compute nucleotide frequencies of some alignments and save them in the database