Merge branch 'master' into stage_aglae

Aglaé TABOT
Commit ad0e234c8b100878728b92c6249f99fc271abf47 ad0e234c 2 parents 9dedcd5b 60f0af5a
Showing 18 changed files with 1027 additions and 1606 deletions
.dockerignore
CHANGELOG
LICENSE
README.md
RNAnet.py
doc/Errors.md
doc/FAQ.md
doc/INSTALL.md
doc/KnownIssues.md
geometric_stats.py
known_issues.txt
known_issues_reasons.txt
scripts/automate.sh
scripts/automate_from_scratch.sh
scripts/build_docker_image.sh
scripts/recompute_family.py
scripts/recompute_some_chains.py
statistics.py
--- a/.dockerignore
View file @ad0e234
+++ b/.dockerignore
View file @ad0e234
@@ -23,3 +23,5 @@ scripts/*.sh
 scripts/*.tar
 scripts/measure.py
 scripts/recompute_some_chains.py
+ scripts/convert_rna_jsons.py
+ scripts/recompute_family.py
--- a/CHANGELOG
View file @ad0e234
+++ b/CHANGELOG
View file @ad0e234
 ############################################################################################
+ v 1.6 beta, August 2021
+ 
+ Aglaé Tabot joins the development team. Khodor Hannoush leaves.
+ 
+ FEATURE CHANGES
+     - Distinct options --cmalign-opts and --cmalign-rrna-opts make it possible to adapt the parameters for LSU and SSU families.
+       The LSU and SSU are now aligned with Infernal options '--cpu 10 --mxsize 8192 --maxtau 0.1', which is slow, 
+       requires up to 100 GB of RAM, and yields a suboptimal alignment (tau=0.1 is quite bad), but is homogeneous with the other families.
+     - The LSU and SSU therefore have defined cm_coords fields, and therefore distance matrices can be computed.
+     - Distance matrices are computed on all available molecules of the family by default, but you can use statistics.py --non-redundant to only
+       take the equivalence class representatives at a given resolution into account (new option). For storage reasons, rRNAs are always run in 
+       this mode (but this might change in the future: space required is 'only' ~300 GB).
+     - We now provide for download the renumbered (standardised) 3D MMCIF files, the nucleotides being numbered by their "index_chain" in the database.
+     - We now provide for download the sequences of the 3D chains aligned by Rfam family (without Rfam sequences, which have been removed).
+     - statistics.py now computes histograms and a density estimation with Gaussian mixture models for a large set of geometric parameters, 
+       measured on the unmapped data at a given resolution threshold. The parameters include:
+         * All atom bonded distances and torsion angles
+         * Distances, flat angles and torsion angles in the Pyle/VFold model
+         * Distances, flat angles and torsion angles in the HiRE-RNA model
+         * Sequence-dependent geometric parameters of the basepairs for all non-canonical basepairs in the HiRE-RNA model.
+       The data is saved as JSON files of parameters, and numerous figures are produced to illustrate the distributions.
+       The numbers of Gaussians to use in the GMMs are hard-coded in geometric_stats.py after our first estimation. If you do not want to trust this estimation,
+       you can ignore it with option --rescan-nmodes. An exploration of the number of Gaussians from 1 to 8 will be performed, and the best GMM will be kept. 
+ 
+ BUG CORRECTIONS
+     - New code file geometric_stats.py
+     - New automation script that starts from scratch
+     - Many small fixes, leading to the support of many previously "known issues"
+     - Performance tweaks
+ 
+ TECHNICAL CHANGES
+     - Switched to DSSR Pro.
+     - Switched to esl-alimerge instead of cmalign --merge to merge alignments.
+     - Tested successfully with Python 3.9.6 + BioPython 1.79. 
+       However, the production server still runs with Python 3.8.1 + BioPython 1.78.
+ 
+ ############################################################################################
 v 1.5 beta, April 2021
 
 FEATURE CHANGES
--- a/LICENSE
View file @ad0e234
+++ b/LICENSE
View file @ad0e234
 MIT License
 
- Copyright (c) 2019 Louis Becquey
+ Copyright (c) 2019-2021 IBISC, Université Paris Saclay
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/README.md
View file @ad0e234
+++ b/README.md
View file @ad0e234
@@ -10,6 +10,7 @@ Contents:
 * [Database tables documentation](doc/Database.md)
 * [FAQ](doc/FAQ.md)
 * [Troubleshooting](#troubleshooting)
+ * [Known Issues and Feature Requests](doc/KnownIssues.md)
 * [Contact](#contact)
 
 ## Cite us
@@ -18,15 +19,13 @@ Contents:
 
 Additional relevant references:
 
- The "ProteinNet" philosophy which inspired this work:
- * AlQuraishi, M. (2019b). **ProteinNet: A standardized data set for machine learning of protein structure.** *BMC Bioinformatics*, 20(1), 311
- 
 If you use our annotations by DSSR, you might want to cite:
 * Lu, X.-J.et al.(2015). **DSSR: An integrated software tool for dissecting the spatial structure of RNA.** *Nucleic Acids Research*, 43(21), e142–e142.
 
 If you use our multiple sequence alignments and homology data, you might want to cite:
- * Pruesse, E. et al.(2012). **Sina: accurate high-throughput multiple sequence alignment of ribosomal RNA genes.** *Bioinformatics*, 28(14), 1823–1829
 * Nawrocki, E. P. and Eddy, S. R. (2013). **Infernal 1.1: 100-fold faster RNA homology searches.** *Bioinformatics*, 29(22), 2933–2935.
+ * Pruesse, E. et al.(2012). **Sina: accurate high-throughput multiple sequence alignment of ribosomal RNA genes.** *Bioinformatics*, 28(14), 1823–1829
+ 
 
 
 # What is RNANet ?
@@ -39,7 +38,8 @@ Most interestingly, nucleotides have been renumered in a standardized way, and t
 
 ## Methodology
 We use the Rfam mappings between 3D structures and known Rfam families, using the sequences that are known to belong to an Rfam family (hits provided in RF0XXXX.fasta files from Rfam).
- Future versions might compute a real MSA-based clusering directly with Rfamseq ncRNA sequences, like ProteinNet does with protein sequences, but this requires a tool similar to jackHMMER in the Infernal software suite, which is not available yet.
+ Future versions might compute a real MSA-based clustering directly with Rfamseq ncRNA sequences, like ProteinNet does with protein sequences, but this requires a tool similar to jackHMMER in the Infernal software suite, which is not available yet. 
+ If interested in such approaches, the user may check tools like RNAlien.
 
 This script prepares the dataset from available public data in PDB, RNA 3D Hub, Rfam and SILVA.
 
@@ -48,15 +48,16 @@ This script prepares the dataset from available public data in PDB, RNA 3D Hub, 
 The script follows these steps:
 
 To gather structures:
- * Gets a list of 3D structures containing RNA from BGSU's non-redundant list (but keeps the redundant structures /!\\),
+ * Gets a list of 3D structures containing RNA from BGSU's non-redundant list (redundancy can be kept or eliminated, see command line option `--redundant`),
 * Asks Rfam for mappings of these structures onto Rfam families (~50% of structures have a direct mapping, some more are inferred using the redundancy list)
 * Downloads the corresponding 3D structures (mmCIFs)
- * If desired, extracts the right chain portions that map onto an Rfam family to a separate mmCIF file
+ * Standardizes the residue numbering from 1 to N, including missing residues (gaps)
+ * If desired, extracts the renumbered chain portions that map onto an Rfam family to a separate mmCIF file
 
 To compute homology information:
- * Extract the sequence for every 3D chain
+ * Extracts the sequence of every 3D chain
 * Downloads Rfamseq ncRNA sequence hits for the concerned Rfam families (or ARB databases of SSU or LSU sequences from SILVA for rRNAs)
- * Realigns Rfamseq hits and sequences from the 3D structures together to obtain a multiple sequence alignment for each Rfam family (using `cmalign --cyk`, except for ribosomal LSU and SSU, where SINA is used)
+ * Realigns Rfamseq hits and sequences from the 3D structures together to obtain a multiple sequence alignment for each Rfam family (using `cmalign`, but SINA can be used for ribosomal LSU and SSU)
 * Computes nucleotide frequencies at every position for each alignment
 * Maps each nucleotide of a 3D chain to its position in the corresponding family sequence alignment
 
@@ -65,6 +66,15 @@ To compute 3D annotations:
 
 Finally, export this data from the SQLite database into flat CSV files.
 
+ Statistical analysis of the structures:
+ * Computes statistics about the amount of data from various resolutions and experimental methods (by RNA family)
+ * Computes basic statistics about the frequency of (modified) nucleotides by chain and by family,
+ * Computes basic statistics about the frequencies of non-canonical interactions,
+ * Computes density estimations (using Gaussian mixtures) for various geometrical parameters like distances and torsion angles for different representations : all-atom, the Pyle/VFold model, and the HiRE-RNA model,
+ * Computes pairwise residue distance matrices for each chain, and average + std-dev by RNA family
+ * Computes sequence identity matrices for each RNA family (based on the alignments)
+ * Saves covariance models (Infernal .cm files) for each RNA family
+ 
 ## Data provided
 
 We provide a couple of resources to exploit this dataset. You can download them on [EvryRNA](https://evryrna.ibisc.univ-evry.fr/evryrna/rnanet/rnanet_home).
--- a/RNAnet.py
View file @ad0e234
+++ b/RNAnet.py
View file @ad0e234
@@ -31,7 +31,7 @@ import time
 import traceback
 import warnings
 from functools import partial, wraps
- from multiprocessing import Pool, Manager
+ from multiprocessing import Pool, Manager, Value
 from time import sleep
 from tqdm import tqdm
 from setproctitle import setproctitle
@@ -45,42 +45,44 @@ from Bio.PDB.PDBIO import Select
 runDir = os.getcwd()
 
 def trace_unhandled_exceptions(func):
+     """
+     Captures exceptions even in parallel sections of the code and child processes,
+     and throws logs in red to stderr and to errors.txt.
+ 
+     Should be defined before the classes that use it.
+     """
     @wraps(func)
     def wrapped_func(*args, **kwargs):
         try:
             return func(*args, **kwargs)
         except:
             s = traceback.format_exc()
-             with open(runDir + "/errors.txt", "a") as f:
-                 f.write("Exception in "+func.__name__+"\n")
-                 f.write(s)
-                 f.write("\n\n")
- 
-             warn('Exception in '+func.__name__, error=True)
-             print(s)
+             if not "KeyboardInterrupt" in s:
+                 with open(runDir + "/errors.txt", "a") as f:
+                     f.write("Exception in "+func.__name__+"\n")
+                     f.write(s)
+                     f.write("\n\n")
+ 
+                 warn('Exception in '+func.__name__, error=True)
+                 print(s)
     return wrapped_func
 
- 
 pd.set_option('display.max_rows', None)
 sqlite3.enable_callback_tracebacks(True)
 sqlite3.register_adapter(np.int64, lambda val: int(val))        # Tell Sqlite what to do with <class numpy.int64> objects ---> convert to int
 sqlite3.register_adapter(np.float64, lambda val: float(val))    # Tell Sqlite what to do with <class numpy.float64> objects ---> convert to float
 
- m = Manager()
- running_stats = m.list()
- running_stats.append(0)  # n_launched
- running_stats.append(0)  # n_finished
- running_stats.append(0)  # n_skipped
+ n_launched = Value('i', 0)
+ n_finished = Value('i', 0)
+ n_skipped = Value('i', 0)
 path_to_3D_data = "tobedefinedbyoptions"
 path_to_seq_data = "tobedefinedbyoptions"
 python_executable = "python"+".".join(platform.python_version().split('.')[:2])  # Cuts python3.8.1 into python3.8 for example.
 validsymb = '\U00002705'
 warnsymb = '\U000026A0'
 errsymb = '\U0000274C'
- LSU_set = {"RF00002", "RF02540", "RF02541",
-            "RF02543", "RF02546"}   # From Rfam CLAN 00112
- SSU_set = {"RF00177", "RF02542",  "RF02545",
-            "RF01959", "RF01960"}  # From Rfam CLAN 00111
+ LSU_set = {"RF00002", "RF02540", "RF02541", "RF02543", "RF02546"}   # From Rfam CLAN 00112
+ SSU_set = {"RF00177", "RF02542",  "RF02545", "RF01959", "RF01960"}  # From Rfam CLAN 00111
 
 no_nts_set = set()
 weird_mappings = set()
@@ -103,17 +105,15 @@ class MutableFastaIterator(FastaIterator):
                     first_word = title.split(None, 1)[0]
                 except IndexError:
                     assert not title, repr(title)
-                     # Should we use SeqRecord default for no ID?
                     first_word = ""
-                 yield SeqRecord(
-                     MutableSeq(sequence), id=first_word, name=first_word, description=title,
-                 )
+                 yield SeqRecord(MutableSeq(sequence), id=first_word, name=first_word, description=title)
 
 
 class SelectivePortionSelector(object):
     """Class passed to MMCIFIO to select some chain portions in an MMCIF file.
 
     Validates every chain, residue, nucleotide, to say if it is in the selection or not.
+     The primary use is to select the portion of a chain which is mapped to a family.
     """
 
     def __init__(self, model_id, chain_id, valid_resnums, khetatm):
@@ -156,123 +156,6 @@ class SelectivePortionSelector(object):
         return 1
 
 
- _select=Select()
- 
- def save_mmcif(ioobj, out_file, select=_select, preserve_atom_numbering=False):
-     # reuse and modification of the source code of Biopython
-     # to have the 2 columns of numbering of residues numbered with the index_chain of DSSR
-     if isinstance(out_file, str):
-         fp = open(out_file, "w")
-         close_file = True
-     else:
-         fp = out_file
-         close_file = False
-     atom_dict = defaultdict(list)
- 
-     for model in ioobj.structure.get_list():
-         if not select.accept_model(model):
-             continue
-         # mmCIF files with a single model have it specified as model 1
-         if model.serial_num == 0:
-             model_n = "1"
-         else:
-             model_n = str(model.serial_num)
-         # This is used to write label_entity_id and label_asym_id and
-         # increments from 1, changing with each molecule
-         entity_id = 0
-         if not preserve_atom_numbering:
-             atom_number = 1
-         for chain in model.get_list():
-             if not select.accept_chain(chain):
-                 continue
-             chain_id = chain.get_id()
-             if chain_id == " ":
-                 chain_id = "."
-             # This is used to write label_seq_id,
-             # remaining blank for hetero residues
-             
-             prev_residue_type = ""
-             prev_resname = ""
-             for residue in chain.get_unpacked_list():
-                 if not select.accept_residue(residue):
-                     continue
-                 hetfield, resseq, icode = residue.get_id()
-                 if hetfield == " ":
-                     residue_type = "ATOM"
-                     label_seq_id = str(resseq)
-                     
-                 else:
-                     residue_type = "HETATM"
-                     label_seq_id = "."
-                 resseq = str(resseq)
-                 if icode == " ":
-                     icode = "?"
-                 resname = residue.get_resname()
-                 # Check if the molecule changes within the chain
-                 # This will always increment for the first residue in a
-                 # chain due to the starting values above
-                 if residue_type != prev_residue_type or (
-                     residue_type == "HETATM" and resname != prev_resname
-                 ):
-                     entity_id += 1
-                 prev_residue_type = residue_type
-                 prev_resname = resname
-                 label_asym_id = ioobj._get_label_asym_id(entity_id)
-                 for atom in residue.get_unpacked_list():
-                     if select.accept_atom(atom):
-                         atom_dict["_atom_site.group_PDB"].append(residue_type)
-                         if preserve_atom_numbering:
-                             atom_number = atom.get_serial_number()
-                         atom_dict["_atom_site.id"].append(str(atom_number))
-                         if not preserve_atom_numbering:
-                             atom_number += 1
-                         element = atom.element.strip()
-                         if element == "":
-                             element = "?"
-                         atom_dict["_atom_site.type_symbol"].append(element)
-                         atom_dict["_atom_site.label_atom_id"].append(
-                             atom.get_name().strip()
-                         )
-                         altloc = atom.get_altloc()
-                         if altloc == " ":
-                             altloc = "."
-                         atom_dict["_atom_site.label_alt_id"].append(altloc)
-                         atom_dict["_atom_site.label_comp_id"].append(
-                             resname.strip()
-                         )
-                         atom_dict["_atom_site.label_asym_id"].append(label_asym_id)
-                         # The entity ID should be the same for similar chains
-                         # However this is non-trivial to calculate so we write "?"
-                         atom_dict["_atom_site.label_entity_id"].append("?")
-                         atom_dict["_atom_site.label_seq_id"].append(label_seq_id)
-                         atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode)
-                         coord = atom.get_coord()
-                         atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0])
-                         atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1])
-                         atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2])
-                         atom_dict["_atom_site.occupancy"].append(
-                             str(atom.get_occupancy())
-                         )
-                         atom_dict["_atom_site.B_iso_or_equiv"].append(
-                             str(atom.get_bfactor())
-                         )
-                         atom_dict["_atom_site.auth_seq_id"].append(resseq)
-                         atom_dict["_atom_site.auth_asym_id"].append(chain_id)
-                         atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n)
- 
-     # Data block name is the structure ID with special characters removed
-     structure_id = ioobj.structure.id
-     for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]:
-         structure_id = structure_id.replace(c, "")
-     atom_dict["data_"] = structure_id
- 
-     # Set the dictionary and write out using the generic dictionary method
-     ioobj.dic = atom_dict
-     ioobj._save_dict(fp)
-     if close_file:
-         fp.close()
- 
- 
 class Chain:
     """ 
     The object which stores all our data and the methods to process it.
@@ -379,7 +262,7 @@ class Chain:
                     new_s.add(new_model)
 
             # renumber this structure (portion of the original) with the index_chain and save it in a cif file
-             t=pdb.Structure.Structure(new_s.get_id())
+             t = pdb.Structure.Structure(new_s.get_id())
             for model in new_s:
                 new_model_t=pdb.Model.Model(model.get_id())
                 for chain in model:
@@ -406,7 +289,7 @@ class Chain:
                         # particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G)
                         if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' or resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' :
                             res=chain[(' ', resseq, icode_res)]
-                         else : #modified nucleotides (e.g. chain 5l4o_1_A)
+                         else : # modified nucleotides (e.g. chain 5l4o_1_A)
                             het='H_' + nt
                             res=chain[(het, resseq, icode_res)]
                         res_id=res.get_id()
@@ -424,13 +307,11 @@ class Chain:
                         for atom in list(res.get_atoms()):
                             # rename the remaining phosphate group to P, OP1, OP2, OP3
                             if atom.get_name() in ['PA', 'O1A', 'O2A', 'O3A'] and res_name != 'RIA': 
- 
-                             # RIA is a residue made up of 2 riboses and 2 phosphates, 
-                             # so it has an O2A atom between the C2A and C1 'atoms, 
-                             # and it also has an OP2 atom attached to one of its phosphates 
-                             # (see chains 6fyx_1_1, 6zu9_1_1, 6fyy_1_1, 6gsm_1_1 , 3jaq_1_1 and 1yfg_1_A)
-                             # we do not modify the atom names of RIA residue
- 
+                                 # RIA is a residue made up of 2 riboses and 2 phosphates, 
+                                 # so it has an O2A atom between the C2A and C1 'atoms, 
+                                 # and it also has an OP2 atom attached to one of its phosphates 
+                                 # (see chains 6fyx_1_1, 6zu9_1_1, 6fyy_1_1, 6gsm_1_1 , 3jaq_1_1 and 1yfg_1_A)
+                                 # we do not modify the atom names of RIA residue
                                 if atom.get_name() == 'PA':
                                     atom_name = 'P'
                                 if atom.get_name() == 'O1A':
@@ -440,7 +321,7 @@ class Chain:
                                 if atom.get_name() == 'O3A':
                                     atom_name = 'OP3'
                                 new_atom_t = pdb.Atom.Atom(atom_name, atom.get_coord(), atom.get_bfactor(), atom.get_occupancy(), atom.get_altloc(), atom_name, atom.get_serial_number())
-                             else :
+                             else:
                                 new_atom_t=atom.copy()
                             new_residu_t.add(new_atom_t)
                         new_chain_t.add(new_residu_t)
@@ -750,12 +631,24 @@ class Chain:
                 if nt2 in res_ids:
                     interacts[nt2_idx] += 1
                     if paired[nt2_idx] == "":
-                         pair_type_LW[nt2_idx] = lw_pair[0] + lw_pair[2] + lw_pair[1]
-                         pair_type_DSSR[nt2_idx] = dssr_pair[0] + dssr_pair[3] + dssr_pair[2] + dssr_pair[1]
+                         if lw_pair != "--":
+                             pair_type_LW[nt2_idx] = lw_pair[0] + lw_pair[2] + lw_pair[1]
+                         else:
+                             pair_type_LW[nt2_idx] = "--"
+                         if dssr_pair != "--":
+                             pair_type_DSSR[nt2_idx] = dssr_pair[0] + dssr_pair[3] + dssr_pair[2] + dssr_pair[1]
+                         else:
+                             pair_type_DSSR[nt2_idx] = "--"
                         paired[nt2_idx] = str(nt1_idx + 1)
                     else:
-                         pair_type_LW[nt2_idx] += ',' + lw_pair[0] + lw_pair[2] + lw_pair[1]
-                         pair_type_DSSR[nt2_idx] += ',' + dssr_pair[0] + dssr_pair[3] + dssr_pair[2] + dssr_pair[1]
+                         if lw_pair != "--":
+                             pair_type_LW[nt2_idx] += ',' + lw_pair[0] + lw_pair[2] + lw_pair[1]
+                         else:
+                             pair_type_LW[nt2_idx] += ",--"
+                         if dssr_pair != "--":
+                             pair_type_DSSR[nt2_idx] += ',' + dssr_pair[0] + dssr_pair[3] + dssr_pair[2] + dssr_pair[1]
+                         else:
+                             pair_type_DSSR[nt2_idx] += ",--"
                         paired[nt2_idx] += ',' + str(nt1_idx + 1)
         
         # transform nt_id to shorter values
@@ -787,7 +680,8 @@ class Chain:
         return df
 
     def register_chain(self, df):
-         """Saves the extracted 3D data to the database.
+         """
+         Saves the extracted 3D data to the database.
         """
 
         setproctitle(f"RNANet.py {self.chain_label} register_chain()")
@@ -920,6 +814,10 @@ class Monitor:
 
 
 class Downloader:
+     """
+     An object with methods to download public data from the internet.
+     """
+ 
     def download_Rfam_PDB_mappings(self):
         """Query the Rfam public MySQL database for mappings between their RNA families and PDB structures.
 
@@ -1170,6 +1068,10 @@ class Mapping:
 
 
 class Pipeline:
+     """
+     The RNANet pipeline steps.
+     """
+ 
     def __init__(self):
         self.dl = Downloader()
         self.known_issues = []  # list of chain_labels to ignore
@@ -1189,6 +1091,7 @@ class Pipeline:
         self.REUSE_ALL = False
         self.REDUNDANT = False
         self.ALIGNOPTS = None
+         self.RRNAALIGNOPTS = ["--mxsize", "8192", "--cpu", "10", "--maxtau", "0.1"]
         self.STATSOPTS = None
         self.USESINA = False
         self.SELECT_ONLY = None
@@ -1207,7 +1110,7 @@ class Pipeline:
 
         try:
             opts, _ = getopt.getopt(sys.argv[1:], "r:fhs", ["help", "resolution=", "3d-folder=", "seq-folder=", "keep-hetatm=", 
-                                                             "only=", "cmalign-opts=", "stats-opts=", "maxcores=", "sina", "from-scratch", 
+                                                             "only=", "cmalign-opts=", "cmalign-rrna-opts=", "stats-opts=", "maxcores=", "sina", "from-scratch", 
                                                             "full-inference", "no-homology", "redundant", "ignore-issues", "extract", 
                                                             "all", "no-logs", "archive", "update-homologous", "version"])
         except getopt.GetoptError as err:
@@ -1256,6 +1159,7 @@ class Pipeline:
                       "\n\t\t\t\t need of RAM. Should be a number between 1 and your number of CPUs. Note that portions"
                       "\n\t\t\t\t of the pipeline already limit themselves to 50% or 70% of that number by default.")
                 print("--cmalign-opts=…\t\tA string of additional options to pass to cmalign aligner, e.g. \"--nonbanded --mxsize 2048\"")
+                 print("--cmalign-rrna-opts=…\tLike cmalign-opts, but applied for rRNA (large families, memory-heavy jobs).")
                 print("--archive\t\t\tCreate tar.gz archives of the datapoints text files and the alignments,"
                       "\n\t\t\t\t and update the link to the latest archive. ")
                 print("--no-logs\t\t\tDo not save per-chain logs of the numbering modifications.")
@@ -1321,10 +1225,12 @@ class Pipeline:
                                 path_to_seq_data + "realigned",
                                 path_to_seq_data + "rfam_sequences"])
                 self.REUSE_ALL = True
-             elif opt == "cmalign-opts":
-                 self.ALIGNOPTS = arg
-             elif opt == "stats-opts":
-                 self.STATSOPTS = " ".split(arg)
+             elif opt == "--cmalign-opts":
+                 self.ALIGNOPTS = arg.split(" ")
+             elif opt == "--cmalign-rrna-opts":
+                 self.RRNAALIGNOPTS = arg.split(" ")
+             elif opt == "--stats-opts":
+                 self.STATSOPTS = arg.split(" ")
             elif opt == "--all":
                 self.REUSE_ALL = True
                 self.USE_KNOWN_ISSUES = False
@@ -1382,7 +1288,7 @@ class Pipeline:
             # If self.FULLINFERENCE is False, the extended list is already filtered to remove
             # the chains which already are in the database.
             print("> Building list of structures...", flush=True)
-             p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores)
+             p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=ncores)
             try:
 
                 pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1,
@@ -1421,6 +1327,7 @@ class Pipeline:
             conn.execute('pragma journal_mode=wal')
             for eq_class, representative, codelist in tqdm(full_structures_list, desc="Eq. classes"):
                 codes = codelist.replace('+', ',').split(',')
+                 representatives = representative.replace('+', ',').split(',')
 
                 # Simply convert the list of codes to Chain() objects
                 if self.REDUNDANT:
@@ -1438,18 +1345,19 @@ class Pipeline:
                         if not len(res) or self.REUSE_ALL:  # the chain is NOT yet in the database, or this is a known issue
                             self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class))
                 else:
-                     nr = representative.split('|')
-                     pdb_id = nr[0].lower()
-                     pdb_model = int(nr[1])
-                     pdb_chain_id = nr[2].upper()
-                     chain_label = f"{pdb_id}_{str(pdb_model)}_{pdb_chain_id}"
-                     res = sql_ask_database(conn, f"""SELECT chain_id from chain 
-                                                         WHERE structure_id='{pdb_id}' 
-                                                         AND chain_name='{pdb_chain_id}' 
-                                                         AND rfam_acc = 'unmappd' 
-                                                         AND issue=0""")
-                     if not len(res) or self.REUSE_ALL:  # the chain is NOT yet in the database, or this is a known issue
-                         self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class))
+                     for rep in representatives:
+                         nr = rep.split('|')
+                         pdb_id = nr[0].lower()
+                         pdb_model = int(nr[1])
+                         pdb_chain_id = nr[2].upper()
+                         chain_label = f"{pdb_id}_{str(pdb_model)}_{pdb_chain_id}"
+                         res = sql_ask_database(conn, f"""SELECT chain_id from chain 
+                                                             WHERE structure_id='{pdb_id}' 
+                                                             AND chain_name='{pdb_chain_id}' 
+                                                             AND rfam_acc = 'unmappd' 
+                                                             AND issue=0""")
+                         if not len(res) or self.REUSE_ALL:  # the chain is NOT yet in the database, or this is a known issue
+                             self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class))
             conn.close()
 
         if self.SELECT_ONLY is not None:
@@ -1491,7 +1399,7 @@ class Pipeline:
         else:
             mmcif_list = sorted(set([c.pdb_id for c in self.update]))
         try:
-             p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=int(coeff_ncores*ncores))
+             p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=int(coeff_ncores*ncores))
             pbar = tqdm(mmcif_list, maxinterval=1.0, miniters=1, desc="mmCIF files")
             for _ in p.imap_unordered(work_mmcif, mmcif_list, chunksize=1):
                 pbar.update(1)  # Everytime the iteration finishes, update the global progress bar
@@ -1539,8 +1447,9 @@ class Pipeline:
                                    args=[c, self.EXTRACT_CHAINS, self.KEEP_HETATM, retry, self.SAVELOGS]))
         try:
             results = execute_joblist(joblist)
-         except:
-             print("Exiting", flush=True)
+         except Exception as e:
+             warn(str(e), error=True)
+             print("Exiting", str(e), flush=True)
             exit(1)
 
         # If there were newly discovered problems, add this chain to the known issues
@@ -1634,7 +1543,11 @@ class Pipeline:
         joblist = []
         for f in self.fam_list:
             # the function already uses all CPUs so launch them one by one (how_many_in_parallel=1)
-             joblist.append(Job(function=work_realign, args=[self.USESINA, self.ALIGNOPTS, f], how_many_in_parallel=1, label=f))
+             if f in LSU_set or f in SSU_set:
+                 opts = self.RRNAALIGNOPTS
+             else:
+                 opts = self.ALIGNOPTS
+             joblist.append(Job(function=work_realign, args=[self.USESINA, opts, f], how_many_in_parallel=1, label=f))
 
         # Execute the jobs
         try:
@@ -1649,7 +1562,7 @@ class Pipeline:
             align = AlignIO.read(path_to_seq_data + "realigned/" + r[0] + "++.afa", "fasta")
             nb_3d_chains = len([1 for r in align if '[' in r.id])
             if r[0] in SSU_set:  # SSU v138.1 is used
-                 nb_homologs = 2224740 	    # source: https://www.arb-silva.de/documentation/release-1381/
+                 nb_homologs = 2224740         # source: https://www.arb-silva.de/documentation/release-1381/
                 nb_total_homol = nb_homologs + nb_3d_chains
             elif r[0] in LSU_set:  # LSU v138.1 is used
                 nb_homologs = 227331        # source: https://www.arb-silva.de/documentation/release-1381/
@@ -1684,12 +1597,12 @@ class Pipeline:
 
         # Start a process pool to dispatch the RNA families,
         # over multiple CPUs (one family by CPU)
-         p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers)
+         p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=nworkers)
 
         try:
             fam_pbar = tqdm(total=len(self.fam_list), desc="RNA families", position=0, leave=True)
             # Apply work_pssm_remap to each RNA family
-             for i, _ in enumerate(p.imap_unordered(work_pssm_remap, self.fam_list, chunksize=1)):
+             for i, _ in enumerate(p.imap_unordered(partial(work_pssm_remap, useSina=pp.USESINA), self.fam_list, chunksize=1)):
                 # Everytime the iteration finishes on a family, update the global progress bar over the RNA families
                 fam_pbar.update(1)
             fam_pbar.close()
@@ -1741,10 +1654,10 @@ class Pipeline:
             os.makedirs(path_to_3D_data + "datapoints/")
 
         # Save to by-chain CSV files
-         p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3)
+         p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=3)
         try:
             pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True)
-             for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains)):
+             for _, _2 in enumerate(p.imap_unordered(partial(work_save, homology=pp.HOMOLOGY), self.loaded_chains)):
                 pbar.update(1)
             pbar.close()
             p.close()
@@ -1790,18 +1703,28 @@ class Pipeline:
         if self.ARCHIVE:
             os.makedirs(runDir + "/archive", exist_ok=True)
             datestr = time.strftime('%Y%m%d')
+ 
+             # The text files
             subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_datapoints_latest.tar.gz"])
             subprocess.run(["tar", "-C", path_to_3D_data + "/datapoints", "-czf", runDir + f"/archive/RNANET_datapoints_{datestr}.tar.gz", "."])
             subprocess.run(["ln", "-s", runDir + f"/archive/RNANET_datapoints_{datestr}.tar.gz", runDir + f"/archive/RNANET_datapoints_latest.tar.gz"])
 
+             # The alignments
             if self.HOMOLOGY:
-                 # gather the alignments
                 os.makedirs(path_to_seq_data + "realigned/3d_only", exist_ok=True)
                 for f in os.listdir(path_to_seq_data + "realigned"):
                     if "3d_only.afa" in f:
                         subprocess.run(["cp", path_to_seq_data + "realigned/" + f, path_to_seq_data + "realigned/3d_only"])
-                 subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_alignments_latest.tar.gz"])
-                 subprocess.run(["tar", "-C", path_to_seq_data + "realigned/3d_only" , "-czf", runDir + f"/archive/RNANET_alignments_latest.tar.gz", "."])
+                 subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_3dOnlyAlignments_latest.tar.gz"])
+                 subprocess.run(["tar", "-C", path_to_seq_data + "realigned/3d_only" , "-czf", runDir + f"/archive/RNANET_3dOnlyAlignments_latest.tar.gz", "."])
+ 
+             # The 3D files
+             if os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"):
+                 subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_MMCIFmappedToRfam_latest.tar.gz"])
+                 subprocess.run(["tar", "-C", path_to_3D_data + "rna_mapped_to_Rfam" , "-czf", runDir + f"/archive/RNANET_MMCIFmappedToRfam_latest.tar.gz", "."])
+             if os.path.isdir(path_to_3D_data + "rna_only"):
+                 subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_MMCIFall_latest.tar.gz"])
+                 subprocess.run(["tar", "-C", path_to_3D_data + "rna_only" , "-czf", runDir + f"/archive/RNANET_MMCIFall_latest.tar.gz", "."])
 
     def sanitize_database(self):
         """Searches for issues in the database and correct them"""
@@ -1867,6 +1790,7 @@ class Pipeline:
 
         conn.close()
 
+ # ==================== General helper functions =====================
 
 def read_cpu_number():
     """This function reads the number of CPU cores available from /proc/cpuinfo.
@@ -1876,17 +1800,33 @@ def read_cpu_number():
     p = subprocess.run(['grep', '-Ec', '(Intel|AMD)', '/proc/cpuinfo'], stdout=subprocess.PIPE)
     return int(int(p.stdout.decode('utf-8')[:-1])/2)
 
- def init_worker(tqdm_lock=None):
+ def init_with_tqdm(tqdm_lock=None):
+     """
+     This initialization method kills the children when a signal is received,
+     and the children's progress is followed using TQDM progress bars.
+     """
     signal.signal(signal.SIGINT, signal.SIG_IGN)
     if tqdm_lock is not None:
         tqdm.set_lock(tqdm_lock)
 
+ def init_no_tqdm(arg1, arg2, arg3):
+     """
+     This initialization method does not kill the children when a signal is received:
+     they will complete and die even after the main process stops.
+     The children's progress is followed using stdout text logs (notify(), warn(), etc).
+     """
+     global n_launched, n_finished, n_skipped
+     n_launched = arg1
+     n_finished = arg2
+     n_skipped = arg3
+ 
 def warn(message, error=False):
-     """Pretty-print warnings and error messages.
+     """
+     Pretty-print warnings and error messages.
     """
     # Cut if too long
     if len(message) > 66:
-         x = message.find(' ', 50, 66)
+         x = message.find(' ', 40, 66)
         if x != -1:
             warn(message[:x], error=error)
             warn(message[x+1:], error=error)
@@ -1900,20 +1840,133 @@ def warn(message, error=False):
         print(f"\t> \033[33mWARN: {message:64s}\033[0m\t{warnsymb}", flush=True)
 
 def notify(message, post=''):
+     """
+     Pretty-print successfully finished tasks.
+     """
     if len(post):
         post = '(' + post + ')'
     print(f"\t> {message:70s}\t{validsymb}\t{post}", flush=True)
 
- def _mutable_SeqIO_to_alignment_iterator(handle):
-     records = list(MutableFastaIterator(handle))
-     if records:
-         yield MultipleSeqAlignment(records)
+ # ========================= Biopython overloads =====================
 
- def parse(handle):
-     with open(handle, 'r') as fp:
-         yield from _mutable_SeqIO_to_alignment_iterator(fp)
+ def save_mmcif(ioobj, out_file, select=Select(), preserve_atom_numbering=False):
+     """
+     MMCIF writer which renumbers residues according to the RNANet index_chain (coming from DSSR).
+     """
+ 
+     if isinstance(out_file, str):
+         fp = open(out_file, "w")
+         close_file = True
+     else:
+         fp = out_file
+         close_file = False
+     atom_dict = defaultdict(list)
+ 
+     # Iterate on models
+     for model in ioobj.structure.get_list():
+         if not select.accept_model(model):
+             continue
+ 
+         # mmCIF files with a single model have it specified as model 1
+         if model.serial_num == 0:
+             model_n = "1"
+         else:
+             model_n = str(model.serial_num)
+ 
+         # This is used to write label_entity_id and label_asym_id and
+         # increments from 1, changing with each molecule
+         entity_id = 0
+         if not preserve_atom_numbering:
+             atom_number = 1
+ 
+         # Iterate on chains
+         for chain in model.get_list():
+             if not select.accept_chain(chain):
+                 continue
+             chain_id = chain.get_id()
+             if chain_id == " ":
+                 chain_id = "."
+ 
+             # This is used to write label_seq_id, remaining blank for hetero residues
+             prev_residue_type = ""
+             prev_resname = ""
+ 
+             # Iterate on residues
+             for residue in chain.get_unpacked_list():
+                 if not select.accept_residue(residue):
+                     continue
+                 hetfield, resseq, icode = residue.get_id()
+                 if hetfield == " ":
+                     residue_type = "ATOM"
+                     label_seq_id = str(resseq)
+                 else:
+                     residue_type = "HETATM"
+                     label_seq_id = "."
+                 resseq = str(resseq)
+                 if icode == " ":
+                     icode = "?"
+                 resname = residue.get_resname()
+ 
+                 # Check if the molecule changes within the chain.
+                 # This will always increment for the first residue in a
+                 # chain due to the starting values above
+                 if residue_type != prev_residue_type or (residue_type == "HETATM" and resname != prev_resname):
+                     entity_id += 1
+                 prev_residue_type = residue_type
+                 prev_resname = resname
+                 label_asym_id = ioobj._get_label_asym_id(entity_id)
+ 
+                 # Iterate on atoms
+                 for atom in residue.get_unpacked_list():
+                     if select.accept_atom(atom):
+                         atom_dict["_atom_site.group_PDB"].append(residue_type)
+                         if preserve_atom_numbering:
+                             atom_number = atom.get_serial_number()
+                         atom_dict["_atom_site.id"].append(str(atom_number))
+                         if not preserve_atom_numbering:
+                             atom_number += 1
+                         element = atom.element.strip()
+                         if element == "":
+                             element = "?"
+                         atom_dict["_atom_site.type_symbol"].append(element)
+                         atom_dict["_atom_site.label_atom_id"].append(atom.get_name().strip())
+                         altloc = atom.get_altloc()
+                         if altloc == " ":
+                             altloc = "."
+                         atom_dict["_atom_site.label_alt_id"].append(altloc)
+                         atom_dict["_atom_site.label_comp_id"].append(resname.strip())
+                         atom_dict["_atom_site.label_asym_id"].append(label_asym_id)
+                         # The entity ID should be the same for similar chains
+                         # However this is non-trivial to calculate so we write "?"
+                         atom_dict["_atom_site.label_entity_id"].append("?")
+                         atom_dict["_atom_site.label_seq_id"].append(label_seq_id)
+                         atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode)
+                         coord = atom.get_coord()
+                         atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0])
+                         atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1])
+                         atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2])
+                         atom_dict["_atom_site.occupancy"].append(str(atom.get_occupancy()))
+                         atom_dict["_atom_site.B_iso_or_equiv"].append(str(atom.get_bfactor())                        )
+                         atom_dict["_atom_site.auth_seq_id"].append(resseq)
+                         atom_dict["_atom_site.auth_asym_id"].append(chain_id)
+                         atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n)
+ 
+     # Data block name is the structure ID with special characters removed
+     structure_id = ioobj.structure.id
+     for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]:
+         structure_id = structure_id.replace(c, "")
+     atom_dict["data_"] = structure_id
+ 
+     # Set the dictionary and write out using the generic dictionary method
+     ioobj.dic = atom_dict
+     ioobj._save_dict(fp)
+     if close_file:
+         fp.close()
 
 def read(handle):
+     """
+     A shortcut to parse alignment files with our custom class MutableFastaIterator.
+     """
     iterator = parse(handle)
     try:
         alignment = next(iterator)
@@ -1926,6 +1979,25 @@ def read(handle):
         pass
     return alignment
 
+ def parse(handle):
+     """
+     A shortcut to parse alignment files with our custom class MutableFastaIterator.
+     Called by function read().
+     """
+     with open(handle, 'r') as fp:
+         yield from _mutable_SeqIO_to_alignment_iterator(fp)
+ 
+ def _mutable_SeqIO_to_alignment_iterator(handle):
+     """
+     A shortcut to parse alignment files with our custom class MutableFastaIterator.
+     Used by the parse() function.
+     """
+     records = list(MutableFastaIterator(handle))
+     if records:
+         yield MultipleSeqAlignment(records)
+ 
+ # ========================== SQL related ============================
+ 
 def sql_define_tables(conn):
     conn.executescript(
         """ PRAGMA foreign_keys = on;
@@ -2085,12 +2157,19 @@ def sql_execute(conn, sql, many=False, data=None, warn_every=10):
             time.sleep(0.2)
     warn("Tried to reach database 100 times and failed. Aborting.", error=True)
 
+ # ======================= RNANet Jobs and tasks ======================
+ 
 @trace_unhandled_exceptions
 def execute_job(j, jobcount):
-     """Run a Job object.
     """
+     Run a Job object.
+     """
+ 
+     global n_launched, n_skipped, n_finished
+ 
     # increase the counter of running jobs
-     running_stats[0] += 1
+     with n_launched.get_lock():
+         n_launched.value += 1
 
     # Monitor this process
     m = -1
@@ -2098,7 +2177,7 @@ def execute_job(j, jobcount):
 
     if len(j.cmd_):  # The job is a system command
 
-         print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.label}")
+         print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.label}")
 
         # Add the command to logfile
         os.makedirs(runDir+"/logs", exist_ok=True)
@@ -2114,9 +2193,20 @@ def execute_job(j, jobcount):
 
             # run the command. subprocess.run will be a child of this process, and stays monitored.
             start_time = time.time()
-             r = subprocess.run(j.cmd_, timeout=j.timeout_,
-                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+             r = subprocess.run(j.cmd_, timeout=j.timeout_, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
             end_time = time.time()
+             if r.returncode != 0:
+                 if r.stderr is not None:
+                     print(r.stderr, flush=True)
+                 print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\tIssue faced with {j.label}, skipping it and adding it to known issues (if not known).")
+                 with n_launched.get_lock():
+                     n_launched.value -= 1
+                 with n_skipped.get_lock():
+                     n_skipped.value += 1
+                 if j.label not in issues:
+                     issues.add(j.label)
+                     with open("known_issues.txt", "a") as iss:
+                         iss.write(j.label+"\n")
 
             # Stop the Monitor, then get its result
             monitor.keep_watching = False
@@ -2124,7 +2214,7 @@ def execute_job(j, jobcount):
 
     elif j.func_ is not None:
 
-         print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if type(a) != list])})", flush=True)
+         print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if type(a) != list])})", flush=True)
 
         with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
             # put the monitor in a different thread
@@ -2140,7 +2230,8 @@ def execute_job(j, jobcount):
             m = assistant_future.result()
 
     # increase the counter of finished jobs
-     running_stats[1] += 1
+     with n_finished.get_lock():
+         n_finished.value += 1
 
     # return time and memory statistics, plus the job results
     t = end_time - start_time
@@ -2155,9 +2246,12 @@ def execute_joblist(fulljoblist):
     """
 
     # Reset counters
-     running_stats[0] = 0       # started
-     running_stats[1] = 0       # finished
-     running_stats[2] = 0       # failed
+     with n_launched.get_lock():
+         n_launched.value = 0
+     with n_skipped.get_lock():
+         n_skipped.value = 0
+     with n_finished.get_lock():
+         n_finished.value = 0
 
     # Sort jobs in a tree structure, first by priority, then by CPU numbers
     jobs = {}
@@ -2193,21 +2287,21 @@ def execute_joblist(fulljoblist):
 
             print("using", n, "processes:")
             # execute jobs of priority i that should be processed n by n:
-             p = Pool(processes=n, maxtasksperchild=1, initializer=init_worker)
+             p = Pool(processes=n, maxtasksperchild=1, initializer=init_no_tqdm, initargs=(n_launched, n_finished, n_skipped))
             try:
                 raw_results = p.map(partial(execute_job, jobcount=jobcount), bunch, chunksize=2)
                 p.close()
                 p.join()
             except KeyboardInterrupt as e:
                 warn("KeyboardInterrupt, killing workers (SIGKILL).", error=True)
-                 p.kill()
+                 p.terminate()
                 p.join()
                 raise e
 
             for j, r in zip(bunch, raw_results):
                 j.comp_time = round(r[0], 2)  # seconds
                 j.max_mem = int(r[1]/1000000)  # MB
-                 results.append((j.label, r[2], round(r[0], 2), int(r[1]/1000000)))
+                 results.append((j.label, r[2], j.comp_time, j.max_mem))
 
     # throw back the money
     return results
@@ -2235,7 +2329,7 @@ def work_infer_mappings(update_only, allmappings, fullinference, redundant, code
     # Split the comma-separated list of chain codes into chain codes:
     eq_class = codelist[0]
     codes = codelist[2].replace('+', ',').split(',')
-     representative=codelist[1].replace('+', ',').split(',')[0]
+     representative = codelist[1].replace('+', ',').split(',')[0]
     # Search for mappings that apply to an element of this PDB chains list:
     for c in codes:
         # search for Rfam mappings with this chain c:
@@ -2394,7 +2488,20 @@ def work_mmcif(pdb_id):
     # if not, read the CIF header and register the structure
     if not len(r):
         # Load the MMCIF file with Biopython
-         mmCif_info = pdb.MMCIF2Dict.MMCIF2Dict(final_filepath)
+         try:
+             mmCif_info = pdb.MMCIF2Dict.MMCIF2Dict(final_filepath)
+         except ValueError:
+             # The mmCIF file is empty or malformed. This happens when RNANet is interrupted
+             # while downloading it. Try to download it again.
+             subprocess.run(
+                 ["wget", f'http://files.rcsb.org/download/{pdb_id}.cif', "-O", final_filepath],
+                 stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+             )
+             try:
+                 mmCif_info = pdb.MMCIF2Dict.MMCIF2Dict(final_filepath)
+             except ValueError:
+                 warn(f"Empty or wrong {final_filepath.split('/')[-1]} file. Ignoring this structure.")
+                 return 1
 
         # Get info about that structure
         try:
@@ -2597,20 +2704,41 @@ def use_infernal(rfam_acc, alignopts):
         new_ali_path = path_to_seq_data + f"realigned/{rfam_acc}_new.stk"
 
         # Align the new sequences
-         with open(new_ali_path, 'w') as o:
-             p1 = subprocess.run(["cmalign", "--ifile", path_to_seq_data + f"realigned/{rfam_acc}.ins", 
+         with open(path_to_seq_data + f"realigned/{rfam_acc}_new.log", 'w') as o:
+             cmd = ["cmalign"]
+             if alignopts is not None:
+                 cmd += alignopts
+             p1 = subprocess.run(cmd + ["--ifile", path_to_seq_data + f"realigned/{rfam_acc}.ins", 
                                 "--sfile", path_to_seq_data + f"realigned/{rfam_acc}.tsv",
-                                 "-o", path_to_seq_data + f"realigned/{rfam_acc}++.stk",
+                                 "-o", new_ali_path,
                                 path_to_seq_data + f"realigned/{rfam_acc}.cm",
                                 path_to_seq_data + f"realigned/{rfam_acc}_new.fa"],
                                 stdout=o, stderr=subprocess.PIPE)
+             align_errors = p1.stderr.decode("utf-8")
+             if len(align_errors):
+                 if "--mxsize" in align_errors:
+                     # not enough available RAM to allocate the DP matrix
+                     warn(f"Not enough RAM to allocate cmalign DP matrix for family {rfam_acc}. Use --sina or --cmalign-opts.", error=True)
+                 else:
+                     warn(align_errors, error=True)
+                 return
         notify("Aligned new sequences together")
 
         # Detect doublons and remove them
-         existing_stk = AlignIO.read(existing_ali_path, "stockholm")
+         try:
+             existing_stk = AlignIO.read(existing_ali_path, "stockholm")
+         except ValueError:
+             # Not a stockholm file
+             warn(f"Existing alignment is not a Stockholm file !", error=True)
+             return
         existing_ids = [r.id for r in existing_stk]
         del existing_stk
-         new_stk = AlignIO.read(new_ali_path, "stockholm")
+         try:
+             new_stk = AlignIO.read(new_ali_path, "stockholm")
+         except ValueError:
+             # Not a stockholm file
+             warn(f"New alignment {new_ali_path} is not a Stockholm file !", error=True)
+             return
         new_ids = [r.id for r in new_stk]
         del new_stk
         doublons = [i for i in existing_ids if i in new_ids]
@@ -2629,7 +2757,11 @@ def use_infernal(rfam_acc, alignopts):
         p2 = subprocess.run(["esl-alimerge", "-o", path_to_seq_data + f"realigned/{rfam_acc}_merged.stk",
                             "--rna", existing_ali_path, new_ali_path],
                             stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
-         stderr = p1.stderr.decode('utf-8') + p2.stderr.decode('utf-8')
+         alignErrors = p1.stderr.decode('utf-8')
+         mergeErrors = p2.stderr.decode('utf-8')
+         alignErrors = "Alignment: "+ alignErrors if len(alignErrors) else "" 
+         mergeErrors = "Alignment: "+ mergeErrors if len(mergeErrors) else "" 
+         stderr = alignErrors + mergeErrors
         subprocess.run(["mv", path_to_seq_data + f"realigned/{rfam_acc}_merged.stk", existing_ali_path])
         notify("Merged alignments into one")
 
@@ -2642,7 +2774,7 @@ def use_infernal(rfam_acc, alignopts):
         
         cmd = ["cmalign"]
         if alignopts is not None:
-             cmd += " ".split(alignopts)
+             cmd += alignopts
         cmd += ['-o', path_to_seq_data + f"realigned/{rfam_acc}++.stk",
                 "--ifile", path_to_seq_data + f"realigned/{rfam_acc}.ins", 
                 "--sfile", path_to_seq_data + f"realigned/{rfam_acc}.tsv",
@@ -2663,8 +2795,8 @@ def use_infernal(rfam_acc, alignopts):
 
     # Convert Stockholm to aligned FASTA
     subprocess.run(["esl-reformat", "-o", path_to_seq_data + f"realigned/{rfam_acc}++.afa", 
-                         "--informat", "stockholm", 
-                         "afa", path_to_seq_data + f"realigned/{rfam_acc}++.stk"])
+                     "--informat", "stockholm", 
+                     "afa", path_to_seq_data + f"realigned/{rfam_acc}++.stk"])
     subprocess.run(["rm", "-f", "esltmp*"]) # We can use a joker here, because we are not running in parallel for this part.
 
 @trace_unhandled_exceptions
@@ -2708,7 +2840,7 @@ def work_save_pydca(f,alignment):
             warn(e)
 
 @trace_unhandled_exceptions
- def work_pssm_remap(f):
+ def work_pssm_remap(f, useSina=False):
     """Computes Position-Specific-Scoring-Matrices given the multiple sequence alignment of the RNA family.
     This also remaps the 3D object sequence with the aligned sequence in the MSA.
     If asked, the 3D object sequence is completed by the consensus nucleotide when one of them is missing.
@@ -2890,7 +3022,7 @@ def work_pssm_remap(f):
     setproctitle(f"RNAnet.py work_pssm_remap({f}) insert/match states")
 
     # Get back the information of match/insertion states from the STK file
-     if (not use_sina) or (f not in SSU_set and f not in LSU_set):
+     if (not useSina) or (f not in SSU_set and f not in LSU_set):
         alignstk = AlignIO.read(path_to_seq_data + "realigned/" + f + "++.stk", "stockholm")
         consensus_2d = alignstk.column_annotations["secondary_structure"]
         del alignstk
@@ -2936,8 +3068,6 @@ def work_pssm_remap(f):
                           gap_percent, consensus, cons_sec_struct)
                           VALUES (?, 0, 0, NULL, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, '-', NULL);""", data=(f,))
     
-     
-     
     # Save the number of "used columns" to table family ( = the length of the alignment if it was composed only of the RNANet chains)
     sql_execute(conn, f"UPDATE family SET ali_filtered_len = ? WHERE rfam_acc = ?;", data=(len(columns_to_save), f))
     conn.close()
@@ -3021,6 +3151,8 @@ def work_save(c, homology=True):
 
     df.to_csv(filename, float_format="%.2f", index=False)
 
+ # =========================== Main function =============================
+ 
 if __name__ == "__main__":
 
     fileDir = os.path.dirname(os.path.realpath(__file__))
@@ -3068,17 +3200,9 @@ if __name__ == "__main__":
         print(f"Among errors, {len(no_nts_set)} structures seem to contain RNA chains without defined nucleotides:", no_nts_set, flush=True)
     if len(weird_mappings):
         print(f"{len(weird_mappings)} mappings to Rfam were taken as absolute positions instead of residue numbers:", weird_mappings, flush=True)
-     if pp.SELECT_ONLY is None:
+     if pp.HOMOLOGY and pp.SELECT_ONLY is None:
         pp.checkpoint_save_chains()
 
-     if not pp.HOMOLOGY:
-         # Save chains to file
-         for c in pp.loaded_chains:
-             work_save(c, homology=False)
-         print("Completed.")
-         exit(0)
-     
- 
     # At this point, structure, chain and nucleotide tables of the database are up to date.
     # (Modulo some statistics computed by statistics.py)
 
@@ -3086,33 +3210,34 @@ if __name__ == "__main__":
     # Homology information
     # ===========================================================================
 
-     if pp.SELECT_ONLY is None:
-         # If your job failed, you can comment all the "3D information" part and start from here.
-         pp.checkpoint_load_chains()
+     if pp.HOMOLOGY:
+         if pp.SELECT_ONLY is None:
+             # If your job failed, you can comment all the "3D information" part and start from here.
+             pp.checkpoint_load_chains()
 
-     # Get the list of Rfam families found in the update
-     rfam_acc_to_download = {}
-     for c in pp.loaded_chains:
-         if c.mapping.rfam_acc not in rfam_acc_to_download.keys():
-             rfam_acc_to_download[c.mapping.rfam_acc] = [c]
-         else:
-             rfam_acc_to_download[c.mapping.rfam_acc].append(c)
+         # Get the list of Rfam families found in the update
+         rfam_acc_to_download = {}
+         for c in pp.loaded_chains:
+             if c.mapping.rfam_acc not in rfam_acc_to_download.keys():
+                 rfam_acc_to_download[c.mapping.rfam_acc] = [c]
+             else:
+                 rfam_acc_to_download[c.mapping.rfam_acc].append(c)
 
-     print(f"> Identified {len(rfam_acc_to_download.keys())} families to update and re-align with the crystals' sequences")
-     pp.fam_list = sorted(rfam_acc_to_download.keys())
+         print(f"> Identified {len(rfam_acc_to_download.keys())} families to update and re-align with the crystals' sequences")
+         pp.fam_list = sorted(rfam_acc_to_download.keys())
 
-     if len(pp.fam_list):
-         pp.prepare_sequences()
-         pp.realign()
+         if len(pp.fam_list):
+             pp.prepare_sequences()
+             pp.realign()
 
-         # At this point, the family table is almost up to date 
-         # (lacking idty_percent and ali_filtered_length, both set in statistics.py)
+             # At this point, the family table is almost up to date 
+             # (lacking idty_percent and ali_filtered_length, both set in statistics.py)
 
-         thr_idx_mgr = Manager()
-         idxQueue = thr_idx_mgr.Queue()
+             thr_idx_mgr = Manager()
+             idxQueue = thr_idx_mgr.Queue()
 
-         pp.remap()
-         pp.extractCMs()
+             pp.remap()
+             pp.extractCMs()
 
     # At this point, the align_column and re_mapping tables are up-to-date.
 
--- a/doc/Errors.md
View file @ad0e234
+++ b/doc/Errors.md
View file @ad0e234
- 
 # Warnings and errors in RNANet
 
 Use Ctrl + F on this page to look for your error message in the list.
@@ -27,7 +26,7 @@ DSSR complains because the CIF structure does not seem to contain nucleotides. T
 
 * **Error downloading and/or extracting Rfam.cm !** : We cannot retrieve the Rfam covariance models file. RNANet tries to find it at ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/Rfam.cm.gz so, check that your network is not blocking the FTP protocol (port 21 is open on your network), and check that the adress has not changed. If so, contact us so that we update RNANet with the correct address.
 
- * **Something's wrong with the SQL database. Check mysql-rfam-public.ebi.ac.uk status and try again later. Not printing statistics.** : We cannot retrieve family statistics from Rfam public server. Check if you can connect to it by hand : `mysql -u rfamro -P 4497 -D Rfam -h mysql-rfam-public.ebi.ac.uk`. if not, check that the port 497 is opened on your network.
+ * **Something's wrong with the SQL database. Check mysql-rfam-public.ebi.ac.uk status and try again later. Not printing statistics.** : We cannot retrieve family statistics from Rfam public server. Check if you can connect to it by hand : `mysql -u rfamro -P 4497 -D Rfam -h mysql-rfam-public.ebi.ac.uk`. If not, check that port 4497 is open on your network.
 
 * **Error downloading RFXXXXX.fa.gz: {custom-error}** : We cannot reach the Rfam FTP server to download homologous sequences. We look in ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/fasta_files/ so, check if you can access it from your network (check that port 21 is opened on your network). Check if the address has changed and notify us.
 
--- a/doc/FAQ.md
View file @ad0e234
+++ b/doc/FAQ.md
View file @ad0e234
@@ -7,6 +7,15 @@ In `cmalign` alignments, - means a nucleotide is missing compared to the covaria
 
 In the final filtered alignment that we provide for download, the same rule applies, but on top of that, some '.' are replaced by '-' when a gap in the 3D structure (a missing, unresolved nucleotide) is mapped to an insertion gap.
 
+ * **What are the cmalign options for ?**
+ 
+ From Infernal's user guide, we can quote that Infernal uses an HMM banding technique to accelerate alignment by default. It also takes care of 3' or 5' truncated sequences to be aligned correctly (and we have some).
+ First, one can choose an algorithm, between `--optacc` (maximizing posterior probabilities, the default) and `--cyk` (maximizing likelihood).
+ 
+ Then, the use of bands allows faster and more memory efficient computation, at the price of the guarantee of determining the optimal alignment. Bands can be disabled using the `--nonbanded` option. A better idea would be to control the threshold of probability mass to be considered negligible during HMM band calculation with the `--tau` parameter. Higher values of Tau yield greater speedups and lower memory usage, but a greater chance to miss the optimal alignment. In practice, the algorithm explores several Tau values (increasing it by a factor 2.0 from the original `--tau` value) until the DP matrix size falls below the threshold given by `--mxsize` (default 1028 Mb) or the value of `--maxtau` is reached (in this case, the program fails). One can disable this exploration with option `--fixedtau`. The default value of `--tau` is 1e-7, the default `--maxtau` is 0.05. Basically, you may decide on a value of `--mxsize` by dividing your available RAM by the number of cores used with cmalign. If necessary, you may use fewer cores than you have, using option `--cpu`.
+ 
+ Finally, if using `--cyk --nonbanded --notrunc --noprob`, one can use the `--small` option to align using the divide-and-conquer CYK algorithm from Eddy 2002, requiring very little memory but a lot of time. The major drawback of this is that it requires `--notrunc` and `--noprob`, so we give up on the correct alignment of truncated sequences, and the computation of posterior probabilities.
+ 
 * **Why are there some gap-only columns in the alignment ?**
 
 These columns are not completely gap-only, they contain at least one dash-gap '-'. This means an actual, physical nucleotide which should exist in the 3D structure should be located there. The previous and following nucleotides are **not** contiguous in space in 3D.
@@ -31,5 +40,5 @@ We first remove the nucleotides whose number is outside the family mapping (if a
 
 * **What are the versions of the dependencies you use ?**
 
- `cmalign` is v1.1.4, `sina` is v1.6.0, `x3dna-dssr` is v1.9.9, Biopython is v1.78.
+ `cmalign` is v1.1.4, `sina` is v1.6.0, `x3dna-dssr` is v2.3.2-2021jun29, Biopython is v1.78.
     
\ No newline at end of file
--- a/doc/INSTALL.md
View file @ad0e234
+++ b/doc/INSTALL.md
View file @ad0e234
 
- * [Required computational resources](#required-computational-resources)
+ * [Required hardware resources](#required-hardware-resources)
 * [Method 1 : Using Docker](#method-1-:-installation-using-docker)
 * [Method 2 : Classical command-line installation](#method-2-:-classical-command-line-installation-linux-only)
 * [Command options](#command-options)
@@ -7,19 +7,29 @@
 * [Post-computation tasks](#post-computation-tasks-estimate-quality)
 * [Output files](#output-files)
 
- # Required computational resources
- - CPU: no requirements. The program is optimized for multi-core CPUs, you might want to use Intel Xeons, AMD Ryzens, etc.
- - GPU: not required
- - RAM: 16 GB with a large swap partition is okay. 32 GB is recommended (usage peaks at ~27 GB, but this number depends on your number of CPU cores)
- - Storage: to date, it takes 60 GB for the 3D data (36 GB if you don't use the --extract option), 11 GB for the sequence data, and 7GB for the outputs (5.6 GB database, 1 GB archive of CSV files). You need to add a few more for the dependencies. Pick a 100GB partition and you are good to go. The computation speed is way better if you use a fast storage device (e.g. SSD instead of hard drive, or even better, a NVMe SSD) because of constant I/O with the SQlite database.
- - Network : We query the Rfam public MySQL server on port 4497. Make sure your network enables communication (there should not be any issue on private networks, but maybe you company/university closes ports by default). You will get an error message if the port is not open. Around 30 GB of data is downloaded.
+ # Required hardware resources
+ - **CPU**: The program is optimized for highly multi-core CPUs. The more you have, the faster the computation. Ensure you have enough RAM to follow.
+ - **GPU**: not required.
+ - **RAM**: This depends on the usage. 
+ 	- In regular mode, the first computation of alignments requires a huge 100GB. If you do not have them, you might:
+         - either want to use SINA (--sina) instead of Infernal to align the rRNAs. However, all information related to covariance models will not be available for them (distance matrices, 3D-only alignments...)
+         - or customize options --cmalign-opts and --cmalign-rrna-opts with cmalign arguments --cpu (number of cores to use) and --mxsize (max memory to allocate per core), so that it fits your machine. In very hard cases, also increase the parameter --maxtau from 0.05 to 0.1, but this reduces the quality of the alignments.
+     - In regular "update" mode, when the alignments already exist, less RAM is required, 64GB should be fine. If not, use the same options as the first time for your update runs.
+ 	- In 'no homology' mode, just for annotation of the structures without mapping to families, each core can peak to ~3GB (but not all at the same time if you are lucky). Use option --maxcores to reduce the number of cores if you do not have enough RAM. 32GB is fine in most cases. 
+ - **Storage**: to date, it takes 60 GB for the 3D data (36 GB if you don't use the --extract option), 11 GB for the sequence data, and 7GB for the outputs (5.6 GB database, 1 GB archive of CSV files). You need to add a few more for the dependencies. If you compute geometry statistics and parameter distributions, you need to count 80GB more (permanent) and 100GB more (that will be deleted at the end of the run). So, pick a 250GB partition and you are good to go. The computation speed is much higher if you use a fast storage device (e.g. SSD instead of hard drive, or even better, an NVMe M.2) because of constant I/O with the SQLite database.
+ - **Network** : We query the Rfam public MySQL server on port 4497. Make sure your network enables communication (there should not be any issue on private networks, but your university may close ports by default). You will get an error message if the port is not open. Around 30 GB of data is downloaded.
+ 
+ The IBISC-EvryRNA server example :
+ * Intel Xeon E7-4850 v4 (60 cores, 2.10GHz)
+ * 112 GB of RAM
+ * 250 GB of hard-disk storage
 
 # Method 1 : Installation using Docker
 
- * Step 1 : Download the [Docker container](https://entrepot.ibisc.univ-evry.fr/d/1aff90a9ef214a19b848/files/?p=/rnanet_v1.5b_docker.tar&dl=1). Open a terminal and move to the appropriate directory.
+ * Step 1 : Download the [Docker container](https://entrepot.ibisc.univ-evry.fr/d/1aff90a9ef214a19b848/files/?p=/rnanet_v1.6b_docker.tar&dl=1). Open a terminal and move to the appropriate directory.
 * Step 2 : Extract the archive to a Docker image named *rnanet* in your local installation
 ```
- $ docker load -i rnanet_v1.5b_docker.tar
+ $ docker load -i rnanet_v1.6b_docker.tar
 ```
 * Step 3 : Run the container, giving it 3 folders to mount as volumes: a first to store the 3D data, a second to store the sequence data and alignments, and a third to output the results, data and logs:
 ```
@@ -35,11 +45,11 @@ nohup bash -c 'time docker run --rm -v /path/to/3D/data/folder:/3D -v /path/to/s
 # Method 2 : Classical command line installation (Linux only)
 
 You need to install the dependencies:
- - DSSR, you need to register to the X3DNA forum [here](http://forum.x3dna.org/site-announcements/download-instructions/) and then download the DSSR binary [on that page](http://forum.x3dna.org/downloads/3dna-download/).  Make sure to have the `x3dna-dssr` binary in your $PATH variable so that RNANet.py finds it.
- - Infernal, to download at [Eddylab](http://eddylab.org/infernal/), several options are available depending on your preferences. Make sure to have the `cmalign`, `cmfetch`, `cmbuild`, `esl-alimanip`, `esl-alipid` and `esl-reformat` binaries in your $PATH variable, so that RNANet.py can find them.
- - SINA, follow [these instructions](https://sina.readthedocs.io/en/latest/install.html) for example. Make sure to have the `sina` binary in your $PATH.
+ - DSSR 1.9.9 or newer, you need to register to ask for a DSSR (academic) license [on that page](http://innovation.columbia.edu/technologies/CU20391). Make sure to have the `x3dna-dssr` binary in your $PATH variable so that RNANet.py finds it.
+ - Infernal 1.1.4 or newer, to download at [Eddylab](http://eddylab.org/infernal/), several options are available depending on your preferences. Make sure to have the `cmalign`, `cmfetch`, `cmbuild`, `esl-alimanip`, `esl-alimerge`, `esl-alipid` and `esl-reformat` binaries in your $PATH variable, so that RNANet.py can find them.
+ - SINA (if you plan to use it), follow [these instructions](https://sina.readthedocs.io/en/latest/install.html) for example. Make sure to have the `sina` binary in your $PATH.
 - Sqlite 3, available under the name *sqlite* in every distro's package manager,
- - Python >= 3.8, (Unfortunately, python3.6 is no longer supported, because of changes in the multiprocessing and Threading packages. Untested with Python 3.7.\*)
+ - Python >= 3.8, (Unfortunately, python3.6 is no longer supported, because of changes in the multiprocessing and Threading packages. Untested with Python 3.7.\*). 
 - The following Python packages: `python3.8 -m pip install biopython matplotlib pandas psutil pymysql requests scipy setproctitle sqlalchemy tqdm`. 
 
 Then, run it from the command line, preferably using nohup if your shell will be interrupted:
@@ -57,50 +67,52 @@ nohup bash -c 'time ~/Projects/RNANet/RNAnet.py --3d-folder ~/Data/RNA/3D/ --seq
 The detailed list of options is below:
 
 ```
- -h [ --help ]			Print this help message
- --version			Print the program version
+ -h [ --help ]                   Print this help message
+ --version                       Print the program version
 
 Select what to do:
 --------------------------------------------------------------------------------------------------------------
- -f [ --full-inference ]		Infer new mappings even if Rfam already provides some. Yields more copies of
- 				 chains mapped to different families.
- -s				Run statistics computations after completion
- --stats-opts=…			Pass additional command line options to the statistics.py script, e.g. "--wadley --distance-matrices"
- --extract			Extract the portions of 3D RNA chains to individual mmCIF files.
- --keep-hetatm=False		(True | False) Keep ions, waters and ligands in produced mmCIF files. 
- 				 Does not affect the descriptors.
- --no-homology			Do not try to compute PSSMs and do not align sequences.
- 				 Allows to yield more 3D data (consider chains without a Rfam mapping).
+ -f [ --full-inference ]         Infer new mappings even if Rfam already provides some. Yields more copies of
+                                  chains mapped to different families.
+ -s                              Run statistics computations after completion
+ --stats-opts=…                  Pass additional command line options to the statistics.py script, e.g. "--wadley --distance-matrices"
+ --extract                       Extract the portions of 3D RNA chains to individual mmCIF files.
+ --keep-hetatm=False             (True | False) Keep ions, waters and ligands in produced mmCIF files. 
+                                  Does not affect the descriptors.
+ --no-homology                   Do not try to compute PSSMs and do not align sequences.
+                                  Allows to yield more 3D data (consider chains without a Rfam mapping).
 
 Select how to do it:
 --------------------------------------------------------------------------------------------------------------
- --3d-folder=…			Path to a folder to store the 3D data files. Subfolders will contain:
- 					RNAcifs/		Full structures containing RNA, in mmCIF format
- 					rna_mapped_to_Rfam/	Extracted 'pure' portions of RNA chains mapped to families
- 					rna_only/	Extracted 'pure' RNA chains, not truncated
- 					datapoints/		Final results in CSV file format.
- --seq-folder=…			Path to a folder to store the sequence and alignment files. Subfolders will be:
- 					rfam_sequences/fasta/	Compressed hits to Rfam families
- 					realigned/		Sequences, covariance models, and alignments by family
- --sina				Align large subunit LSU and small subunit SSU ribosomal RNA using SINA instead of Infernal,
- 				 the other RNA families will be aligned using infernal.
- --maxcores=…			Limit the number of cores to use in parallel portions to reduce the simultaneous
- 				 need of RAM. Should be a number between 1 and your number of CPUs. Note that portions
- 				 of the pipeline already limit themselves to 50% or 70% of that number by default.
- --cmalign-opts=…		A string of additional options to pass to cmalign aligner, e.g. "--nonbanded --mxsize 2048"
- --archive			Create tar.gz archives of the datapoints text files and the alignments,
- 				 and update the link to the latest archive. 
- --no-logs			Do not save per-chain logs of the numbering modifications.
+ --3d-folder=…                   Path to a folder to store the 3D data files. Subfolders will contain:
+                                         RNAcifs/                Full structures containing RNA, in mmCIF format
+                                         rna_mapped_to_Rfam/     Extracted 'pure' portions of RNA chains mapped to families
+                                         rna_only/       	Extracted 'pure' RNA chains, not truncated
+                                         datapoints/             Final results in CSV file format.
+ --seq-folder=…                  Path to a folder to store the sequence and alignment files. Subfolders will be:
+                                         rfam_sequences/fasta/   Compressed hits to Rfam families
+                                         realigned/              Sequences, covariance models, and alignments by family
+ --sina                          Align large subunit LSU and small subunit SSU ribosomal RNA using SINA instead of Infernal,
+                                  the other RNA families will be aligned using infernal.
+ --maxcores=…                    Limit the number of cores to use in parallel portions to reduce the simultaneous
+                                  need of RAM. Should be a number between 1 and your number of CPUs. Note that portions
+                                  of the pipeline already limit themselves to 50% or 70% of that number by default.
+ --cmalign-opts=…                A string of additional options to pass to cmalign aligner, e.g. "--nonbanded --mxsize 2048"
+ --cmalign-rrna-opts=…   	Like cmalign-opts, but applied for rRNA (large families, memory-heavy jobs).
+ --archive                       Create tar.gz archives of the datapoints text files and the alignments,
+                                  and update the link to the latest archive. 
+ --no-logs                       Do not save per-chain logs of the numbering modifications.
 
 Select which data we are interested in:
 --------------------------------------------------------------------------------------------------------------
- -r 4.0 [ --resolution=4.0 ]	Maximum 3D structure resolution to consider a RNA chain.
- --all				Process chains even if they already are in the database.
- --redundant			Process all members of the equivalence classes not only the representative.
- --only				Ask to process a specific chains only (e.g. 4v49, 4v49_1_AA, or 4v49_1_AA_5-1523).
- --ignore-issues			Do not ignore already known issues and attempt to compute them.
- --update-homologous		Re-download Rfam and SILVA databases, realign all families, and recompute all CSV files.
- --from-scratch			Delete database, local 3D and sequence files, and known issues, and recompute.
+ -r 4.0 [ --resolution=4.0 ]     Maximum 3D structure resolution to consider a RNA chain.
+ --all                           Process chains even if they already are in the database.
+ --redundant                     Process all members of the equivalence classes not only the representative.
+ --only                          Ask to process a specific chains only (could be 4v49, 4v49_1_AA, or 4v49_1_AA_5-1523).
+ --ignore-issues                 Do not ignore already known issues and attempt to compute them.
+ --update-homologous             Re-download Rfam and SILVA databases, realign all families, and recompute all CSV files.
+ --from-scratch                  Delete database, local 3D and sequence files, and known issues, and recompute.
+ 
 
 ```
 Options --3d-folder and --seq-folder are mandatory for command-line installations, but should not be used for installations with Docker. In the Docker container, they are set by default to the paths you provide with the -v options.
--- a/doc/KnownIssues.md
View file @ad0e234
+++ b/doc/KnownIssues.md
View file @ad0e234
 # Known Issues
 
 ## Annotation and numbering issues
- * Some GDPs that are listed as HETATMs in the mmCIF files are not detected correctly to be real nucleotides. (e.g. 1e8o-E)
+ * [SOLVED] Some GDPs that are listed as HETATMs in the mmCIF files are not detected correctly to be real nucleotides. (e.g. 1e8o-E)
 * Some chains are truncated in different pieces with different chain names. Reason unknown (e.g. 6ztp-AX)
- * Some chains are not correctly renamed A in the produced separate files (e.g. 1d4r-B)
+ * [SOLVED] Some chains are not correctly renamed A in the produced separate files (e.g. 1d4r-B)
 
 ## Alignment issues
- * [SOLVED] Filtered alignments are shorter than the number of alignment columns saved to the SQL table `align_column`
- * Chain names appear in triple in the FASTA header (e.g. 1d4r[1]-B 1d4r[1]-B 1d4r[1]-B)
- 
- ## Technical running issues
- * [SOLVED] Files produced by Docker containers are owned by root and require root permissions to be read 
- * [SOLVED] SQLite WAL files are not deleted properly
+ * [SOLVED] Chain names appear in triple in the FASTA header (e.g. 1d4r[1]-B 1d4r[1]-B 1d4r[1]-B)
 
 # Known feature requests
- * [DONE] Get filtered versions of the sequence alignments containing the 3D chains, publicly available for download
- * [DONE] Get a consensus residue for each alignement column
- * [DONE] Get an option to limit the number of cores 
- * [DONE] Move to SILVA LSU release 138.1
- * [UPCOMING] Automated annotation of detected Recurrent Interaction Networks (RINs), see http://carnaval.lri.fr/ .
- * [UPCOMING] Possibly, automated detection of HLs and ILs from the 3D Motif Atlas (BGSU). Maybe. Their own website already does the job.
- * [UPCOMING] Weight sequences in alignment to give more importance to rarer sequences 
- * [UPCOMING] Give both gap_percent and insertion_gap_percent
+ * Automated annotation of detected Recurrent Interaction Networks (RINs), see http://carnaval.lri.fr/ .
+ * Possibly, automated detection of HLs and ILs from the 3D Motif Atlas (BGSU). Maybe. Their own website already does the job.
+ * Weight sequences in alignment to give more importance to rarer sequences 
+ * Give both gap_percent and insertion_gap_percent
 * A field estimating the quality of the sequence alignment in table family.
 * Possibly, more metrics about the alignments coming from Infernal.
 * Run cmscan ourselves from the NDB instead of using Rfam-PDB mappings ? (Iff this actually makes a real difference, untested yet)
 * Use and save Infernal alignment bounds and truncation information
+ * Save if a chain is a representative or not in BGSU list, so that they can be filtered easily
+ * Annotate unstructured regions (on a nucleotide basis)
+ 
+ ## Technical to-do list
+ * `cmalign --merge` is now deprecated, we use `esl-alimerge` instead. But, esl is a single-core process. We should run the merges of alignments of different families in parallel to save some time [TODO]. 
--- a/geometric_stats.py 0 → 100644
View file @ad0e234
+++ b/geometric_stats.py 0 → 100644
View file @ad0e234
--- a/known_issues.txt
View file @ad0e234
+++ b/known_issues.txt
View file @ad0e234
- 6ydp_1_AA_1176-2737
- 6ydw_1_AA_1176-2737
- 2z9q_1_A_1-72
- 1ml5_1_b_5-121
- 1ml5_1_a_1-2914
- 3ep2_1_Y_1-72
- 3eq3_1_Y_1-72
- 4v48_1_A6_1-73
- 1ml5_1_A_2-1520
- 1qzb_1_B_1-73
- 1qza_1_B_1-73
- 1ls2_1_B_1-73
- 1gsg_1_T_1-72
- 7d1a_1_A_805-902
- 7d0g_1_A_805-913
- 7d0f_1_A_817-913
- 3jcr_1_H_1-115
- 1vy7_1_AY_1-73
- 1vy7_1_CY_1-73
- 4w2h_1_CY_1-73
- 5zzm_1_M_3-118
- 2rdo_1_A_3-118
- 4v48_1_A9_3-118
- 4v47_1_A9_3-118
- 2ob7_1_A_10-319
- 1x1l_1_A_1-130
- 1zc8_1_Z_1-91
- 2ob7_1_D_1-130
- 4v42_1_BA_1-2914
- 4v42_1_BB_5-121
- 1r2x_1_C_1-58
- 1r2w_1_C_1-58
- 1eg0_1_L_1-56
- 3dg2_1_A_1-1542
- 3dg0_1_A_1-1542
- 4v48_1_BA_1-1543
- 4v47_1_BA_1-1542
- 3dg4_1_A_1-1542
- 3dg5_1_A_1-1542
- 5zzm_1_N_1-2903
- 2rdo_1_B_1-2904
- 3dg2_1_B_1-2904
- 3dg0_1_B_1-2904
- 4v48_1_A0_1-2904
- 4v47_1_A0_1-2904
- 3dg4_1_B_1-2904
- 3dg5_1_B_1-2904
- 1eg0_1_O_1-73
- 1zc8_1_A_1-59
- 1jgq_1_A_2-1520
- 4v42_1_AA_2-1520
- 1jgo_1_A_2-1520
- 1jgp_1_A_2-1520
- 1mvr_1_D_1-59
- 4c9d_1_D_29-1
- 4c9d_1_C_29-1
- 4adx_1_9_1-121
- 1zn1_1_B_1-59
- 1emi_1_B_1-108
- 3iy9_1_A_498-1027
- 3ep2_1_B_1-50
- 3eq3_1_B_1-50
- 3eq4_1_B_1-50
- 3pgw_1_R_1-164
- 3pgw_1_N_1-164
- 3cw1_1_x_1-138
- 3cw1_1_w_1-138
- 3cw1_1_V_1-138
- 3cw1_1_v_1-138
- 2iy3_1_B_9-105
- 3jcr_1_N_1-106
- 2vaz_1_A_64-177
- 2ftc_1_R_81-1466
- 3jcr_1_M_1-141
- 4v5z_1_B0_1-2902
- 5g2x_1_A_595-692
- 3iy8_1_A_1-540
- 4v5z_1_BY_2-113
- 4v5z_1_BZ_1-70
- 4v5z_1_B1_2-123
- 1mvr_1_B_1-96
- 4adx_1_0_1-2923
- 3eq4_1_Y_1-69
- 7a5p_1_2_259-449
- 6uz7_1_8_2140-2825
- 4v5z_1_AA_1-1563
 6cfj_1_1X
 6cfj_1_2X
 5hcq_1_1X
@@ -196,7 +110,6 @@
 5lzb_1_V
 6h58_1_W
 6h58_1_WW
- 1eg0_1_O
 5j8b_1_X
 4v7j_1_AV
 4v7j_1_BV
@@ -224,10 +137,6 @@
 7k00_1_B
 6ys3_1_A
 6qdw_1_A
- 5zzm_1_M
- 2rdo_1_A
- 4v48_1_A9
- 4v47_1_A9
 6hcj_1_Q3
 6hcq_1_Q3
 6o8w_1_U
@@ -295,7 +204,12 @@
 6ucq_1_2Y
 4w2e_1_X
 6ucq_1_2X
+ 7n1p_1_DT
+ 7n2u_1_DT
 6yss_1_W
+ 7n30_1_DT
+ 7n31_1_DT
+ 7n2c_1_DT
 5afi_1_Y
 5uq8_1_Z
 5wdt_1_Y
@@ -321,18 +235,20 @@
 4v4i_1_Y
 5uq8_1_X
 5uq7_1_X
- 1jgq_1_A
- 4v42_1_AA
- 1jgo_1_A
- 1jgp_1_A
 4v4j_1_W
 4v4i_1_W
- 4v42_1_BA
 4wt8_1_CS
 4wt8_1_DS
 4v4j_1_X
 4v4i_1_X
- 4v42_1_BB
+ 6lkq_1_S
+ 5h5u_1_H
+ 7d6z_1_F
+ 5lze_1_Y
+ 5lze_1_V
+ 5lze_1_X
+ 3jcj_1_G
+ 6o7k_1_G
 6d30_1_C
 6j7z_1_C
 3er9_1_D
@@ -367,20 +283,11 @@
 4oq9_1_1
 6rt5_1_A
 6rt5_1_E
- 4qu6_1_B
 6lkq_1_T
 6ys3_1_B
 6qdw_1_B
 3jbv_1_B
 3jbu_1_B
- 5zzm_1_N
- 2rdo_1_B
- 3dg2_1_B
- 3dg0_1_B
- 4v48_1_A0
- 4v47_1_A0
- 3dg4_1_B
- 3dg5_1_B
 6do8_1_B
 6dpi_1_B
 6dp9_1_B
@@ -437,25 +344,17 @@
 6doc_1_B
 6doe_1_B
 6n6g_1_D
- 6lkq_1_S
- 5h5u_1_H
- 7d6z_1_F
- 5lze_1_Y
- 5lze_1_V
- 5lze_1_X
- 3jcj_1_G
- 6o7k_1_G
- 3dg2_1_A
- 3dg0_1_A
- 4v48_1_BA
- 4v47_1_BA
- 3dg4_1_A
- 3dg5_1_A
 4b3r_1_W
 4b3t_1_W
 4b3s_1_W
+ 7b5k_1_X
 5o2r_1_X
 5kcs_1_1X
+ 7n1p_1_PT
+ 7n2u_1_PT
+ 7n30_1_PT
+ 7n31_1_PT
+ 7n2c_1_PT
 6zvk_1_E2
 6zvk_1_H2
 7a01_1_E2
@@ -549,15 +448,9 @@
 6xzb_1_G2
 6gz5_1_BW
 6gz3_1_BW
- 1qzb_1_B
- 1qza_1_B
- 1ls2_1_B
- 3ep2_1_Y
- 3eq3_1_Y
- 4v48_1_A6
- 2z9q_1_A
 4hot_1_X
 6d2z_1_C
+ 7eh0_1_I
 4tu0_1_F
 4tu0_1_G
 6r9o_1_B
@@ -572,37 +465,38 @@
 6sv4_1_MB
 7nrd_1_SM
 6i7o_1_MB
- 1gsg_1_T
 6zvi_1_D
 6sv4_1_NB
 6sv4_1_NC
 6i7o_1_NB
- 1ml5_1_A
+ 7nsq_1_V
+ 7nsp_1_V
 6swa_1_Q
 6swa_1_R
- 3j6x_1_IR
- 3j6y_1_IR
 6ole_1_T
 6om0_1_T
 6oli_1_T
 6om7_1_T
 6olf_1_T
 6w6l_1_T
+ 6tnu_1_M
+ 5mc6_1_M
+ 7nrc_1_SM
 6tb3_1_N
 7b7d_1_SM
 7b7d_1_SN
 6tnu_1_N
+ 7nrc_1_SN
 7nrd_1_SN
 6zot_1_C
+ 4qu6_1_B
 2uxb_1_X
 2x1f_1_B
 2x1a_1_B
- 3ep2_1_D
- 3eq3_1_D
- 1eg0_1_M
- 3eq4_1_D
 5o1y_1_B
- 3jcr_1_H
+ 4kzy_1_I
+ 4kzz_1_I
+ 4kzx_1_I
 6dzi_1_H
 5zeu_1_A
 6evj_1_N
@@ -705,7 +599,6 @@
 6ip6_1_ZZ
 6uu3_1_333
 6uu1_1_333
- 1pn8_1_D
 3er8_1_H
 3er8_1_G
 3er8_1_F
@@ -744,9 +637,8 @@
 4wtl_1_T
 4wtl_1_P
 1xnq_1_W
- 1x18_1_C
- 1x18_1_B
- 1x18_1_D
+ 7n2v_1_DT
+ 4peh_1_Z
 1vq6_1_4
 4am3_1_D
 4am3_1_H
@@ -764,6 +656,38 @@
 3rtj_1_D
 6ty9_1_M
 6tz1_1_N
+ 6q1h_1_D
+ 6q1h_1_H
+ 6p7p_1_F
+ 6p7p_1_E
+ 6p7p_1_D
+ 6vm6_1_J
+ 6vm6_1_G
+ 6wan_1_K
+ 6wan_1_H
+ 6wan_1_G
+ 6wan_1_L
+ 6wan_1_I
+ 6ywo_1_F
+ 6wan_1_J
+ 4oau_1_A
+ 6ywo_1_E
+ 6ywo_1_K
+ 6vm6_1_I
+ 6vm6_1_H
+ 6ywo_1_I
+ 2a1r_1_C
+ 6m6v_1_F
+ 6m6v_1_E
+ 2a1r_1_D
+ 3gpq_1_E
+ 3gpq_1_F
+ 6o79_1_C
+ 6vm6_1_K
+ 6m6v_1_G
+ 6hyu_1_D
+ 1laj_1_R
+ 6ybv_1_K
 6sce_1_B
 6xl1_1_C
 6scf_1_I
@@ -809,31 +733,20 @@
 1y1y_1_P
 5zuu_1_I
 5zuu_1_G
+ 7am2_1_R1
 4peh_1_W
 4peh_1_V
 4peh_1_X
 4peh_1_Y
- 4peh_1_Z
+ 7d8c_1_C
 6mkn_1_W
 7kl3_1_B
 4cxg_1_C
 4cxh_1_C
- 1x1l_1_A
- 1zc8_1_Z
- 2ob7_1_D
- 2ob7_1_A
 4eya_1_E
 4eya_1_F
 4eya_1_Q
 4eya_1_R
- 1qzc_1_B
- 1t1o_1_B
- 1mvr_1_C
- 1t1m_1_B
- 1t1o_1_C
- 1t1m_1_A
- 1t1o_1_A
- 2r1g_1_B
 4ht9_1_E
 6z1p_1_AB
 6z1p_1_AA
@@ -844,19 +757,14 @@
 5uk4_1_W
 5uk4_1_U
 5f6c_1_E
+ 7nwh_1_HH
 4rcj_1_B
 1xnr_1_W
- 2agn_1_A
- 2agn_1_C
- 2agn_1_B
 6e0o_1_C
 6o75_1_D
 6o75_1_C
 6e0o_1_B
 3j06_1_R
- 1r2x_1_C
- 1r2w_1_C
- 1eg0_1_L
 4eya_1_G
 4eya_1_H
 4eya_1_S
@@ -866,8 +774,7 @@
 1ibm_1_Z
 4dr5_1_V
 4d61_1_J
- 1trj_1_B
- 1trj_1_C
+ 7nwg_1_Q3
 5tbw_1_SR
 6hhq_1_SR
 6zvi_1_H
@@ -909,14 +816,8 @@
 6ppn_1_I
 5flx_1_Z
 6eri_1_AX
+ 7k5l_1_R
 7d80_1_Y
- 1zc8_1_A
- 1zc8_1_C
- 1zc8_1_B
- 1zc8_1_G
- 1zc8_1_I
- 1zc8_1_H
- 1zc8_1_J
 7du2_1_R
 4v8z_1_CX
 6kqe_1_I
@@ -930,7 +831,6 @@
 4xlr_1_Q
 6sty_1_C
 6sty_1_F
- 2xs5_1_D
 3ok4_1_N
 3ok4_1_L
 3ok4_1_Z
@@ -973,19 +873,17 @@
 3ol7_1_H
 3ol8_1_L
 3ol8_1_P
- 1qzc_1_C
- 1qzc_1_A
 6yrq_1_E
 6yrq_1_H
 6yrq_1_G
 6yrq_1_F
 6yrb_1_C
 6yrb_1_D
- 1mvr_1_D
 6gz5_1_BV
 6gz4_1_BV
 6gz3_1_BV
 6fti_1_Q
+ 7njc_1_B
 4v7e_1_AB
 4v7e_1_AE
 4v7e_1_AD
@@ -997,9 +895,7 @@
 3t1h_1_W
 3t1y_1_W
 1xmo_1_W
- 4adx_1_9
 6kr6_1_B
- 1zn1_1_B
 6z8k_1_X
 4csf_1_U
 4csf_1_Q
@@ -1025,7 +921,6 @@
 2xpj_1_D
 2vrt_1_H
 2vrt_1_G
- 1emi_1_B
 6r9m_1_B
 4nia_1_C
 4nia_1_A
@@ -1051,45 +946,23 @@
 1uvn_1_F
 1uvn_1_B
 1uvn_1_D
- 3iy9_1_A
 4wtk_1_T
 4wtk_1_P
 1vqn_1_4
 4oav_1_C
 4oav_1_A
- 3ep2_1_E
- 3eq3_1_E
- 3eq4_1_E
- 3ep2_1_A
- 3eq3_1_A
- 3eq4_1_A
- 3ep2_1_C
- 3eq3_1_C
- 3eq4_1_C
- 3ep2_1_B
- 3eq3_1_B
- 3eq4_1_B
 4i67_1_B
- 3pgw_1_R
- 3pgw_1_N
- 3cw1_1_X
- 3cw1_1_W
- 3cw1_1_V
- 7b0y_1_A
 6k32_1_T
 6k32_1_P
 5mmj_1_A
 5x8r_1_A
- 2agn_1_E
- 2agn_1_D
- 4v5z_1_BD
 6yw5_1_AA
 6ywe_1_AA
 6ywy_1_AA
 6ywx_1_AA
 3nvk_1_G
 3nvk_1_S
- 2iy3_1_B
+ 1cwp_1_D
 1cwp_1_F
 5z4j_1_B
 5gmf_1_E
@@ -1129,7 +1002,6 @@
 4kzz_1_J
 7a09_1_F
 5t2c_1_AN
- 4v5z_1_BF
 3j6b_1_E
 4v4f_1_B6
 4v4f_1_A5
@@ -1153,21 +1025,21 @@
 4v4f_1_B4
 4v4f_1_A6
 4v4f_1_B2
+ 7m4y_1_V
+ 7m4x_1_V
+ 6v3a_1_V
+ 6v39_1_V
 5it9_1_I
 7jqc_1_I
 5zsb_1_C
 5zsb_1_D
 5zsn_1_D
 5zsn_1_E
- 1cwp_1_D
- 3jcr_1_N
 6gfw_1_R
- 2vaz_1_A
 6zm6_1_X
 6zm5_1_X
 6zm6_1_W
 6zm5_1_W
- 4v5z_1_BP
 6n6e_1_D
 4g7o_1_I
 4g7o_1_S
@@ -1177,11 +1049,9 @@
 5uh6_1_I
 6l74_1_I
 5uh9_1_I
- 2ftc_1_R
 7a5j_1_X
 6sag_1_R
 4udv_1_R
- 2r1g_1_E
 5zsc_1_D
 5zsc_1_C
 6woy_1_I
@@ -1209,7 +1079,6 @@
 3m85_1_X
 3m85_1_Z
 3m85_1_Y
- 1e8s_1_C
 5wnp_1_B
 5wnv_1_B
 5yts_1_B
@@ -1232,8 +1101,11 @@
 6ij2_1_E
 3u2e_1_D
 3u2e_1_C
+ 7eh1_1_I
 5uef_1_C
 5uef_1_D
+ 7eh2_1_R
+ 7eh2_1_I
 4x4u_1_H
 4afy_1_D
 6oy5_1_I
@@ -1249,8 +1121,6 @@
 4k4s_1_H
 4k4t_1_H
 4k4t_1_D
- 1zn1_1_C
- 1zn0_1_C
 1xpu_1_G
 1xpu_1_L
 1xpr_1_L
@@ -1275,6 +1145,7 @@
 6gc5_1_H
 6gc5_1_G
 1n1h_1_B
+ 7n2v_1_PT
 4ohz_1_B
 6t83_1_6B
 4gv6_1_C
@@ -1287,14 +1158,11 @@
 6qx3_1_G
 2xnr_1_C
 4gkj_1_W
- 4v5z_1_BC
 5y88_1_X
- 4v5z_1_BB
 3j0o_1_H
 3j0l_1_H
 3j0p_1_H
 3j0q_1_H
- 4v5z_1_BH
 3j0o_1_F
 3j0l_1_F
 3j0p_1_F
@@ -1309,7 +1177,6 @@
 3j0l_1_A
 3j0q_1_A
 3j0p_1_A
- 4v5z_1_BJ
 6ys3_1_V
 6qdw_1_V
 5hk0_1_F
@@ -1345,14 +1212,10 @@
 5mrc_1_BB
 5mre_1_BB
 5mrf_1_BB
- 4v5z_1_BN
 3j46_1_P
- 3jcr_1_M
 4e6b_1_A
 4e6b_1_B
 6a6l_1_D
- 4v5z_1_BS
- 4v8t_1_1
 1uvi_1_D
 1uvi_1_F
 1uvi_1_E
@@ -1376,10 +1239,7 @@
 6ip5_1_2M
 6ip6_1_2M
 6qcs_1_M
- 486d_1_G
- 2r1g_1_C
- 486d_1_F
- 4v5z_1_B0
+ 7b5k_1_Z
 4nia_1_O
 4nia_1_J
 4nia_1_K
@@ -1391,13 +1251,11 @@
 4oq9_1_F
 4oq9_1_L
 6r9q_1_B
+ 7m4u_1_A
 6v3a_1_SN1
 6v3b_1_SN1
 6v39_1_SN1
 6v3e_1_SN1
- 1pn7_1_C
- 1mj1_1_Q
- 1mj1_1_R
 4dr6_1_V
 6kql_1_I
 4eya_1_M
@@ -1437,13 +1295,20 @@
 6ow3_1_I
 6ovy_1_I
 6oy6_1_I
- 4bbl_1_Y
- 4bbl_1_Z
 4qvd_1_H
 5gxi_1_B
- 3iy8_1_A
- 6tnu_1_M
- 5mc6_1_M
+ 7n06_1_G
+ 7n06_1_H
+ 7n06_1_I
+ 7n06_1_J
+ 7n06_1_K
+ 7n06_1_L
+ 7n33_1_G
+ 7n33_1_H
+ 7n33_1_I
+ 7n33_1_J
+ 7n33_1_K
+ 7n33_1_L
 5mc6_1_N
 4eya_1_O
 4eya_1_P
@@ -1453,33 +1318,13 @@
 6htq_1_W
 6htq_1_U
 6uu6_1_333
- 6v3a_1_V
- 6v39_1_V
 5a0v_1_F
 3avt_1_T
 6d1v_1_C
 4s2x_1_B
 4s2y_1_B
 5wnu_1_B
- 1zc8_1_F
 1vtm_1_R
- 4v5z_1_BA
- 4v5z_1_BE
- 4v5z_1_BG
- 4v5z_1_BI
- 4v5z_1_BK
- 4v5z_1_BM
- 4v5z_1_BL
- 4v5z_1_BV
- 4v5z_1_BO
- 4v5z_1_BQ
- 4v5z_1_BR
- 4v5z_1_BT
- 4v5z_1_BU
- 4v5z_1_BW
- 4v5z_1_BY
- 4v5z_1_BX
- 4v5z_1_BZ
 5elt_1_F
 5elt_1_E
 6xlj_1_R
@@ -1492,11 +1337,11 @@
 6bk8_1_I
 4cxg_1_B
 4cxh_1_B
- 4v5z_1_B1
 5z4d_1_B
 6o78_1_E
 6xa1_1_BV
 6ha8_1_X
+ 2xs5_1_D
 1m8w_1_E
 1m8w_1_F
 5udi_1_B
@@ -1525,11 +1370,13 @@
 3rzo_1_R
 2f4v_1_Z
 1qln_1_R
+ 3cw1_1_X
+ 3cw1_1_W
+ 7b0y_1_A
 6ogy_1_M
 6ogy_1_N
 6uej_1_B
 6ywy_1_BB
- 1x18_1_A
 5ytx_1_B
 4g0a_1_H
 6r9p_1_B
@@ -1559,11 +1406,6 @@
 5lzc_1_W
 5lzb_1_W
 3wzi_1_C
- 1mvr_1_E
- 1mvr_1_B
- 1mvr_1_A
- 4adx_1_0
- 4adx_1_8
 1n33_1_Z
 6dti_1_W
 3d2s_1_F
@@ -1572,12 +1414,7 @@
 5mre_1_AA
 5mrf_1_AA
 7jhy_1_Z
- 2r1g_1_A
- 2r1g_1_D
- 2r1g_1_F
- 3eq4_1_Y
 4wkr_1_C
- 2r1g_1_X
 4v99_1_EC
 4v99_1_AC
 4v99_1_BH
@@ -1647,38 +1484,6 @@
 2xs7_1_B
 1n38_1_B
 4qvc_1_G
- 6q1h_1_D
- 6q1h_1_H
- 6p7p_1_F
- 6p7p_1_E
- 6p7p_1_D
- 6vm6_1_J
- 6vm6_1_G
- 6wan_1_K
- 6wan_1_H
- 6wan_1_G
- 6wan_1_L
- 6wan_1_I
- 6ywo_1_F
- 6wan_1_J
- 4oau_1_A
- 6ywo_1_E
- 6ywo_1_K
- 6vm6_1_I
- 6vm6_1_H
- 6ywo_1_I
- 2a1r_1_C
- 6m6v_1_F
- 6m6v_1_E
- 2a1r_1_D
- 3gpq_1_E
- 3gpq_1_F
- 6o79_1_C
- 6vm6_1_K
- 6m6v_1_G
- 6hyu_1_D
- 1laj_1_R
- 6ybv_1_K
 6mpf_1_W
 6spc_1_A
 6spe_1_A
@@ -1692,43 +1497,36 @@
 4g0a_1_E
 2b2d_1_S
 5hkc_1_C
- 4kzy_1_I
- 4kzz_1_I
- 4kzx_1_I
 1rmv_1_B
 4qu7_1_X
 4qu7_1_V
 4qu7_1_U
- 4v5z_1_AH
- 4v5z_1_AA
- 4v5z_1_AB
- 4v5z_1_AC
- 4v5z_1_AD
- 4v5z_1_AE
- 4v5z_1_AF
- 4v5z_1_AG
 6pmi_1_3
 6pmj_1_3
 5hjz_1_C
- 7nrc_1_SM
- 7nrc_1_SN
- 7am2_1_R1
- 7k5l_1_R
- 7b5k_1_X
- 7d8c_1_C
- 7m4y_1_V
- 7m4x_1_V
- 7b5k_1_Z
- 7m4u_1_A
- 7n06_1_G
- 7n06_1_H
- 7n06_1_I
- 7n06_1_J
- 7n06_1_K
- 7n06_1_L
- 7n33_1_G
- 7n33_1_H
- 7n33_1_I
- 7n33_1_J
- 7n33_1_K
- 7n33_1_L
+ 6ydp_1_AA_1176-2737
+ 6ydw_1_AA_1176-2737
+ 1vy7_1_AY_1-73
+ 1vy7_1_CY_1-73
+ 4w2h_1_CY_1-73
+ 7d1a_1_A_805-902
+ 7d0g_1_A_805-913
+ 7d0f_1_A_817-913
+ 7o7z_1_AH_144-220
+ 4c9d_1_D_29-1
+ 4c9d_1_C_29-1
+ 7aih_1_1_2400-2963
+ 7aih_1_1_2984-3610
+ 7ane_1_2_1904-2468
+ 7ane_1_2_2489-3115
+ 5g2x_1_A_595-692
+ 7aor_1_2_2020-2579
+ 7aor_1_2_2589-3210
+ 7a5p_1_2_259-449
+ 7aor_1_A_2020-2579
+ 7aor_1_A_2589-3210
+ 7am2_1_1_1904-2470
+ 7am2_1_1_2491-3117
+ 7ane_1_1_1904-2468
+ 7ane_1_1_2489-3115
+ 6uz7_1_8_2140-2825
--- a/known_issues_reasons.txt
View file @ad0e234
+++ b/known_issues_reasons.txt
View file @ad0e234
- 6ydp_1_AA_1176-2737
- Could not find nucleotides of chain AA in annotation 6ydp.json. Either there is a problem with 6ydp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 6ydw_1_AA_1176-2737
- Could not find nucleotides of chain AA in annotation 6ydw.json. Either there is a problem with 6ydw mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 2z9q_1_A_1-72
- DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A_1-72.
- 
- 1ml5_1_b_5-121
- Could not find nucleotides of chain b in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1ml5_1_a_1-2914
- Could not find nucleotides of chain a in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 3ep2_1_Y_1-72
- DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_Y_1-72.
- 
- 3eq3_1_Y_1-72
- DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_Y_1-72.
- 
- 4v48_1_A6_1-73
- DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A6_1-73.
- 
- 1ml5_1_A_2-1520
- Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1qzb_1_B_1-73
- DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B_1-73.
- 
- 1qza_1_B_1-73
- DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B_1-73.
- 
- 1ls2_1_B_1-73
- DSSR warning 1ls2.json: no nucleotides found. Ignoring 1ls2_1_B_1-73.
- 
- 1gsg_1_T_1-72
- DSSR warning 1gsg.json: no nucleotides found. Ignoring 1gsg_1_T_1-72.
- 
- 7d1a_1_A_805-902
- Could not find nucleotides of chain A in annotation 7d1a.json. Either there is a problem with 7d1a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 7d0g_1_A_805-913
- Could not find nucleotides of chain A in annotation 7d0g.json. Either there is a problem with 7d0g mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 7d0f_1_A_817-913
- Could not find nucleotides of chain A in annotation 7d0f.json. Either there is a problem with 7d0f mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 3jcr_1_H_1-115
- DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_H_1-115.
- 
- 1vy7_1_AY_1-73
- Sequence is too short. (< 5 resolved nts)
- 
- 1vy7_1_CY_1-73
- Sequence is too short. (< 5 resolved nts)
- 
- 4w2h_1_CY_1-73
- Sequence is too short. (< 5 resolved nts)
- 
- 5zzm_1_M_3-118
- DSSR warning 5zzm.json: no nucleotides found. Ignoring 5zzm_1_M_3-118.
- 
- 2rdo_1_A_3-118
- DSSR warning 2rdo.json: no nucleotides found. Ignoring 2rdo_1_A_3-118.
- 
- 4v48_1_A9_3-118
- DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A9_3-118.
- 
- 4v47_1_A9_3-118
- DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A9_3-118.
- 
- 2ob7_1_A_10-319
- DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_A_10-319.
- 
- 1x1l_1_A_1-130
- DSSR warning 1x1l.json: no nucleotides found. Ignoring 1x1l_1_A_1-130.
- 
- 1zc8_1_Z_1-91
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_Z_1-91.
- 
- 2ob7_1_D_1-130
- DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_D_1-130.
- 
- 4v42_1_BA_1-2914
- Could not find nucleotides of chain BA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 4v42_1_BB_5-121
- Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1r2x_1_C_1-58
- DSSR warning 1r2x.json: no nucleotides found. Ignoring 1r2x_1_C_1-58.
- 
- 1r2w_1_C_1-58
- DSSR warning 1r2w.json: no nucleotides found. Ignoring 1r2w_1_C_1-58.
- 
- 1eg0_1_L_1-56
- DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_L_1-56.
- 
- 3dg2_1_A_1-1542
- DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A_1-1542.
- 
- 3dg0_1_A_1-1542
- DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A_1-1542.
- 
- 4v48_1_BA_1-1543
- DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_BA_1-1543.
- 
- 4v47_1_BA_1-1542
- DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_BA_1-1542.
- 
- 3dg4_1_A_1-1542
- DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A_1-1542.
- 
- 3dg5_1_A_1-1542
- DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A_1-1542.
- 
- 5zzm_1_N_1-2903
- DSSR warning 5zzm.json: no nucleotides found. Ignoring 5zzm_1_N_1-2903.
- 
- 2rdo_1_B_1-2904
- DSSR warning 2rdo.json: no nucleotides found. Ignoring 2rdo_1_B_1-2904.
- 
- 3dg2_1_B_1-2904
- DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_B_1-2904.
- 
- 3dg0_1_B_1-2904
- DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_B_1-2904.
- 
- 4v48_1_A0_1-2904
- DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A0_1-2904.
- 
- 4v47_1_A0_1-2904
- DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A0_1-2904.
- 
- 3dg4_1_B_1-2904
- DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_B_1-2904.
- 
- 3dg5_1_B_1-2904
- DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B_1-2904.
- 
- 1eg0_1_O_1-73
- DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_O_1-73.
- 
- 1zc8_1_A_1-59
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_A_1-59.
- 
- 1jgq_1_A_2-1520
- Could not find nucleotides of chain A in annotation 1jgq.json. Either there is a problem with 1jgq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 4v42_1_AA_2-1520
- Could not find nucleotides of chain AA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1jgo_1_A_2-1520
- Could not find nucleotides of chain A in annotation 1jgo.json. Either there is a problem with 1jgo mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1jgp_1_A_2-1520
- Could not find nucleotides of chain A in annotation 1jgp.json. Either there is a problem with 1jgp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1mvr_1_D_1-59
- DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_D_1-59.
- 
- 4c9d_1_D_29-1
- Mapping is reversed, this case is not supported (yet).
- 
- 4c9d_1_C_29-1
- Mapping is reversed, this case is not supported (yet).
- 
- 4adx_1_9_1-121
- DSSR warning 4adx.json: no nucleotides found. Ignoring 4adx_1_9_1-121.
- 
- 1zn1_1_B_1-59
- DSSR warning 1zn1.json: no nucleotides found. Ignoring 1zn1_1_B_1-59.
- 
- 1emi_1_B_1-108
- DSSR warning 1emi.json: no nucleotides found. Ignoring 1emi_1_B_1-108.
- 
- 3iy9_1_A_498-1027
- DSSR warning 3iy9.json: no nucleotides found. Ignoring 3iy9_1_A_498-1027.
- 
- 3ep2_1_B_1-50
- DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_B_1-50.
- 
- 3eq3_1_B_1-50
- DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_B_1-50.
- 
- 3eq4_1_B_1-50
- DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B_1-50.
- 
- 3pgw_1_R_1-164
- DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R_1-164.
- 
- 3pgw_1_N_1-164
- DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N_1-164.
- 
- 3cw1_1_x_1-138
- DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_x_1-138.
- 
- 3cw1_1_w_1-138
- DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_w_1-138.
- 
- 3cw1_1_V_1-138
- DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V_1-138.
- 
- 3cw1_1_v_1-138
- DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_v_1-138.
- 
- 2iy3_1_B_9-105
- DSSR warning 2iy3.json: no nucleotides found. Ignoring 2iy3_1_B_9-105.
- 
- 3jcr_1_N_1-106
- DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_N_1-106.
- 
- 2vaz_1_A_64-177
- DSSR warning 2vaz.json: no nucleotides found. Ignoring 2vaz_1_A_64-177.
- 
- 2ftc_1_R_81-1466
- DSSR warning 2ftc.json: no nucleotides found. Ignoring 2ftc_1_R_81-1466.
- 
- 3jcr_1_M_1-141
- DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_M_1-141.
- 
- 4v5z_1_B0_1-2902
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B0_1-2902.
- 
- 5g2x_1_A_595-692
- Sequence is too short. (< 5 resolved nts)
- 
- 3iy8_1_A_1-540
- DSSR warning 3iy8.json: no nucleotides found. Ignoring 3iy8_1_A_1-540.
- 
- 4v5z_1_BY_2-113
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BY_2-113.
- 
- 4v5z_1_BZ_1-70
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BZ_1-70.
- 
- 4v5z_1_B1_2-123
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B1_2-123.
- 
- 1mvr_1_B_1-96
- DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_B_1-96.
- 
- 4adx_1_0_1-2923
- DSSR warning 4adx.json: no nucleotides found. Ignoring 4adx_1_0_1-2923.
- 
- 3eq4_1_Y_1-69
- DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_Y_1-69.
- 
- 7a5p_1_2_259-449
- Could not find nucleotides of chain 2 in annotation 7a5p.json. Either there is a problem with 7a5p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 6uz7_1_8_2140-2825
- Could not find nucleotides of chain 8 in annotation 6uz7.json. Either there is a problem with 6uz7 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 4v5z_1_AA_1-1563
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AA_1-1563.
- 
 6cfj_1_1X
 Could not find nucleotides of chain 1X in annotation 6cfj.json. Either there is a problem with 6cfj mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -592,9 +334,6 @@ Could not find nucleotides of chain W in annotation 6h58.json. Either there is a
 6h58_1_WW
 Could not find nucleotides of chain WW in annotation 6h58.json. Either there is a problem with 6h58 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1eg0_1_O
- DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_O.
- 
 5j8b_1_X
 Could not find nucleotides of chain X in annotation 5j8b.json. Either there is a problem with 5j8b mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -676,18 +415,6 @@ Could not find nucleotides of chain A in annotation 6ys3.json. Either there is a
 6qdw_1_A
 Could not find nucleotides of chain A in annotation 6qdw.json. Either there is a problem with 6qdw mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 5zzm_1_M
- DSSR warning 5zzm.json: no nucleotides found. Ignoring 5zzm_1_M.
- 
- 2rdo_1_A
- DSSR warning 2rdo.json: no nucleotides found. Ignoring 2rdo_1_A.
- 
- 4v48_1_A9
- DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A9.
- 
- 4v47_1_A9
- DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A9.
- 
 6hcj_1_Q3
 Could not find nucleotides of chain Q3 in annotation 6hcj.json. Either there is a problem with 6hcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -889,9 +616,24 @@ Could not find nucleotides of chain X in annotation 4w2e.json. Either there is a
 6ucq_1_2X
 Could not find nucleotides of chain 2X in annotation 6ucq.json. Either there is a problem with 6ucq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
+ 7n1p_1_DT
+ Could not find nucleotides of chain DT in annotation 7n1p.json. Either there is a problem with 7n1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7n2u_1_DT
+ Could not find nucleotides of chain DT in annotation 7n2u.json. Either there is a problem with 7n2u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 6yss_1_W
 Could not find nucleotides of chain W in annotation 6yss.json. Either there is a problem with 6yss mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
+ 7n30_1_DT
+ Could not find nucleotides of chain DT in annotation 7n30.json. Either there is a problem with 7n30 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7n31_1_DT
+ Could not find nucleotides of chain DT in annotation 7n31.json. Either there is a problem with 7n31 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7n2c_1_DT
+ Could not find nucleotides of chain DT in annotation 7n2c.json. Either there is a problem with 7n2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 5afi_1_Y
 Could not find nucleotides of chain Y in annotation 5afi.json. Either there is a problem with 5afi mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -967,27 +709,12 @@ Could not find nucleotides of chain X in annotation 5uq8.json. Either there is a
 5uq7_1_X
 Could not find nucleotides of chain X in annotation 5uq7.json. Either there is a problem with 5uq7 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1jgq_1_A
- Could not find nucleotides of chain A in annotation 1jgq.json. Either there is a problem with 1jgq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 4v42_1_AA
- Could not find nucleotides of chain AA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1jgo_1_A
- Could not find nucleotides of chain A in annotation 1jgo.json. Either there is a problem with 1jgo mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1jgp_1_A
- Could not find nucleotides of chain A in annotation 1jgp.json. Either there is a problem with 1jgp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 4v4j_1_W
 Could not find nucleotides of chain W in annotation 4v4j.json. Either there is a problem with 4v4j mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
 4v4i_1_W
 Could not find nucleotides of chain W in annotation 4v4i.json. Either there is a problem with 4v4i mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v42_1_BA
- Could not find nucleotides of chain BA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 4wt8_1_CS
 Could not find nucleotides of chain CS in annotation 4wt8.json. Either there is a problem with 4wt8 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -1000,8 +727,29 @@ Could not find nucleotides of chain X in annotation 4v4j.json. Either there is a
 4v4i_1_X
 Could not find nucleotides of chain X in annotation 4v4i.json. Either there is a problem with 4v4i mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v42_1_BB
- Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 6lkq_1_S
+ Could not find nucleotides of chain S in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 5h5u_1_H
+ Could not find nucleotides of chain H in annotation 5h5u.json. Either there is a problem with 5h5u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7d6z_1_F
+ Could not find nucleotides of chain F in annotation 7d6z.json. Either there is a problem with 7d6z mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 5lze_1_Y
+ Could not find nucleotides of chain Y in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 5lze_1_V
+ Could not find nucleotides of chain V in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 5lze_1_X
+ Could not find nucleotides of chain X in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 3jcj_1_G
+ Could not find nucleotides of chain G in annotation 3jcj.json. Either there is a problem with 3jcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 6o7k_1_G
+ Could not find nucleotides of chain G in annotation 6o7k.json. Either there is a problem with 6o7k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
 6d30_1_C
 Sequence is too short. (< 5 resolved nts)
@@ -1105,9 +853,6 @@ Sequence is too short. (< 5 resolved nts)
 6rt5_1_E
 Sequence is too short. (< 5 resolved nts)
 
- 4qu6_1_B
- Sequence is too short. (< 5 resolved nts)
- 
 6lkq_1_T
 Could not find nucleotides of chain T in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -1123,30 +868,6 @@ Could not find nucleotides of chain B in annotation 3jbv.json. Either there is a
 3jbu_1_B
 Could not find nucleotides of chain B in annotation 3jbu.json. Either there is a problem with 3jbu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 5zzm_1_N
- DSSR warning 5zzm.json: no nucleotides found. Ignoring 5zzm_1_N.
- 
- 2rdo_1_B
- DSSR warning 2rdo.json: no nucleotides found. Ignoring 2rdo_1_B.
- 
- 3dg2_1_B
- DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_B.
- 
- 3dg0_1_B
- DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_B.
- 
- 4v48_1_A0
- DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A0.
- 
- 4v47_1_A0
- DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A0.
- 
- 3dg4_1_B
- DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_B.
- 
- 3dg5_1_B
- DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B.
- 
 6do8_1_B
 Sequence is too short. (< 5 resolved nts)
 
@@ -1315,48 +1036,6 @@ Sequence is too short. (< 5 resolved nts)
 6n6g_1_D
 Sequence is too short. (< 5 resolved nts)
 
- 6lkq_1_S
- Could not find nucleotides of chain S in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 5h5u_1_H
- Could not find nucleotides of chain H in annotation 5h5u.json. Either there is a problem with 5h5u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 7d6z_1_F
- Could not find nucleotides of chain F in annotation 7d6z.json. Either there is a problem with 7d6z mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 5lze_1_Y
- Could not find nucleotides of chain Y in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 5lze_1_V
- Could not find nucleotides of chain V in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 5lze_1_X
- Could not find nucleotides of chain X in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 3jcj_1_G
- Could not find nucleotides of chain G in annotation 3jcj.json. Either there is a problem with 3jcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 6o7k_1_G
- Could not find nucleotides of chain G in annotation 6o7k.json. Either there is a problem with 6o7k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 3dg2_1_A
- DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A.
- 
- 3dg0_1_A
- DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A.
- 
- 4v48_1_BA
- DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_BA.
- 
- 4v47_1_BA
- DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_BA.
- 
- 3dg4_1_A
- DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A.
- 
- 3dg5_1_A
- DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A.
- 
 4b3r_1_W
 Sequence is too short. (< 5 resolved nts)
 
@@ -1366,12 +1045,30 @@ Sequence is too short. (< 5 resolved nts)
 4b3s_1_W
 Sequence is too short. (< 5 resolved nts)
 
+ 7b5k_1_X
+ Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 5o2r_1_X
 Could not find nucleotides of chain X in annotation 5o2r.json. Either there is a problem with 5o2r mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
 5kcs_1_1X
 Could not find nucleotides of chain 1X in annotation 5kcs.json. Either there is a problem with 5kcs mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
+ 7n1p_1_PT
+ Could not find nucleotides of chain PT in annotation 7n1p.json. Either there is a problem with 7n1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7n2u_1_PT
+ Could not find nucleotides of chain PT in annotation 7n2u.json. Either there is a problem with 7n2u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7n30_1_PT
+ Could not find nucleotides of chain PT in annotation 7n30.json. Either there is a problem with 7n30 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7n31_1_PT
+ Could not find nucleotides of chain PT in annotation 7n31.json. Either there is a problem with 7n31 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7n2c_1_PT
+ Could not find nucleotides of chain PT in annotation 7n2c.json. Either there is a problem with 7n2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 6zvk_1_E2
 Could not find nucleotides of chain E2 in annotation 6zvk.json. Either there is a problem with 6zvk mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -1651,33 +1348,15 @@ Could not find nucleotides of chain BW in annotation 6gz5.json. Either there is 
 6gz3_1_BW
 Could not find nucleotides of chain BW in annotation 6gz3.json. Either there is a problem with 6gz3 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1qzb_1_B
- DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B.
- 
- 1qza_1_B
- DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B.
- 
- 1ls2_1_B
- DSSR warning 1ls2.json: no nucleotides found. Ignoring 1ls2_1_B.
- 
- 3ep2_1_Y
- DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_Y.
- 
- 3eq3_1_Y
- DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_Y.
- 
- 4v48_1_A6
- DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A6.
- 
- 2z9q_1_A
- DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A.
- 
 4hot_1_X
 Sequence is too short. (< 5 resolved nts)
 
 6d2z_1_C
 Sequence is too short. (< 5 resolved nts)
 
+ 7eh0_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
 4tu0_1_F
 Sequence is too short. (< 5 resolved nts)
 
@@ -1720,9 +1399,6 @@ Could not find nucleotides of chain SM in annotation 7nrd.json. Either there is 
 6i7o_1_MB
 Could not find nucleotides of chain MB in annotation 6i7o.json. Either there is a problem with 6i7o mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1gsg_1_T
- DSSR warning 1gsg.json: no nucleotides found. Ignoring 1gsg_1_T.
- 
 6zvi_1_D
 Could not find nucleotides of chain D in annotation 6zvi.json. Either there is a problem with 6zvi mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -1735,8 +1411,11 @@ Could not find nucleotides of chain NC in annotation 6sv4.json. Either there is 
 6i7o_1_NB
 Could not find nucleotides of chain NB in annotation 6i7o.json. Either there is a problem with 6i7o mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1ml5_1_A
- Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 7nsq_1_V
+ Could not find nucleotides of chain V in annotation 7nsq.json. Either there is a problem with 7nsq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7nsp_1_V
+ Could not find nucleotides of chain V in annotation 7nsp.json. Either there is a problem with 7nsp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
 6swa_1_Q
 Could not find nucleotides of chain Q in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -1744,12 +1423,6 @@ Could not find nucleotides of chain Q in annotation 6swa.json. Either there is a
 6swa_1_R
 Could not find nucleotides of chain R in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 3j6x_1_IR
- Could not find nucleotides of chain IR in annotation 3j6x.json. Either there is a problem with 3j6x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 3j6y_1_IR
- Could not find nucleotides of chain IR in annotation 3j6y.json. Either there is a problem with 3j6y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 6ole_1_T
 Could not find nucleotides of chain T in annotation 6ole.json. Either there is a problem with 6ole mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -1768,6 +1441,15 @@ Could not find nucleotides of chain T in annotation 6olf.json. Either there is a
 6w6l_1_T
 Could not find nucleotides of chain T in annotation 6w6l.json. Either there is a problem with 6w6l mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
+ 6tnu_1_M
+ Could not find nucleotides of chain M in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 5mc6_1_M
+ Could not find nucleotides of chain M in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7nrc_1_SM
+ Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 6tb3_1_N
 Could not find nucleotides of chain N in annotation 6tb3.json. Either there is a problem with 6tb3 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -1780,12 +1462,18 @@ Could not find nucleotides of chain SN in annotation 7b7d.json. Either there is 
 6tnu_1_N
 Could not find nucleotides of chain N in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
+ 7nrc_1_SN
+ Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 7nrd_1_SN
 Could not find nucleotides of chain SN in annotation 7nrd.json. Either there is a problem with 7nrd mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
 6zot_1_C
 Sequence is too short. (< 5 resolved nts)
 
+ 4qu6_1_B
+ Sequence is too short. (< 5 resolved nts)
+ 
 2uxb_1_X
 Sequence is too short. (< 5 resolved nts)
 
@@ -1795,23 +1483,17 @@ Sequence is too short. (< 5 resolved nts)
 2x1a_1_B
 Sequence is too short. (< 5 resolved nts)
 
- 3ep2_1_D
- DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_D.
- 
- 3eq3_1_D
- DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_D.
- 
- 1eg0_1_M
- DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_M.
- 
- 3eq4_1_D
- DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_D.
- 
 5o1y_1_B
 Sequence is too short. (< 5 resolved nts)
 
- 3jcr_1_H
- DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_H.
+ 4kzy_1_I
+ Could not find nucleotides of chain I in annotation 4kzy.json. Either there is a problem with 4kzy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 4kzz_1_I
+ Could not find nucleotides of chain I in annotation 4kzz.json. Either there is a problem with 4kzz mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 4kzx_1_I
+ Could not find nucleotides of chain I in annotation 4kzx.json. Either there is a problem with 4kzx mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
 6dzi_1_H
 Could not find nucleotides of chain H in annotation 6dzi.json. Either there is a problem with 6dzi mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -2119,9 +1801,6 @@ Sequence is too short. (< 5 resolved nts)
 6uu1_1_333
 Sequence is too short. (< 5 resolved nts)
 
- 1pn8_1_D
- DSSR warning 1pn8.json: no nucleotides found. Ignoring 1pn8_1_D.
- 
 3er8_1_H
 Sequence is too short. (< 5 resolved nts)
 
@@ -2180,7 +1859,7 @@ Sequence is too short. (< 5 resolved nts)
 Sequence is too short. (< 5 resolved nts)
 
 1mvr_1_1
- DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_1.
+ Sequence is too short. (< 5 resolved nts)
 
 6vyt_1_Y
 Sequence is too short. (< 5 resolved nts)
@@ -2236,14 +1915,11 @@ Sequence is too short. (< 5 resolved nts)
 1xnq_1_W
 Sequence is too short. (< 5 resolved nts)
 
- 1x18_1_C
- DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_C.
- 
- 1x18_1_B
- DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_B.
+ 7n2v_1_DT
+ Could not find nucleotides of chain DT in annotation 7n2v.json. Either there is a problem with 7n2v mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1x18_1_D
- DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_D.
+ 4peh_1_Z
+ Sequence is too short. (< 5 resolved nts)
 
 1vq6_1_4
 Sequence is too short. (< 5 resolved nts)
@@ -2296,22 +1972,118 @@ Sequence is too short. (< 5 resolved nts)
 6tz1_1_N
 Sequence is too short. (< 5 resolved nts)
 
- 6sce_1_B
+ 6q1h_1_D
 Sequence is too short. (< 5 resolved nts)
 
- 6xl1_1_C
+ 6q1h_1_H
 Sequence is too short. (< 5 resolved nts)
 
- 6scf_1_I
+ 6p7p_1_F
 Sequence is too short. (< 5 resolved nts)
 
- 6scf_1_K
+ 6p7p_1_E
 Sequence is too short. (< 5 resolved nts)
 
- 6yud_1_K
+ 6p7p_1_D
 Sequence is too short. (< 5 resolved nts)
 
- 6yud_1_O
+ 6vm6_1_J
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6vm6_1_G
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6wan_1_K
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6wan_1_H
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6wan_1_G
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6wan_1_L
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6wan_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6ywo_1_F
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6wan_1_J
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 4oau_1_A
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6ywo_1_E
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6ywo_1_K
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6vm6_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6vm6_1_H
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6ywo_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 2a1r_1_C
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6m6v_1_F
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6m6v_1_E
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 2a1r_1_D
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 3gpq_1_E
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 3gpq_1_F
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6o79_1_C
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6vm6_1_K
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6m6v_1_G
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6hyu_1_D
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 1laj_1_R
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6ybv_1_K
+ Could not find nucleotides of chain K in annotation 6ybv.json. Either there is a problem with 6ybv mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 6sce_1_B
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6xl1_1_C
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6scf_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6scf_1_K
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6yud_1_K
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 6yud_1_O
 Sequence is too short. (< 5 resolved nts)
 
 6scf_1_M
@@ -2423,7 +2195,7 @@ Sequence is too short. (< 5 resolved nts)
 Sequence is too short. (< 5 resolved nts)
 
 1y1y_1_P
- DSSR warning 1y1y.json: no nucleotides found. Ignoring 1y1y_1_P.
+ Sequence is too short. (< 5 resolved nts)
 
 5zuu_1_I
 Sequence is too short. (< 5 resolved nts)
@@ -2431,6 +2203,9 @@ Sequence is too short. (< 5 resolved nts)
 5zuu_1_G
 Sequence is too short. (< 5 resolved nts)
 
+ 7am2_1_R1
+ Sequence is too short. (< 5 resolved nts)
+ 
 4peh_1_W
 Sequence is too short. (< 5 resolved nts)
 
@@ -2443,7 +2218,7 @@ Sequence is too short. (< 5 resolved nts)
 4peh_1_Y
 Sequence is too short. (< 5 resolved nts)
 
- 4peh_1_Z
+ 7d8c_1_C
 Sequence is too short. (< 5 resolved nts)
 
 6mkn_1_W
@@ -2458,18 +2233,6 @@ Could not find nucleotides of chain C in annotation 4cxg.json. Either there is a
 4cxh_1_C
 Could not find nucleotides of chain C in annotation 4cxh.json. Either there is a problem with 4cxh mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1x1l_1_A
- DSSR warning 1x1l.json: no nucleotides found. Ignoring 1x1l_1_A.
- 
- 1zc8_1_Z
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_Z.
- 
- 2ob7_1_D
- DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_D.
- 
- 2ob7_1_A
- DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_A.
- 
 4eya_1_E
 Could not find nucleotides of chain E in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -2482,30 +2245,6 @@ Could not find nucleotides of chain Q in annotation 4eya.json. Either there is a
 4eya_1_R
 Could not find nucleotides of chain R in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1qzc_1_B
- DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_B.
- 
- 1t1o_1_B
- DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_B.
- 
- 1mvr_1_C
- DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_C.
- 
- 1t1m_1_B
- DSSR warning 1t1m.json: no nucleotides found. Ignoring 1t1m_1_B.
- 
- 1t1o_1_C
- DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_C.
- 
- 1t1m_1_A
- DSSR warning 1t1m.json: no nucleotides found. Ignoring 1t1m_1_A.
- 
- 1t1o_1_A
- DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_A.
- 
- 2r1g_1_B
- DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_B.
- 
 4ht9_1_E
 Sequence is too short. (< 5 resolved nts)
 
@@ -2536,21 +2275,15 @@ Could not find nucleotides of chain U in annotation 5uk4.json. Either there is a
 5f6c_1_E
 Sequence is too short. (< 5 resolved nts)
 
+ 7nwh_1_HH
+ Could not find nucleotides of chain HH in annotation 7nwh.json. Either there is a problem with 7nwh mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 4rcj_1_B
 Sequence is too short. (< 5 resolved nts)
 
 1xnr_1_W
 Sequence is too short. (< 5 resolved nts)
 
- 2agn_1_A
- DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_A.
- 
- 2agn_1_C
- DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_C.
- 
- 2agn_1_B
- DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_B.
- 
 6e0o_1_C
 Sequence is too short. (< 5 resolved nts)
 
@@ -2566,15 +2299,6 @@ Sequence is too short. (< 5 resolved nts)
 3j06_1_R
 Sequence is too short. (< 5 resolved nts)
 
- 1r2x_1_C
- DSSR warning 1r2x.json: no nucleotides found. Ignoring 1r2x_1_C.
- 
- 1r2w_1_C
- DSSR warning 1r2w.json: no nucleotides found. Ignoring 1r2w_1_C.
- 
- 1eg0_1_L
- DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_L.
- 
 4eya_1_G
 Could not find nucleotides of chain G in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -2602,11 +2326,8 @@ Sequence is too short. (< 5 resolved nts)
 4d61_1_J
 Could not find nucleotides of chain J in annotation 4d61.json. Either there is a problem with 4d61 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1trj_1_B
- DSSR warning 1trj.json: no nucleotides found. Ignoring 1trj_1_B.
- 
- 1trj_1_C
- DSSR warning 1trj.json: no nucleotides found. Ignoring 1trj_1_C.
+ 7nwg_1_Q3
+ Could not find nucleotides of chain Q3 in annotation 7nwg.json. Either there is a problem with 7nwg mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
 5tbw_1_SR
 Could not find nucleotides of chain SR in annotation 5tbw.json. Either there is a problem with 5tbw mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -2731,30 +2452,12 @@ Could not find nucleotides of chain Z in annotation 5flx.json. Either there is a
 6eri_1_AX
 Could not find nucleotides of chain AX in annotation 6eri.json. Either there is a problem with 6eri mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
+ 7k5l_1_R
+ Sequence is too short. (< 5 resolved nts)
+ 
 7d80_1_Y
 Could not find nucleotides of chain Y in annotation 7d80.json. Either there is a problem with 7d80 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1zc8_1_A
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_A.
- 
- 1zc8_1_C
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_C.
- 
- 1zc8_1_B
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_B.
- 
- 1zc8_1_G
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_G.
- 
- 1zc8_1_I
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_I.
- 
- 1zc8_1_H
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_H.
- 
- 1zc8_1_J
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_J.
- 
 7du2_1_R
 Sequence is too short. (< 5 resolved nts)
 
@@ -2794,9 +2497,6 @@ Sequence is too short. (< 5 resolved nts)
 6sty_1_F
 Sequence is too short. (< 5 resolved nts)
 
- 2xs5_1_D
- Sequence is too short. (< 5 resolved nts)
- 
 3ok4_1_N
 Could not find nucleotides of chain N in annotation 3ok4.json. Either there is a problem with 3ok4 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -2923,12 +2623,6 @@ Sequence is too short. (< 5 resolved nts)
 3ol8_1_P
 Sequence is too short. (< 5 resolved nts)
 
- 1qzc_1_C
- DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_C.
- 
- 1qzc_1_A
- DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_A.
- 
 6yrq_1_E
 Sequence is too short. (< 5 resolved nts)
 
@@ -2947,9 +2641,6 @@ Sequence is too short. (< 5 resolved nts)
 6yrb_1_D
 Sequence is too short. (< 5 resolved nts)
 
- 1mvr_1_D
- DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_D.
- 
 6gz5_1_BV
 Could not find nucleotides of chain BV in annotation 6gz5.json. Either there is a problem with 6gz5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -2962,6 +2653,9 @@ Could not find nucleotides of chain BV in annotation 6gz3.json. Either there is 
 6fti_1_Q
 Could not find nucleotides of chain Q in annotation 6fti.json. Either there is a problem with 6fti mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
+ 7njc_1_B
+ Sequence is too short. (< 5 resolved nts)
+ 
 4v7e_1_AB
 Could not find nucleotides of chain AB in annotation 4v7e.json. Either there is a problem with 4v7e mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -2995,15 +2689,9 @@ Sequence is too short. (< 5 resolved nts)
 1xmo_1_W
 Sequence is too short. (< 5 resolved nts)
 
- 4adx_1_9
- DSSR warning 4adx.json: no nucleotides found. Ignoring 4adx_1_9.
- 
 6kr6_1_B
 Sequence is too short. (< 5 resolved nts)
 
- 1zn1_1_B
- DSSR warning 1zn1.json: no nucleotides found. Ignoring 1zn1_1_B.
- 
 6z8k_1_X
 Sequence is too short. (< 5 resolved nts)
 
@@ -3079,9 +2767,6 @@ Sequence is too short. (< 5 resolved nts)
 2vrt_1_G
 Sequence is too short. (< 5 resolved nts)
 
- 1emi_1_B
- DSSR warning 1emi.json: no nucleotides found. Ignoring 1emi_1_B.
- 
 6r9m_1_B
 Sequence is too short. (< 5 resolved nts)
 
@@ -3157,9 +2842,6 @@ Sequence is too short. (< 5 resolved nts)
 1uvn_1_D
 Sequence is too short. (< 5 resolved nts)
 
- 3iy9_1_A
- DSSR warning 3iy9.json: no nucleotides found. Ignoring 3iy9_1_A.
- 
 4wtk_1_T
 Sequence is too short. (< 5 resolved nts)
 
@@ -3175,63 +2857,9 @@ Sequence is too short. (< 5 resolved nts)
 4oav_1_A
 Sequence is too short. (< 5 resolved nts)
 
- 3ep2_1_E
- DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_E.
- 
- 3eq3_1_E
- DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_E.
- 
- 3eq4_1_E
- DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_E.
- 
- 3ep2_1_A
- DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_A.
- 
- 3eq3_1_A
- DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_A.
- 
- 3eq4_1_A
- DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_A.
- 
- 3ep2_1_C
- DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_C.
- 
- 3eq3_1_C
- DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_C.
- 
- 3eq4_1_C
- DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_C.
- 
- 3ep2_1_B
- DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_B.
- 
- 3eq3_1_B
- DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_B.
- 
- 3eq4_1_B
- DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B.
- 
 4i67_1_B
 Sequence is too short. (< 5 resolved nts)
 
- 3pgw_1_R
- DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R.
- 
- 3pgw_1_N
- DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N.
- 
- 3cw1_1_X
- DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_X.
- 
- 3cw1_1_W
- DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_W.
- 
- 3cw1_1_V
- DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V.
- 
- 7b0y_1_A
- Could not find nucleotides of chain A in annotation 7b0y.json. Either there is a problem with 7b0y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 6k32_1_T
 Could not find nucleotides of chain T in annotation 6k32.json. Either there is a problem with 6k32 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3244,15 +2872,6 @@ Could not find nucleotides of chain A in annotation 5mmj.json. Either there is a
 5x8r_1_A
 Could not find nucleotides of chain A in annotation 5x8r.json. Either there is a problem with 5x8r mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 2agn_1_E
- DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_E.
- 
- 2agn_1_D
- DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_D.
- 
- 4v5z_1_BD
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BD.
- 
 6yw5_1_AA
 Could not find nucleotides of chain AA in annotation 6yw5.json. Either there is a problem with 6yw5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3271,8 +2890,8 @@ Sequence is too short. (< 5 resolved nts)
 3nvk_1_S
 Sequence is too short. (< 5 resolved nts)
 
- 2iy3_1_B
- DSSR warning 2iy3.json: no nucleotides found. Ignoring 2iy3_1_B.
+ 1cwp_1_D
+ Sequence is too short. (< 5 resolved nts)
 
 1cwp_1_F
 Sequence is too short. (< 5 resolved nts)
@@ -3391,9 +3010,6 @@ Could not find nucleotides of chain F in annotation 7a09.json. Either there is a
 5t2c_1_AN
 Could not find nucleotides of chain AN in annotation 5t2c.json. Either there is a problem with 5t2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v5z_1_BF
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BF.
- 
 3j6b_1_E
 Could not find nucleotides of chain E in annotation 3j6b.json. Either there is a problem with 3j6b mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3463,6 +3079,18 @@ Sequence is too short. (< 5 resolved nts)
 4v4f_1_B2
 Sequence is too short. (< 5 resolved nts)
 
+ 7m4y_1_V
+ Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7m4x_1_V
+ Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 6v3a_1_V
+ Could not find nucleotides of chain V in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 6v39_1_V
+ Could not find nucleotides of chain V in annotation 6v39.json. Either there is a problem with 6v39 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 5it9_1_I
 Could not find nucleotides of chain I in annotation 5it9.json. Either there is a problem with 5it9 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3481,18 +3109,9 @@ Sequence is too short. (< 5 resolved nts)
 5zsn_1_E
 Sequence is too short. (< 5 resolved nts)
 
- 1cwp_1_D
- Sequence is too short. (< 5 resolved nts)
- 
- 3jcr_1_N
- DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_N.
- 
 6gfw_1_R
 Sequence is too short. (< 5 resolved nts)
 
- 2vaz_1_A
- DSSR warning 2vaz.json: no nucleotides found. Ignoring 2vaz_1_A.
- 
 6zm6_1_X
 Could not find nucleotides of chain X in annotation 6zm6.json. Either there is a problem with 6zm6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3505,9 +3124,6 @@ Could not find nucleotides of chain W in annotation 6zm6.json. Either there is a
 6zm5_1_W
 Could not find nucleotides of chain W in annotation 6zm5.json. Either there is a problem with 6zm5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v5z_1_BP
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BP.
- 
 6n6e_1_D
 Sequence is too short. (< 5 resolved nts)
 
@@ -3535,9 +3151,6 @@ Sequence is too short. (< 5 resolved nts)
 5uh9_1_I
 Sequence is too short. (< 5 resolved nts)
 
- 2ftc_1_R
- DSSR warning 2ftc.json: no nucleotides found. Ignoring 2ftc_1_R.
- 
 7a5j_1_X
 Could not find nucleotides of chain X in annotation 7a5j.json. Either there is a problem with 7a5j mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3547,9 +3160,6 @@ Sequence is too short. (< 5 resolved nts)
 4udv_1_R
 Sequence is too short. (< 5 resolved nts)
 
- 2r1g_1_E
- DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_E.
- 
 5zsc_1_D
 Sequence is too short. (< 5 resolved nts)
 
@@ -3631,9 +3241,6 @@ Sequence is too short. (< 5 resolved nts)
 3m85_1_Y
 Sequence is too short. (< 5 resolved nts)
 
- 1e8s_1_C
- DSSR warning 1e8s.json: no nucleotides found. Ignoring 1e8s_1_C.
- 
 5wnp_1_B
 Could not find nucleotides of chain B in annotation 5wnp.json. Either there is a problem with 5wnp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3700,12 +3307,21 @@ Sequence is too short. (< 5 resolved nts)
 3u2e_1_C
 Sequence is too short. (< 5 resolved nts)
 
+ 7eh1_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
 5uef_1_C
 Sequence is too short. (< 5 resolved nts)
 
 5uef_1_D
 Sequence is too short. (< 5 resolved nts)
 
+ 7eh2_1_R
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7eh2_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
 4x4u_1_H
 Sequence is too short. (< 5 resolved nts)
 
@@ -3751,12 +3367,6 @@ Sequence is too short. (< 5 resolved nts)
 4k4t_1_D
 Sequence is too short. (< 5 resolved nts)
 
- 1zn1_1_C
- DSSR warning 1zn1.json: no nucleotides found. Ignoring 1zn1_1_C.
- 
- 1zn0_1_C
- DSSR warning 1zn0.json: no nucleotides found. Ignoring 1zn0_1_C.
- 
 1xpu_1_G
 Sequence is too short. (< 5 resolved nts)
 
@@ -3829,6 +3439,9 @@ Sequence is too short. (< 5 resolved nts)
 1n1h_1_B
 Sequence is too short. (< 5 resolved nts)
 
+ 7n2v_1_PT
+ Could not find nucleotides of chain PT in annotation 7n2v.json. Either there is a problem with 7n2v mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 4ohz_1_B
 Sequence is too short. (< 5 resolved nts)
 
@@ -3865,15 +3478,9 @@ Sequence is too short. (< 5 resolved nts)
 4gkj_1_W
 Sequence is too short. (< 5 resolved nts)
 
- 4v5z_1_BC
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BC.
- 
 5y88_1_X
 Could not find nucleotides of chain X in annotation 5y88.json. Either there is a problem with 5y88 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v5z_1_BB
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BB.
- 
 3j0o_1_H
 Could not find nucleotides of chain H in annotation 3j0o.json. Either there is a problem with 3j0o mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3886,9 +3493,6 @@ Could not find nucleotides of chain H in annotation 3j0p.json. Either there is a
 3j0q_1_H
 Could not find nucleotides of chain H in annotation 3j0q.json. Either there is a problem with 3j0q mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v5z_1_BH
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BH.
- 
 3j0o_1_F
 Could not find nucleotides of chain F in annotation 3j0o.json. Either there is a problem with 3j0o mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -3931,9 +3535,6 @@ Could not find nucleotides of chain A in annotation 3j0q.json. Either there is a
 3j0p_1_A
 Could not find nucleotides of chain A in annotation 3j0p.json. Either there is a problem with 3j0p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v5z_1_BJ
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BJ.
- 
 6ys3_1_V
 Could not find nucleotides of chain V in annotation 6ys3.json. Either there is a problem with 6ys3 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -4039,15 +3640,9 @@ Could not find nucleotides of chain BB in annotation 5mre.json. Either there is 
 5mrf_1_BB
 Could not find nucleotides of chain BB in annotation 5mrf.json. Either there is a problem with 5mrf mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v5z_1_BN
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BN.
- 
 3j46_1_P
 Could not find nucleotides of chain P in annotation 3j46.json. Either there is a problem with 3j46 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 3jcr_1_M
- DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_M.
- 
 4e6b_1_A
 Sequence is too short. (< 5 resolved nts)
 
@@ -4057,12 +3652,6 @@ Sequence is too short. (< 5 resolved nts)
 6a6l_1_D
 Sequence is too short. (< 5 resolved nts)
 
- 4v5z_1_BS
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BS.
- 
- 4v8t_1_1
- Could not find nucleotides of chain 1 in annotation 4v8t.json. Either there is a problem with 4v8t mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 1uvi_1_D
 Sequence is too short. (< 5 resolved nts)
 
@@ -4132,17 +3721,8 @@ Could not find nucleotides of chain 2M in annotation 6ip6.json. Either there is 
 6qcs_1_M
 Sequence is too short. (< 5 resolved nts)
 
- 486d_1_G
- Could not find nucleotides of chain G in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 2r1g_1_C
- DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_C.
- 
- 486d_1_F
- Could not find nucleotides of chain F in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 4v5z_1_B0
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B0.
+ 7b5k_1_Z
+ Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
 4nia_1_O
 Could not find nucleotides of chain O in annotation 4nia.json. Either there is a problem with 4nia mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4177,6 +3757,9 @@ Could not find nucleotides of chain L in annotation 4oq9.json. Either there is a
 6r9q_1_B
 Sequence is too short. (< 5 resolved nts)
 
+ 7m4u_1_A
+ Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 6v3a_1_SN1
 Could not find nucleotides of chain SN1 in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -4189,15 +3772,6 @@ Could not find nucleotides of chain SN1 in annotation 6v39.json. Either there is
 6v3e_1_SN1
 Could not find nucleotides of chain SN1 in annotation 6v3e.json. Either there is a problem with 6v3e mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1pn7_1_C
- DSSR warning 1pn7.json: no nucleotides found. Ignoring 1pn7_1_C.
- 
- 1mj1_1_Q
- Could not find nucleotides of chain Q in annotation 1mj1.json. Either there is a problem with 1mj1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 1mj1_1_R
- Could not find nucleotides of chain R in annotation 1mj1.json. Either there is a problem with 1mj1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 4dr6_1_V
 Sequence is too short. (< 5 resolved nts)
 
@@ -4315,26 +3889,47 @@ Sequence is too short. (< 5 resolved nts)
 6oy6_1_I
 Sequence is too short. (< 5 resolved nts)
 
- 4bbl_1_Y
- DSSR warning 4bbl.json: no nucleotides found. Ignoring 4bbl_1_Y.
- 
- 4bbl_1_Z
- DSSR warning 4bbl.json: no nucleotides found. Ignoring 4bbl_1_Z.
- 
 4qvd_1_H
 Sequence is too short. (< 5 resolved nts)
 
 5gxi_1_B
 Sequence is too short. (< 5 resolved nts)
 
- 3iy8_1_A
- DSSR warning 3iy8.json: no nucleotides found. Ignoring 3iy8_1_A.
+ 7n06_1_G
+ Sequence is too short. (< 5 resolved nts)
 
- 6tnu_1_M
- Could not find nucleotides of chain M in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 7n06_1_H
+ Sequence is too short. (< 5 resolved nts)
 
- 5mc6_1_M
- Could not find nucleotides of chain M in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 7n06_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n06_1_J
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n06_1_K
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n06_1_L
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n33_1_G
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n33_1_H
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n33_1_I
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n33_1_J
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n33_1_K
+ Sequence is too short. (< 5 resolved nts)
+ 
+ 7n33_1_L
+ Sequence is too short. (< 5 resolved nts)
 
 5mc6_1_N
 Could not find nucleotides of chain N in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4363,12 +3958,6 @@ Could not find nucleotides of chain U in annotation 6htq.json. Either there is a
 6uu6_1_333
 Sequence is too short. (< 5 resolved nts)
 
- 6v3a_1_V
- Could not find nucleotides of chain V in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 6v39_1_V
- Could not find nucleotides of chain V in annotation 6v39.json. Either there is a problem with 6v39 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 5a0v_1_F
 Sequence is too short. (< 5 resolved nts)
 
@@ -4387,63 +3976,9 @@ Sequence is too short. (< 5 resolved nts)
 5wnu_1_B
 Could not find nucleotides of chain B in annotation 5wnu.json. Either there is a problem with 5wnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1zc8_1_F
- DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_F.
- 
 1vtm_1_R
 Sequence is too short. (< 5 resolved nts)
 
- 4v5z_1_BA
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BA.
- 
- 4v5z_1_BE
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BE.
- 
- 4v5z_1_BG
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BG.
- 
- 4v5z_1_BI
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BI.
- 
- 4v5z_1_BK
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BK.
- 
- 4v5z_1_BM
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BM.
- 
- 4v5z_1_BL
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BL.
- 
- 4v5z_1_BV
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BV.
- 
- 4v5z_1_BO
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BO.
- 
- 4v5z_1_BQ
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BQ.
- 
- 4v5z_1_BR
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BR.
- 
- 4v5z_1_BT
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BT.
- 
- 4v5z_1_BU
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BU.
- 
- 4v5z_1_BW
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BW.
- 
- 4v5z_1_BY
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BY.
- 
- 4v5z_1_BX
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BX.
- 
- 4v5z_1_BZ
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BZ.
- 
 5elt_1_F
 Sequence is too short. (< 5 resolved nts)
 
@@ -4480,9 +4015,6 @@ Could not find nucleotides of chain B in annotation 4cxg.json. Either there is a
 4cxh_1_B
 Could not find nucleotides of chain B in annotation 4cxh.json. Either there is a problem with 4cxh mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 4v5z_1_B1
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B1.
- 
 5z4d_1_B
 Sequence is too short. (< 5 resolved nts)
 
@@ -4495,6 +4027,9 @@ Could not find nucleotides of chain BV in annotation 6xa1.json. Either there is 
 6ha8_1_X
 Could not find nucleotides of chain X in annotation 6ha8.json. Either there is a problem with 6ha8 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
+ 2xs5_1_D
+ Sequence is too short. (< 5 resolved nts)
+ 
 1m8w_1_E
 Could not find nucleotides of chain E in annotation 1m8w.json. Either there is a problem with 1m8w mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -4579,6 +4114,15 @@ Sequence is too short. (< 5 resolved nts)
 1qln_1_R
 Sequence is too short. (< 5 resolved nts)
 
+ 3cw1_1_X
+ Could not find nucleotides of chain X in annotation 3cw1.json. Either there is a problem with 3cw1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 3cw1_1_W
+ Could not find nucleotides of chain W in annotation 3cw1.json. Either there is a problem with 3cw1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7b0y_1_A
+ Could not find nucleotides of chain A in annotation 7b0y.json. Either there is a problem with 7b0y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
 6ogy_1_M
 Sequence is too short. (< 5 resolved nts)
 
@@ -4591,9 +4135,6 @@ Sequence is too short. (< 5 resolved nts)
 6ywy_1_BB
 Could not find nucleotides of chain BB in annotation 6ywy.json. Either there is a problem with 6ywy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 1x18_1_A
- DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_A.
- 
 5ytx_1_B
 Sequence is too short. (< 5 resolved nts)
 
@@ -4681,21 +4222,6 @@ Could not find nucleotides of chain W in annotation 5lzb.json. Either there is a
 3wzi_1_C
 Sequence is too short. (< 5 resolved nts)
 
- 1mvr_1_E
- DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_E.
- 
- 1mvr_1_B
- DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_B.
- 
- 1mvr_1_A
- DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_A.
- 
- 4adx_1_0
- DSSR warning 4adx.json: no nucleotides found. Ignoring 4adx_1_0.
- 
- 4adx_1_8
- DSSR warning 4adx.json: no nucleotides found. Ignoring 4adx_1_8.
- 
 1n33_1_Z
 Sequence is too short. (< 5 resolved nts)
 
@@ -4720,24 +4246,9 @@ Could not find nucleotides of chain AA in annotation 5mrf.json. Either there is 
 7jhy_1_Z
 Could not find nucleotides of chain Z in annotation 7jhy.json. Either there is a problem with 7jhy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 2r1g_1_A
- DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_A.
- 
- 2r1g_1_D
- DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_D.
- 
- 2r1g_1_F
- DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_F.
- 
- 3eq4_1_Y
- DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_Y.
- 
 4wkr_1_C
 Sequence is too short. (< 5 resolved nts)
 
- 2r1g_1_X
- DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_X.
- 
 4v99_1_EC
 Could not find nucleotides of chain EC in annotation 4v99.json. Either there is a problem with 4v99 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
@@ -4945,102 +4456,6 @@ Sequence is too short. (< 5 resolved nts)
 4qvc_1_G
 Sequence is too short. (< 5 resolved nts)
 
- 6q1h_1_D
- Sequence is too short. (< 5 resolved nts)
- 
- 6q1h_1_H
- Sequence is too short. (< 5 resolved nts)
- 
- 6p7p_1_F
- Sequence is too short. (< 5 resolved nts)
- 
- 6p7p_1_E
- Sequence is too short. (< 5 resolved nts)
- 
- 6p7p_1_D
- Sequence is too short. (< 5 resolved nts)
- 
- 6vm6_1_J
- Sequence is too short. (< 5 resolved nts)
- 
- 6vm6_1_G
- Sequence is too short. (< 5 resolved nts)
- 
- 6wan_1_K
- Sequence is too short. (< 5 resolved nts)
- 
- 6wan_1_H
- Sequence is too short. (< 5 resolved nts)
- 
- 6wan_1_G
- Sequence is too short. (< 5 resolved nts)
- 
- 6wan_1_L
- Sequence is too short. (< 5 resolved nts)
- 
- 6wan_1_I
- Sequence is too short. (< 5 resolved nts)
- 
- 6ywo_1_F
- Sequence is too short. (< 5 resolved nts)
- 
- 6wan_1_J
- Sequence is too short. (< 5 resolved nts)
- 
- 4oau_1_A
- Sequence is too short. (< 5 resolved nts)
- 
- 6ywo_1_E
- Sequence is too short. (< 5 resolved nts)
- 
- 6ywo_1_K
- Sequence is too short. (< 5 resolved nts)
- 
- 6vm6_1_I
- Sequence is too short. (< 5 resolved nts)
- 
- 6vm6_1_H
- Sequence is too short. (< 5 resolved nts)
- 
- 6ywo_1_I
- Sequence is too short. (< 5 resolved nts)
- 
- 2a1r_1_C
- Sequence is too short. (< 5 resolved nts)
- 
- 6m6v_1_F
- Sequence is too short. (< 5 resolved nts)
- 
- 6m6v_1_E
- Sequence is too short. (< 5 resolved nts)
- 
- 2a1r_1_D
- Sequence is too short. (< 5 resolved nts)
- 
- 3gpq_1_E
- Sequence is too short. (< 5 resolved nts)
- 
- 3gpq_1_F
- Sequence is too short. (< 5 resolved nts)
- 
- 6o79_1_C
- Sequence is too short. (< 5 resolved nts)
- 
- 6vm6_1_K
- Sequence is too short. (< 5 resolved nts)
- 
- 6m6v_1_G
- Sequence is too short. (< 5 resolved nts)
- 
- 6hyu_1_D
- Sequence is too short. (< 5 resolved nts)
- 
- 1laj_1_R
- Sequence is too short. (< 5 resolved nts)
- 
- 6ybv_1_K
- Could not find nucleotides of chain K in annotation 6ybv.json. Either there is a problem with 6ybv mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 6mpf_1_W
 Sequence is too short. (< 5 resolved nts)
 
@@ -5080,15 +4495,6 @@ Sequence is too short. (< 5 resolved nts)
 5hkc_1_C
 Sequence is too short. (< 5 resolved nts)
 
- 4kzy_1_I
- Could not find nucleotides of chain I in annotation 4kzy.json. Either there is a problem with 4kzy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 4kzz_1_I
- Could not find nucleotides of chain I in annotation 4kzz.json. Either there is a problem with 4kzz mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 4kzx_1_I
- Could not find nucleotides of chain I in annotation 4kzx.json. Either there is a problem with 4kzx mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
 1rmv_1_B
 Sequence is too short. (< 5 resolved nts)
 
@@ -5101,30 +4507,6 @@ Sequence is too short. (< 5 resolved nts)
 4qu7_1_U
 Sequence is too short. (< 5 resolved nts)
 
- 4v5z_1_AH
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AH.
- 
- 4v5z_1_AA
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AA.
- 
- 4v5z_1_AB
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AB.
- 
- 4v5z_1_AC
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AC.
- 
- 4v5z_1_AD
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AD.
- 
- 4v5z_1_AE
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AE.
- 
- 4v5z_1_AF
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AF.
- 
- 4v5z_1_AG
- DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_AG.
- 
 6pmi_1_3
 Sequence is too short. (< 5 resolved nts)
 
@@ -5134,69 +4516,81 @@ Sequence is too short. (< 5 resolved nts)
 5hjz_1_C
 Sequence is too short. (< 5 resolved nts)
 
- 7nrc_1_SM
- Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 6ydp_1_AA_1176-2737
+ Could not find nucleotides of chain AA in annotation 6ydp.json. Either there is a problem with 6ydp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7nrc_1_SN
- Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 6ydw_1_AA_1176-2737
+ Could not find nucleotides of chain AA in annotation 6ydw.json. Either there is a problem with 6ydw mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7am2_1_R1
+ 1vy7_1_AY_1-73
 Sequence is too short. (< 5 resolved nts)
 
- 7k5l_1_R
+ 1vy7_1_CY_1-73
 Sequence is too short. (< 5 resolved nts)
 
- 7b5k_1_X
- Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
- 
- 7d8c_1_C
+ 4w2h_1_CY_1-73
 Sequence is too short. (< 5 resolved nts)
 
- 7m4y_1_V
- Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 7d1a_1_A_805-902
+ Could not find nucleotides of chain A in annotation 7d1a.json. Either there is a problem with 7d1a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7m4x_1_V
- Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 7d0g_1_A_805-913
+ Could not find nucleotides of chain A in annotation 7d0g.json. Either there is a problem with 7d0g mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7b5k_1_Z
- Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 7d0f_1_A_817-913
+ Could not find nucleotides of chain A in annotation 7d0f.json. Either there is a problem with 7d0f mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7m4u_1_A
- Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 7o7z_1_AH_144-220
+ Could not find nucleotides of chain AH in annotation 7o7z.json. Either there is a problem with 7o7z mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n06_1_G
- Sequence is too short. (< 5 resolved nts)
+ 4c9d_1_D_29-1
+ Mapping is reversed, this case is not supported (yet).
 
- 7n06_1_H
- Sequence is too short. (< 5 resolved nts)
+ 4c9d_1_C_29-1
+ Mapping is reversed, this case is not supported (yet).
 
- 7n06_1_I
- Sequence is too short. (< 5 resolved nts)
+ 7aih_1_1_2400-2963
+ Could not find nucleotides of chain 1 in annotation 7aih.json. Either there is a problem with 7aih mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n06_1_J
- Sequence is too short. (< 5 resolved nts)
+ 7aih_1_1_2984-3610
+ Could not find nucleotides of chain 1 in annotation 7aih.json. Either there is a problem with 7aih mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n06_1_K
- Sequence is too short. (< 5 resolved nts)
+ 7ane_1_2_1904-2468
+ Could not find nucleotides of chain 2 in annotation 7ane.json. Either there is a problem with 7ane mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n06_1_L
- Sequence is too short. (< 5 resolved nts)
+ 7ane_1_2_2489-3115
+ Could not find nucleotides of chain 2 in annotation 7ane.json. Either there is a problem with 7ane mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n33_1_G
+ 5g2x_1_A_595-692
 Sequence is too short. (< 5 resolved nts)
 
- 7n33_1_H
- Sequence is too short. (< 5 resolved nts)
+ 7aor_1_2_2020-2579
+ Could not find nucleotides of chain 2 in annotation 7aor.json. Either there is a problem with 7aor mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n33_1_I
- Sequence is too short. (< 5 resolved nts)
+ 7aor_1_2_2589-3210
+ Could not find nucleotides of chain 2 in annotation 7aor.json. Either there is a problem with 7aor mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n33_1_J
- Sequence is too short. (< 5 resolved nts)
+ 7a5p_1_2_259-449
+ Could not find nucleotides of chain 2 in annotation 7a5p.json. Either there is a problem with 7a5p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n33_1_K
- Sequence is too short. (< 5 resolved nts)
+ 7aor_1_A_2020-2579
+ Could not find nucleotides of chain A in annotation 7aor.json. Either there is a problem with 7aor mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
- 7n33_1_L
- Sequence is too short. (< 5 resolved nts)
+ 7aor_1_A_2589-3210
+ Could not find nucleotides of chain A in annotation 7aor.json. Either there is a problem with 7aor mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7am2_1_1_1904-2470
+ Could not find nucleotides of chain 1 in annotation 7am2.json. Either there is a problem with 7am2 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7am2_1_1_2491-3117
+ Could not find nucleotides of chain 1 in annotation 7am2.json. Either there is a problem with 7am2 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7ane_1_1_1904-2468
+ Could not find nucleotides of chain 1 in annotation 7ane.json. Either there is a problem with 7ane mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 7ane_1_1_2489-3115
+ Could not find nucleotides of chain 1 in annotation 7ane.json. Either there is a problem with 7ane mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+ 
+ 6uz7_1_8_2140-2825
+ Could not find nucleotides of chain 8 in annotation 6uz7.json. Either there is a problem with 6uz7 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 
--- a/scripts/automate.sh
View file @ad0e234
+++ b/scripts/automate.sh
View file @ad0e234
@@ -5,7 +5,7 @@ rm -rf latest_run.log errors.txt
 
 # Run RNANet
 bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ -r 20.0 --no-homology --redundant --extract' > latest_run.log 2>&1
- bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ -r 20.0 --redundant --sina --extract -s --stats-opts="--wadley --distance-matrices" --archive' > latest_run.log 2>&1
+ bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ -r 20.0 --redundant --extract -s --stats-opts="-r 20.0 --wadley --hire-rna --distance-matrices" --archive' >> latest_run.log 2>&1
 echo 'Compressing RNANet.db.gz...' >> latest_run.log
 touch results/RNANet.db                                         # update last modification date
 gzip -k /home/lbecquey/Projects/RNANet/results/RNANet.db        # compress it
--- a/scripts/automate_from_scratch.sh 0 → 100755
View file @ad0e234
+++ b/scripts/automate_from_scratch.sh 0 → 100755
View file @ad0e234
+ # This script is meant to be run periodically as a cron job.
+ # /!\ It passes --from-scratch to RNANet, so everything is recomputed!
+ # Run it once or twice a year; otherwise, the faster update runs should be enough.
+ 
+ # Abort if the project folder is unreachable: the relative "rm -rf" below must
+ # never run from whatever directory cron happened to start in.
+ cd /home/lbecquey/Projects/RNANet || exit 1
+ rm -rf latest_run.log errors.txt known_issues.txt known_issues_reasons.txt
+ 
+ # Run RNANet
+ # First pass (--no-homology) creates latest_run.log, the second pass appends to it.
+ bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ --from-scratch --ignore-issues -r 20.0 --no-homology --redundant --extract' > latest_run.log 2>&1
+ bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ --from-scratch --ignore-issues -r 20.0 --redundant --extract -s --stats-opts="-r 20.0 --wadley --hire-rna --distance-matrices" --archive' >> latest_run.log 2>&1
+ echo 'Compressing RNANet.db.gz...' >> latest_run.log
+ touch results/RNANet.db                                         # update last modification date
+ gzip -k /home/lbecquey/Projects/RNANet/results/RNANet.db        # compress it
+ rm -f results/RNANet.db-wal results/RNANet.db-shm               # SQLite temporary files
+ 
+ # Save the latest results
+ # Each CSV is copied as "<name>_latest.csv"; summary and families also get a dated copy.
+ export DATE=`date +%Y%m%d`
+ echo "Creating new release in ./archive/ folder ($DATE)..." >> latest_run.log
+ cp /home/lbecquey/Projects/RNANet/results/summary.csv /home/lbecquey/Projects/RNANet/archive/summary_latest.csv
+ cp /home/lbecquey/Projects/RNANet/results/summary.csv "/home/lbecquey/Projects/RNANet/archive/summary_$DATE.csv"
+ cp /home/lbecquey/Projects/RNANet/results/families.csv /home/lbecquey/Projects/RNANet/archive/families_latest.csv
+ cp /home/lbecquey/Projects/RNANet/results/families.csv "/home/lbecquey/Projects/RNANet/archive/families_$DATE.csv"
+ cp /home/lbecquey/Projects/RNANet/results/frequencies.csv /home/lbecquey/Projects/RNANet/archive/frequencies_latest.csv
+ cp /home/lbecquey/Projects/RNANet/results/pair_types.csv /home/lbecquey/Projects/RNANet/archive/pair_types_latest.csv
+ mv /home/lbecquey/Projects/RNANet/results/RNANet.db.gz /home/lbecquey/Projects/RNANet/archive/
+ 
+ # Init Seafile synchronization between RNANet library and ./archive/ folder (just the first time !)
+ # seaf-cli sync -l 8e082c6e-b9ed-4b2f-9279-de2177134c57 -s https://entrepot.ibisc.univ-evry.fr -u l****.b*****y@univ-evry.fr -p ****************** -d archive/
+ 
+ # Sync in Seafile
+ seaf-cli start >> latest_run.log 2>&1
+ # Keep the logged duration in sync with the sleep below.
+ echo 'Waiting 15m for SeaFile synchronization...' >> latest_run.log
+ sleep 15m
+ echo `seaf-cli status` >> latest_run.log
+ seaf-cli stop >> latest_run.log 2>&1
+ echo 'We are '`date`', update completed.' >> latest_run.log
+ 
--- a/scripts/build_docker_image.sh
View file @ad0e234
+++ b/scripts/build_docker_image.sh
View file @ad0e234
@@ -21,6 +21,6 @@ docker build -t rnanet:latest ..
 rm x3dna-dssr
 
 # to run, use something like:
- # docker run -v /home/persalteas/Data/RNA/3D/:/3D -v /home/persalteas/Data/RNA/sequences/:/sequences -v /home/persalteas/labo/:/runDir persalteas/rnanet [ additional options here ]
+ # docker run -v /home/lbecquey/Data/RNA/3D/:/3D -v /home/lbecquey/Data/RNA/sequences/:/sequences -v /home/lbecquey/labo/:/runDir rnanet [ additional options here ]
 # Without additional options, this runs a standard pass with known issues support, log output, and no statistics. The default resolution threshold is 4.0 Angstroms.
 
--- a/scripts/recompute_family.py
View file @ad0e234
+++ b/scripts/recompute_family.py
View file @ad0e234
@@ -36,6 +36,6 @@ for fam in families:
 
 # Now re run RNANet normally.
 command = ["python3.8", "./RNAnet.py", "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0",
-             "--redundant", "--sina", "--extract", "-s", "--stats-opts=\"--wadley --distance-matrices\""]
+             "--redundant", "--extract", "-s", "--stats-opts=\"-r 20.0 --wadley --hire-rna --distance-matrices\""]
 print(' '.join(command))
 subprocess.run(command)
\ No newline at end of file
--- a/scripts/recompute_some_chains.py
View file @ad0e234
+++ b/scripts/recompute_some_chains.py
View file @ad0e234
@@ -3,8 +3,9 @@ import subprocess, os, sys
 
 # Put a list of problematic chains here, they will be properly deleted and recomputed
 problems = [
-     "1k73_1_A",
-     "1k73_1_B"
+     # NOTE: keep a comma after every entry. Adjacent string literals without a
+     # comma are silently concatenated by Python into one (non-existent) chain name.
+     "7nhm_1_A_1-2923",
+     "4wfa_1_X_1-2923",
+     "4wce_1_X_1-2923",
 ]
 
 # provide the path to your data folders, the RNANet.db file, and the RNANet.py file as arguments to this script
@@ -22,6 +23,7 @@ for p in problems:
 
     # Remove the datapoints files and 3D files
     subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_mapped_to_Rfam/{p}.cif"])
+     subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_only/{p}.cif"])
     files = [ f for f in os.listdir(path_to_3D_data + "/datapoints") if p in f ]
     for f in files:
         subprocess.run(["rm", '-f', path_to_3D_data + f"/datapoints/{f}"])
@@ -38,14 +40,14 @@ for p in problems:
             print(' '.join(command))
             subprocess.run(command)
 
-         command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--extract", "--only", p]
+         command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "--redundant", "-r", "20.0", "--extract", "--only", p]
     else:
         # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys
         command = ["sqlite3", path_to_db, f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc is null;"]
         print(' '.join(command))
         subprocess.run(command)
 
-         command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--no-homology", "--extract", "--only", p]
+         command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "--redundant", "-r", "20.0", "--no-homology", "--extract", "--only", p]
 
     # Re-run RNANet
     os.chdir(os.path.dirname(os.path.realpath(path_to_db)) + '/../')
--- a/statistics.py
View file @ad0e234
+++ b/statistics.py
View file @ad0e234