Splitted Aglaé's code in a separate stats file

Louis BECQUEY · Louis BECQUEY
Commit 511a1a192f0c3f50c1dbc02732a6df4b1d0eb240 511a1a19 1 parent 672867a8
Showing 7 changed files with 1062 additions and 2517 deletions
RNAnet.py
doc/FAQ.md
doc/KnownIssues.md
geometric_stats.py
known_issues.txt
known_issues_reasons.txt
statistics.py
--- a/RNAnet.py
View file @511a1a1
+++ b/RNAnet.py
View file @511a1a1
@@ -31,7 +31,7 @@ import time
 import traceback
 import warnings
 from functools import partial, wraps
-from multiprocessing import Pool, Manager
+from multiprocessing import Pool, Manager, Value
 from time import sleep
 from tqdm import tqdm
 from setproctitle import setproctitle
@@ -45,6 +45,12 @@ from Bio.PDB.PDBIO import Select
 runDir = os.getcwd()
 def trace_unhandled_exceptions(func):
+    """
+    Captures exceptions even in parallel sections of the code and child processes,
+    and throws logs in red to stderr and to errors.txt.
+
+    Should be defined before the classes that use it.
+    """
     @wraps(func)
     def wrapped_func(*args, **kwargs):
         try:
@@ -60,27 +66,27 @@ def trace_unhandled_exceptions(func):
             print(s)
     return wrapped_func
-
 pd.set_option('display.max_rows', None)
 sqlite3.enable_callback_tracebacks(True)
 sqlite3.register_adapter(np.int64, lambda val: int(val))        # Tell Sqlite what to do with <class numpy.int64> objects ---> convert to int
 sqlite3.register_adapter(np.float64, lambda val: float(val))    # Tell Sqlite what to do with <class numpy.float64> objects ---> convert to float
-m = Manager()
+# m = Manager()
-running_stats = m.list()
+# running_stats = m.list()
-running_stats.append(0)  # n_launched
+# running_stats.append(0)  # n_launched
-running_stats.append(0)  # n_finished
+# running_stats.append(0)  # n_finished
-running_stats.append(0)  # n_skipped
+# running_stats.append(0)  # n_skipped
+n_launched = Value('i', 0)
+n_finished = Value('i', 0)
+n_skipped = Value('i', 0)
 path_to_3D_data = "tobedefinedbyoptions"
 path_to_seq_data = "tobedefinedbyoptions"
 python_executable = "python"+".".join(platform.python_version().split('.')[:2])  # Cuts python3.8.1 into python3.8 for example.
 validsymb = '\U00002705'
 warnsymb = '\U000026A0'
 errsymb = '\U0000274C'
-LSU_set = {"RF00002", "RF02540", "RF02541",
+LSU_set = {"RF00002", "RF02540", "RF02541", "RF02543", "RF02546"}   # From Rfam CLAN 00112
-           "RF02543", "RF02546"}   # From Rfam CLAN 00112
+SSU_set = {"RF00177", "RF02542",  "RF02545", "RF01959", "RF01960"}  # From Rfam CLAN 00111
-SSU_set = {"RF00177", "RF02542",  "RF02545",
-           "RF01959", "RF01960"}  # From Rfam CLAN 00111
 no_nts_set = set()
 weird_mappings = set()
@@ -103,17 +109,15 @@ class MutableFastaIterator(FastaIterator):
                     first_word = title.split(None, 1)[0]
                 except IndexError:
                     assert not title, repr(title)
-                    # Should we use SeqRecord default for no ID?
                     first_word = ""
-                yield SeqRecord(
+                yield SeqRecord(MutableSeq(sequence), id=first_word, name=first_word, description=title)
-                    MutableSeq(sequence), id=first_word, name=first_word, description=title,
-                )
 class SelectivePortionSelector(object):
     """Class passed to MMCIFIO to select some chain portions in an MMCIF file.
     Validates every chain, residue, nucleotide, to say if it is in the selection or not.
+    The primary use is to select the portion of a chain which is mapped to a family.
     """
     def __init__(self, model_id, chain_id, valid_resnums, khetatm):
@@ -156,123 +160,6 @@ class SelectivePortionSelector(object):
         return 1
-_select=Select()
-
-def save_mmcif(ioobj, out_file, select=_select, preserve_atom_numbering=False):
-    # reuse and modification of the source code of Biopython
-    # to have the 2 columns of numbering of residues numbered with the index_chain of DSSR
-    if isinstance(out_file, str):
-        fp = open(out_file, "w")
-        close_file = True
-    else:
-        fp = out_file
-        close_file = False
-    atom_dict = defaultdict(list)
-
-    for model in ioobj.structure.get_list():
-        if not select.accept_model(model):
-            continue
-        # mmCIF files with a single model have it specified as model 1
-        if model.serial_num == 0:
-            model_n = "1"
-        else:
-            model_n = str(model.serial_num)
-        # This is used to write label_entity_id and label_asym_id and
-        # increments from 1, changing with each molecule
-        entity_id = 0
-        if not preserve_atom_numbering:
-            atom_number = 1
-        for chain in model.get_list():
-            if not select.accept_chain(chain):
-                continue
-            chain_id = chain.get_id()
-            if chain_id == " ":
-                chain_id = "."
-            # This is used to write label_seq_id,
-            # remaining blank for hetero residues
-            
-            prev_residue_type = ""
-            prev_resname = ""
-            for residue in chain.get_unpacked_list():
-                if not select.accept_residue(residue):
-                    continue
-                hetfield, resseq, icode = residue.get_id()
-                if hetfield == " ":
-                    residue_type = "ATOM"
-                    label_seq_id = str(resseq)
-                    
-                else:
-                    residue_type = "HETATM"
-                    label_seq_id = "."
-                resseq = str(resseq)
-                if icode == " ":
-                    icode = "?"
-                resname = residue.get_resname()
-                # Check if the molecule changes within the chain
-                # This will always increment for the first residue in a
-                # chain due to the starting values above
-                if residue_type != prev_residue_type or (
-                    residue_type == "HETATM" and resname != prev_resname
-                ):
-                    entity_id += 1
-                prev_residue_type = residue_type
-                prev_resname = resname
-                label_asym_id = ioobj._get_label_asym_id(entity_id)
-                for atom in residue.get_unpacked_list():
-                    if select.accept_atom(atom):
-                        atom_dict["_atom_site.group_PDB"].append(residue_type)
-                        if preserve_atom_numbering:
-                            atom_number = atom.get_serial_number()
-                        atom_dict["_atom_site.id"].append(str(atom_number))
-                        if not preserve_atom_numbering:
-                            atom_number += 1
-                        element = atom.element.strip()
-                        if element == "":
-                            element = "?"
-                        atom_dict["_atom_site.type_symbol"].append(element)
-                        atom_dict["_atom_site.label_atom_id"].append(
-                            atom.get_name().strip()
-                        )
-                        altloc = atom.get_altloc()
-                        if altloc == " ":
-                            altloc = "."
-                        atom_dict["_atom_site.label_alt_id"].append(altloc)
-                        atom_dict["_atom_site.label_comp_id"].append(
-                            resname.strip()
-                        )
-                        atom_dict["_atom_site.label_asym_id"].append(label_asym_id)
-                        # The entity ID should be the same for similar chains
-                        # However this is non-trivial to calculate so we write "?"
-                        atom_dict["_atom_site.label_entity_id"].append("?")
-                        atom_dict["_atom_site.label_seq_id"].append(label_seq_id)
-                        atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode)
-                        coord = atom.get_coord()
-                        atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0])
-                        atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1])
-                        atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2])
-                        atom_dict["_atom_site.occupancy"].append(
-                            str(atom.get_occupancy())
-                        )
-                        atom_dict["_atom_site.B_iso_or_equiv"].append(
-                            str(atom.get_bfactor())
-                        )
-                        atom_dict["_atom_site.auth_seq_id"].append(resseq)
-                        atom_dict["_atom_site.auth_asym_id"].append(chain_id)
-                        atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n)
-
-    # Data block name is the structure ID with special characters removed
-    structure_id = ioobj.structure.id
-    for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]:
-        structure_id = structure_id.replace(c, "")
-    atom_dict["data_"] = structure_id
-
-    # Set the dictionary and write out using the generic dictionary method
-    ioobj.dic = atom_dict
-    ioobj._save_dict(fp)
-    if close_file:
-        fp.close()
-
-
 class Chain:
     """ 
     The object which stores all our data and the methods to process it.
@@ -424,13 +311,11 @@ class Chain:
                         for atom in list(res.get_atoms()):
                             # rename the remaining phosphate group to P, OP1, OP2, OP3
                             if atom.get_name() in ['PA', 'O1A', 'O2A', 'O3A'] and res_name != 'RIA': 
-
+                                # RIA is a residue made up of 2 riboses and 2 phosphates, 
-                            # RIA is a residue made up of 2 riboses and 2 phosphates, 
+                                # so it has an O2A atom between the C2A and C1 'atoms, 
-                            # so it has an O2A atom between the C2A and C1 'atoms, 
+                                # and it also has an OP2 atom attached to one of its phosphates 
-                            # and it also has an OP2 atom attached to one of its phosphates 
+                                # (see chains 6fyx_1_1, 6zu9_1_1, 6fyy_1_1, 6gsm_1_1 , 3jaq_1_1 and 1yfg_1_A)
-                            # (see chains 6fyx_1_1, 6zu9_1_1, 6fyy_1_1, 6gsm_1_1 , 3jaq_1_1 and 1yfg_1_A)
+                                # we do not modify the atom names of RIA residue
-                            # we do not modify the atom names of RIA residue
-
                                 if atom.get_name() == 'PA':
                                     atom_name = 'P'
                                 if atom.get_name() == 'O1A':
@@ -440,7 +325,7 @@ class Chain:
                                 if atom.get_name() == 'O3A':
                                     atom_name = 'OP3'
                                 new_atom_t = pdb.Atom.Atom(atom_name, atom.get_coord(), atom.get_bfactor(), atom.get_occupancy(), atom.get_altloc(), atom_name, atom.get_serial_number())
-                            else :
+                            else:
                                 new_atom_t=atom.copy()
                             new_residu_t.add(new_atom_t)
                         new_chain_t.add(new_residu_t)
@@ -787,7 +672,8 @@ class Chain:
         return df
     def register_chain(self, df):
-        """Saves the extracted 3D data to the database.
+        """
+        Saves the extracted 3D data to the database.
         """
         setproctitle(f"RNANet.py {self.chain_label} register_chain()")
@@ -920,6 +806,10 @@ class Monitor:
 class Downloader:
+    """
+    An object with methods to download public data from the internet.
+    """
+
     def download_Rfam_PDB_mappings(self):
         """Query the Rfam public MySQL database for mappings between their RNA families and PDB structures.
@@ -1170,6 +1060,10 @@ class Mapping:
 class Pipeline:
+    """
+    The RNANet pipeline steps.
+    """
+
     def __init__(self):
         self.dl = Downloader()
         self.known_issues = []  # list of chain_labels to ignore
@@ -1189,6 +1083,7 @@ class Pipeline:
         self.REUSE_ALL = False
         self.REDUNDANT = False
         self.ALIGNOPTS = None
+        self.RRNAALIGNOPTS = "--mxsize 8192 --cpu 10 --maxtau 0.1"
         self.STATSOPTS = None
         self.USESINA = False
         self.SELECT_ONLY = None
@@ -1207,7 +1102,7 @@ class Pipeline:
         try:
             opts, _ = getopt.getopt(sys.argv[1:], "r:fhs", ["help", "resolution=", "3d-folder=", "seq-folder=", "keep-hetatm=", 
-                                                            "only=", "cmalign-opts=", "stats-opts=", "maxcores=", "sina", "from-scratch", 
+                                                            "only=", "cmalign-opts=", "cmalign-rrna-opts=", "stats-opts=", "maxcores=", "sina", "from-scratch", 
                                                             "full-inference", "no-homology", "redundant", "ignore-issues", "extract", 
                                                             "all", "no-logs", "archive", "update-homologous", "version"])
         except getopt.GetoptError as err:
@@ -1323,6 +1218,8 @@ class Pipeline:
                 self.REUSE_ALL = True
             elif opt == "cmalign-opts":
                 self.ALIGNOPTS = arg
+            elif opt == "cmalign-rrna-opts":
+                self.RRNAALIGNOPTS = arg
             elif opt == "stats-opts":
                 self.STATSOPTS = " ".split(arg)
             elif opt == "--all":
@@ -1382,7 +1279,7 @@ class Pipeline:
             # If self.FULLINFERENCE is False, the extended list is already filtered to remove
             # the chains which already are in the database.
             print("> Building list of structures...", flush=True)
-            p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores)
+            p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=ncores)
             try:
                 pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1,
@@ -1491,7 +1388,7 @@ class Pipeline:
         else:
             mmcif_list = sorted(set([c.pdb_id for c in self.update]))
         try:
-            p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=int(coeff_ncores*ncores))
+            p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=int(coeff_ncores*ncores))
             pbar = tqdm(mmcif_list, maxinterval=1.0, miniters=1, desc="mmCIF files")
             for _ in p.imap_unordered(work_mmcif, mmcif_list, chunksize=1):
                 pbar.update(1)  # Everytime the iteration finishes, update the global progress bar
@@ -1634,7 +1531,11 @@ class Pipeline:
         joblist = []
         for f in self.fam_list:
             # the function already uses all CPUs so launch them one by one (how_many_in_parallel=1)
-            joblist.append(Job(function=work_realign, args=[self.USESINA, self.ALIGNOPTS, f], how_many_in_parallel=1, label=f))
+            if f in LSU_set or f in SSU_set:
+                opts = self.RRNAALIGNOPTS
+            else:
+                opts = self.ALIGNOPTS
+            joblist.append(Job(function=work_realign, args=[self.USESINA, opts, f], how_many_in_parallel=1, label=f))
         # Execute the jobs
         try:
@@ -1684,7 +1585,7 @@ class Pipeline:
         # Start a process pool to dispatch the RNA families,
         # over multiple CPUs (one family by CPU)
-        p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers)
+        p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=nworkers)
         try:
             fam_pbar = tqdm(total=len(self.fam_list), desc="RNA families", position=0, leave=True)
@@ -1741,7 +1642,7 @@ class Pipeline:
             os.makedirs(path_to_3D_data + "datapoints/")
         # Save to by-chain CSV files
-        p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3)
+        p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=3)
         try:
             pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True)
             for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains)):
@@ -1867,6 +1768,7 @@ class Pipeline:
         conn.close()
+# ==================== General helper functions =====================
 def read_cpu_number():
     """This function reads the number of CPU cores available from /proc/cpuinfo.
@@ -1876,13 +1778,29 @@ def read_cpu_number():
     p = subprocess.run(['grep', '-Ec', '(Intel|AMD)', '/proc/cpuinfo'], stdout=subprocess.PIPE)
     return int(int(p.stdout.decode('utf-8')[:-1])/2)
-def init_worker(tqdm_lock=None):
+def init_with_tqdm(tqdm_lock=None):
+    """
+    This initiation method kills the children when signal is received,
+    and the children progress is followed using TQDM progress bars.
+    """
     signal.signal(signal.SIGINT, signal.SIG_IGN)
     if tqdm_lock is not None:
         tqdm.set_lock(tqdm_lock)
+def init_no_tqdm(arg1, arg2, arg3):
+    """
+    This initiaiton method does not kill the children when signal is received,
+    they will complete and die even after the main process stops.
+    The children progress is followed using stdout text logs (notify(), warn(), etc)
+    """
+    global n_launched, n_finished, n_skipped
+	n_launched = arg1
+	n_finished = arg2
+	n_skipped = arg3
+
 def warn(message, error=False):
-    """Pretty-print warnings and error messages.
+    """
+    Pretty-print warnings and error messages.
     """
     # Cut if too long
     if len(message) > 66:
@@ -1900,20 +1818,133 @@ def warn(message, error=False):
         print(f"\t> \033[33mWARN: {message:64s}\033[0m\t{warnsymb}", flush=True)
 def notify(message, post=''):
+    """
+    Pretty-print successful finished tasks.
+    """
     if len(post):
         post = '(' + post + ')'
     print(f"\t> {message:70s}\t{validsymb}\t{post}", flush=True)
-def _mutable_SeqIO_to_alignment_iterator(handle):
+# ========================= Biopython overloads =====================
-    records = list(MutableFastaIterator(handle))
-    if records:
-        yield MultipleSeqAlignment(records)
-def parse(handle):
+def save_mmcif(ioobj, out_file, select=Select(), preserve_atom_numbering=False):
-    with open(handle, 'r') as fp:
+    """
-        yield from _mutable_SeqIO_to_alignment_iterator(fp)
+    MMCIF writer which renumbers residues according to the RNANet index_chain (coming from DSSR).
+    """
+
+    if isinstance(out_file, str):
+        fp = open(out_file, "w")
+        close_file = True
+    else:
+        fp = out_file
+        close_file = False
+    atom_dict = defaultdict(list)
+
+    # Iterate on models
+    for model in ioobj.structure.get_list():
+        if not select.accept_model(model):
+            continue
+
+        # mmCIF files with a single model have it specified as model 1
+        if model.serial_num == 0:
+            model_n = "1"
+        else:
+            model_n = str(model.serial_num)
+
+        # This is used to write label_entity_id and label_asym_id and
+        # increments from 1, changing with each molecule
+        entity_id = 0
+        if not preserve_atom_numbering:
+            atom_number = 1
+
+        # Iterate on chains
+        for chain in model.get_list():
+            if not select.accept_chain(chain):
+                continue
+            chain_id = chain.get_id()
+            if chain_id == " ":
+                chain_id = "."
+
+            # This is used to write label_seq_id, remaining blank for hetero residues
+            prev_residue_type = ""
+            prev_resname = ""
+
+            # Iterate on residues
+            for residue in chain.get_unpacked_list():
+                if not select.accept_residue(residue):
+                    continue
+                hetfield, resseq, icode = residue.get_id()
+                if hetfield == " ":
+                    residue_type = "ATOM"
+                    label_seq_id = str(resseq)
+                else:
+                    residue_type = "HETATM"
+                    label_seq_id = "."
+                resseq = str(resseq)
+                if icode == " ":
+                    icode = "?"
+                resname = residue.get_resname()
+
+                # Check if the molecule changes within the chain.
+                # This will always increment for the first residue in a
+                # chain due to the starting values above
+                if residue_type != prev_residue_type or (residue_type == "HETATM" and resname != prev_resname):
+                    entity_id += 1
+                prev_residue_type = residue_type
+                prev_resname = resname
+                label_asym_id = ioobj._get_label_asym_id(entity_id)
+
+                # Iterate on atoms
+                for atom in residue.get_unpacked_list():
+                    if select.accept_atom(atom):
+                        atom_dict["_atom_site.group_PDB"].append(residue_type)
+                        if preserve_atom_numbering:
+                            atom_number = atom.get_serial_number()
+                        atom_dict["_atom_site.id"].append(str(atom_number))
+                        if not preserve_atom_numbering:
+                            atom_number += 1
+                        element = atom.element.strip()
+                        if element == "":
+                            element = "?"
+                        atom_dict["_atom_site.type_symbol"].append(element)
+                        atom_dict["_atom_site.label_atom_id"].append(atom.get_name().strip())
+                        altloc = atom.get_altloc()
+                        if altloc == " ":
+                            altloc = "."
+                        atom_dict["_atom_site.label_alt_id"].append(altloc)
+                        atom_dict["_atom_site.label_comp_id"].append(resname.strip())
+                        atom_dict["_atom_site.label_asym_id"].append(label_asym_id)
+                        # The entity ID should be the same for similar chains
+                        # However this is non-trivial to calculate so we write "?"
+                        atom_dict["_atom_site.label_entity_id"].append("?")
+                        atom_dict["_atom_site.label_seq_id"].append(label_seq_id)
+                        atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode)
+                        coord = atom.get_coord()
+                        atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0])
+                        atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1])
+                        atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2])
+                        atom_dict["_atom_site.occupancy"].append(str(atom.get_occupancy()))
+                        atom_dict["_atom_site.B_iso_or_equiv"].append(str(atom.get_bfactor())                        )
+                        atom_dict["_atom_site.auth_seq_id"].append(resseq)
+                        atom_dict["_atom_site.auth_asym_id"].append(chain_id)
+                        atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n)
+
+    # Data block name is the structure ID with special characters removed
+    structure_id = ioobj.structure.id
+    for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]:
+        structure_id = structure_id.replace(c, "")
+    atom_dict["data_"] = structure_id
+
+    # Set the dictionary and write out using the generic dictionary method
+    ioobj.dic = atom_dict
+    ioobj._save_dict(fp)
+    if close_file:
+        fp.close()
 def read(handle):
+    """
+    A shortcut to parse alignment files with our custom class MutableFastaIterator.
+    """
     iterator = parse(handle)
     try:
         alignment = next(iterator)
@@ -1926,6 +1957,25 @@ def read(handle):
         pass
     return alignment
+def parse(handle):
+    """
+    A shortcut to parse alignment files with our custom class MutableFastaIterator.
+    Called by function read().
+    """
+    with open(handle, 'r') as fp:
+        yield from _mutable_SeqIO_to_alignment_iterator(fp)
+
+def _mutable_SeqIO_to_alignment_iterator(handle):
+    """
+    A shortcut to parse alignment files with our custom class MutableFastaIterator.
+    Used by the parse() function.
+    """
+    records = list(MutableFastaIterator(handle))
+    if records:
+        yield MultipleSeqAlignment(records)
+
+# ========================== SQL related ============================
+
 def sql_define_tables(conn):
     conn.executescript(
         """ PRAGMA foreign_keys = on;
@@ -2085,12 +2135,19 @@ def sql_execute(conn, sql, many=False, data=None, warn_every=10):
             time.sleep(0.2)
     warn("Tried to reach database 100 times and failed. Aborting.", error=True)
+# ======================= RNANet Jobs and tasks ======================
+
 @trace_unhandled_exceptions
 def execute_job(j, jobcount):
-    """Run a Job object.
     """
+    Run a Job object.
+    """
+
+    global n_launched, n_skipped, n_finished
+
     # increase the counter of running jobs
-    running_stats[0] += 1
+    with n_launched.get_lock():
+		n_launched.value += 1
     # Monitor this process
     m = -1
@@ -2098,7 +2155,7 @@ def execute_job(j, jobcount):
     if len(j.cmd_):  # The job is a system command
-        print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.label}")
+        print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.label}")
         # Add the command to logfile
         os.makedirs(runDir+"/logs", exist_ok=True)
@@ -2114,9 +2171,20 @@ def execute_job(j, jobcount):
             # run the command. subprocess.run will be a child of this process, and stays monitored.
             start_time = time.time()
-            r = subprocess.run(j.cmd_, timeout=j.timeout_,
+            r = subprocess.run(j.cmd_, timeout=j.timeout_, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
             end_time = time.time()
+            if r.returncode != 0:
+                if r.stderr is not None:
+                    print(r.stderr, flush=True)
+                print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\tIssue faced with {j.label}, skipping it and adding it to known issues (if not known).")
+                with n_launched.get_lock():
+                    n_launched.value -= 1
+                with n_skipped.get_lock():
+                    n_skipped.value += 1
+                if j.label not in issues:
+                    issues.add(j.label)
+                    with open("known_issues.txt", "a") as iss:
+                        iss.write(j.label+"\n")
             # Stop the Monitor, then get its result
             monitor.keep_watching = False
@@ -2124,7 +2192,7 @@ def execute_job(j, jobcount):
     elif j.func_ is not None:
-        print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if type(a) != list])})", flush=True)
+        print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if type(a) != list])})", flush=True)
         with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
             # put the monitor in a different thread
@@ -2193,7 +2261,7 @@ def execute_joblist(fulljoblist):
             print("using", n, "processes:")
             # execute jobs of priority i that should be processed n by n:
-            p = Pool(processes=n, maxtasksperchild=1, initializer=init_worker)
+            p = Pool(processes=n, maxtasksperchild=1, initializer=init_no_tqdm, initargs=(n_launched, n_finished, n_skipped))
             try:
                 raw_results = p.map(partial(execute_job, jobcount=jobcount), bunch, chunksize=2)
                 p.close()
@@ -2207,7 +2275,11 @@ def execute_joblist(fulljoblist):
             for j, r in zip(bunch, raw_results):
                 j.comp_time = round(r[0], 2)  # seconds
                 j.max_mem = int(r[1]/1000000)  # MB
-                results.append((j.label, r[2], round(r[0], 2), int(r[1]/1000000)))
+                results.append((j.label, r[2], j.comp_time, j.max_mem))
+    
+    # Job is finished
+	with n_finished.get_lock():
+		n_finished.value += 1
     # throw back the money
     return results
@@ -2679,8 +2751,8 @@ def use_infernal(rfam_acc, alignopts):
     # Convert Stockholm to aligned FASTA
     subprocess.run(["esl-reformat", "-o", path_to_seq_data + f"realigned/{rfam_acc}++.afa", 
-                        "--informat", "stockholm", 
+                    "--informat", "stockholm", 
-                        "afa", path_to_seq_data + f"realigned/{rfam_acc}++.stk"])
+                    "afa", path_to_seq_data + f"realigned/{rfam_acc}++.stk"])
     subprocess.run(["rm", "-f", "esltmp*"]) # We can use a joker here, because we are not running in parallel for this part.
 @trace_unhandled_exceptions
@@ -3037,6 +3109,8 @@ def work_save(c, homology=True):
     df.to_csv(filename, float_format="%.2f", index=False)
+# =========================== Main function =============================
+
 if __name__ == "__main__":
     fileDir = os.path.dirname(os.path.realpath(__file__))
--- a/doc/FAQ.md
View file @511a1a1
+++ b/doc/FAQ.md
View file @511a1a1
@@ -7,6 +7,15 @@ In `cmalign` alignments, - means a nucleotide is missing compared to the covaria
 In the final filtered alignment that we provide for download, the same rule applies, but on top of that, some '.' are replaced by '-' when a gap in the 3D structure (a missing, unresolved nucleotide) is mapped to an insertion gap.
+* **What are the cmalign options for ?**
+
+From Infernal's user guide, we can quote that Infernal uses an HMM banding technique to accelerate alignment by default. It also takes care of 3' or 5' truncated sequences to be aligned correctly (and we have some).
+First, one can choose an algorithm, between `--optacc` (maximizing posterior probabilities, the default) and `--cyk` (maximizing likelihood).
+
+Then, the use of bands allows faster and more memory efficient computation, at the price of the guarantee of determining the optimal alignment. Bands can be disabled using the `--nonbanded` option. A best idea would be to control the threshold of probability mass to be considered negligible during HMM band calculation with the `--tau` parameter. Higher values of Tau yield greater speedups and lower memory usage, but a greater chance to miss the optimal alignment. In practice, the algorithm explores several Tau values (increasing it by a factor 2.0 from the original `--tau` value) until the DP matrix size falls below the threshold given by `--mxsize` (default 1028 Mb) or the value of `--maxtau` is reached (in this case, the program fails). One can disable this exploration with option `--fixedtau`. The default value of `--tau` is 1e-7, the default `--maxtau` is 0.05. Basically, you may decide on a value of `--mxsize` by dividing your available RAM by the number of cores used with cmalign. If necessary, you may use less cores than you have, using option `--cpu`.
+
+Finally, if using `--cyk --nonbanded --notrunc --noprob`, one can use the `--small` option to align using the divide-and-conquer CYK algorithm from Eddy 2002, requiring a very few memory but a lot of time. The major drawback of this is that it requires `--notrunc` and `--noprob`, so we give up on the correct alignment of truncated sequences, and the computation of posterior probabilities.
+
 * **Why are there some gap-only columns in the alignment ?**
 These columns are not completely gap-only, they contain at least one dash-gap '-'. This means an actual, physical nucleotide which should exist in the 3D structure should be located there. The previous and following nucleotides are **not** contiguous in space in 3D.
@@ -31,5 +40,5 @@ We first remove the nucleotides whose number is outside the family mapping (if a
 * **What are the versions of the dependencies you use ?**
-`cmalign` is v1.1.4, `sina` is v1.6.0, `x3dna-dssr` is v1.9.9, Biopython is v1.78.
+`cmalign` is v1.1.4, `sina` is v1.6.0, `x3dna-dssr` is v2.3.2-2021jun29, Biopython is v1.78.
\ No newline at end of file
--- a/doc/KnownIssues.md
View file @511a1a1
+++ b/doc/KnownIssues.md
View file @511a1a1
@@ -6,23 +6,16 @@
 * Some chains are not correctly renamed A in the produced separate files (e.g. 1d4r-B)
 ## Alignment issues
-* [SOLVED] Filtered alignments are shorter than the number of alignment columns saved to the SQL table `align_column`
 * Chain names appear in triple in the FASTA header (e.g. 1d4r[1]-B 1d4r[1]-B 1d4r[1]-B)
-## Technical running issues
-* [SOLVED] Files produced by Docker containers are owned by root and require root permissions to be read 
-* [SOLVED] SQLite WAL files are not deleted properly
-
 # Known feature requests
-* [DONE] Get filtered versions of the sequence alignments containing the 3D chains, publicly available for download
+* Automated annotation of detected Recurrent Interaction Networks (RINs), see http://carnaval.lri.fr/ .
-* [DONE] Get a consensus residue for each alignement column
+* Possibly, automated detection of HLs and ILs from the 3D Motif Atlas (BGSU). Maybe. Their own website already does the job.
-* [DONE] Get an option to limit the number of cores 
+* Weight sequences in alignment to give more importance to rarer sequences 
-* [DONE] Move to SILVA LSU release 138.1
+* Give both gap_percent and insertion_gap_percent
-* [UPCOMING] Automated annotation of detected Recurrent Interaction Networks (RINs), see http://carnaval.lri.fr/ .
-* [UPCOMING] Possibly, automated detection of HLs and ILs from the 3D Motif Atlas (BGSU). Maybe. Their own website already does the job.
-* [UPCOMING] Weight sequences in alignment to give more importance to rarer sequences 
-* [UPCOMING] Give both gap_percent and insertion_gap_percent
 * A field estimating the quality of the sequence alignment in table family.
 * Possibly, more metrics about the alignments coming from Infernal.
 * Run cmscan ourselves from the NDB instead of using Rfam-PDB mappings ? (Iff this actually makes a real difference, untested yet)
 * Use and save Infernal alignment bounds and truncation information
+* Save if a chain is a representative in BGSU list
+* Annotate unstructured regions (on a nucleotide basis)
--- a/geometric_stats.py 0 → 100644
View file @511a1a1
+++ b/geometric_stats.py 0 → 100644
View file @511a1a1
--- a/known_issues.txt
View file @511a1a1
+++ b/known_issues.txt
View file @511a1a1
 6ydp_1_AA_1176-2737
 6ydw_1_AA_1176-2737
 2z9q_1_A_1-72
-1ml5_1_b_5-121
-1ml5_1_a_1-2914
 3ep2_1_Y_1-72
 3eq3_1_Y_1-72
 4v48_1_A6_1-73
-1ml5_1_A_2-1520
+1ml5_1_b_5-121
+1ml5_1_a_1-2914
 1qzb_1_B_1-73
 1qza_1_B_1-73
 1ls2_1_B_1-73
+1ml5_1_A_2-1520
 1gsg_1_T_1-72
 7d1a_1_A_805-902
 7d0g_1_A_805-913
@@ -22,15 +22,12 @@
 2rdo_1_A_3-118
 4v48_1_A9_3-118
 4v47_1_A9_3-118
+4v42_1_BA_1-2914
+4v42_1_BB_5-121
 2ob7_1_A_10-319
 1x1l_1_A_1-130
 1zc8_1_Z_1-91
 2ob7_1_D_1-130
-4v42_1_BA_1-2914
-4v42_1_BB_5-121
-1r2x_1_C_1-58
-1r2w_1_C_1-58
-1eg0_1_L_1-56
 3dg2_1_A_1-1542
 3dg0_1_A_1-1542
 4v48_1_BA_1-1543
@@ -46,11 +43,14 @@
 3dg4_1_B_1-2904
 3dg5_1_B_1-2904
 1eg0_1_O_1-73
-1zc8_1_A_1-59
+1r2x_1_C_1-58
+1r2w_1_C_1-58
+1eg0_1_L_1-56
 1jgq_1_A_2-1520
 4v42_1_AA_2-1520
 1jgo_1_A_2-1520
 1jgp_1_A_2-1520
+1zc8_1_A_1-59
 1mvr_1_D_1-59
 4c9d_1_D_29-1
 4c9d_1_C_29-1
@@ -61,12 +61,6 @@
 3ep2_1_B_1-50
 3eq3_1_B_1-50
 3eq4_1_B_1-50
-3pgw_1_R_1-164
-3pgw_1_N_1-164
-3cw1_1_x_1-138
-3cw1_1_w_1-138
-3cw1_1_V_1-138
-3cw1_1_v_1-138
 2iy3_1_B_9-105
 3jcr_1_N_1-106
 2vaz_1_A_64-177
@@ -78,6 +72,12 @@
 4v5z_1_BY_2-113
 4v5z_1_BZ_1-70
 4v5z_1_B1_2-123
+3pgw_1_R_1-164
+3pgw_1_N_1-164
+3cw1_1_x_1-138
+3cw1_1_w_1-138
+3cw1_1_V_1-138
+3cw1_1_v_1-138
 1mvr_1_B_1-96
 4adx_1_0_1-2923
 3eq4_1_Y_1-69
@@ -295,7 +295,12 @@
 6ucq_1_2Y
 4w2e_1_X
 6ucq_1_2X
+7n1p_1_DT
+7n2u_1_DT
 6yss_1_W
+7n30_1_DT
+7n31_1_DT
+7n2c_1_DT
 5afi_1_Y
 5uq8_1_Z
 5wdt_1_Y
@@ -333,6 +338,22 @@
 4v4j_1_X
 4v4i_1_X
 4v42_1_BB
+4jrc_1_B
+4jrc_1_A
+6lkq_1_S
+5h5u_1_H
+7d6z_1_F
+5lze_1_Y
+5lze_1_V
+5lze_1_X
+3jcj_1_G
+6o7k_1_G
+3dg2_1_A
+3dg0_1_A
+4v48_1_BA
+4v47_1_BA
+3dg4_1_A
+3dg5_1_A
 6d30_1_C
 6j7z_1_C
 3er9_1_D
@@ -437,25 +458,22 @@
 6doc_1_B
 6doe_1_B
 6n6g_1_D
-6lkq_1_S
-5h5u_1_H
-7d6z_1_F
-5lze_1_Y
-5lze_1_V
-5lze_1_X
-3jcj_1_G
-6o7k_1_G
-3dg2_1_A
-3dg0_1_A
-4v48_1_BA
-4v47_1_BA
-3dg4_1_A
-3dg5_1_A
 4b3r_1_W
 4b3t_1_W
 4b3s_1_W
+7b5k_1_X
 5o2r_1_X
 5kcs_1_1X
+7n1p_1_PT
+7n2u_1_PT
+7n30_1_PT
+7n31_1_PT
+7n2c_1_PT
+6yl5_1_I
+6yl5_1_E
+6yl5_1_A
+6yl5_1_K
+6yl5_1_G
 6zvk_1_E2
 6zvk_1_H2
 7a01_1_E2
@@ -526,6 +544,7 @@
 6w6l_1_V
 6olf_1_V
 3erc_1_G
+4qjd_1_D
 6of1_1_1W
 6cae_1_1Y
 6o97_1_1W
@@ -557,7 +576,9 @@
 4v48_1_A6
 2z9q_1_A
 4hot_1_X
+5ns4_1_C
 6d2z_1_C
+7eh0_1_I
 4tu0_1_F
 4tu0_1_G
 6r9o_1_B
@@ -578,20 +599,23 @@
 6sv4_1_NC
 6i7o_1_NB
 1ml5_1_A
+7nsq_1_V
 6swa_1_Q
 6swa_1_R
-3j6x_1_IR
-3j6y_1_IR
 6ole_1_T
 6om0_1_T
 6oli_1_T
 6om7_1_T
 6olf_1_T
 6w6l_1_T
+6tnu_1_M
+5mc6_1_M
+7nrc_1_SM
 6tb3_1_N
 7b7d_1_SM
 7b7d_1_SN
 6tnu_1_N
+7nrc_1_SN
 7nrd_1_SN
 6zot_1_C
 2uxb_1_X
@@ -602,6 +626,9 @@
 1eg0_1_M
 3eq4_1_D
 5o1y_1_B
+4kzy_1_I
+4kzz_1_I
+4kzx_1_I
 3jcr_1_H
 6dzi_1_H
 5zeu_1_A
@@ -705,7 +732,6 @@
 6ip6_1_ZZ
 6uu3_1_333
 6uu1_1_333
-1pn8_1_D
 3er8_1_H
 3er8_1_G
 3er8_1_F
@@ -744,9 +770,8 @@
 4wtl_1_T
 4wtl_1_P
 1xnq_1_W
-1x18_1_C
+7n2v_1_DT
-1x18_1_B
+4peh_1_Z
-1x18_1_D
 1vq6_1_4
 4am3_1_D
 4am3_1_H
@@ -758,12 +783,45 @@
 4wtj_1_T
 4wtj_1_P
 4xbf_1_D
+5w1h_1_B
 6n6d_1_D
 6n6k_1_C
 6n6k_1_D
 3rtj_1_D
 6ty9_1_M
 6tz1_1_N
+6q1h_1_D
+6q1h_1_H
+6p7p_1_F
+6p7p_1_E
+6p7p_1_D
+6vm6_1_J
+6vm6_1_G
+6wan_1_K
+6wan_1_H
+6wan_1_G
+6wan_1_L
+6wan_1_I
+6ywo_1_F
+6wan_1_J
+4oau_1_A
+6ywo_1_E
+6ywo_1_K
+6vm6_1_I
+6vm6_1_H
+6ywo_1_I
+2a1r_1_C
+6m6v_1_F
+6m6v_1_E
+2a1r_1_D
+3gpq_1_E
+3gpq_1_F
+6o79_1_C
+6vm6_1_K
+6m6v_1_G
+6hyu_1_D
+1laj_1_R
+6ybv_1_K
 6sce_1_B
 6xl1_1_C
 6scf_1_I
@@ -809,11 +867,12 @@
 1y1y_1_P
 5zuu_1_I
 5zuu_1_G
+7am2_1_R1
 4peh_1_W
 4peh_1_V
 4peh_1_X
 4peh_1_Y
-4peh_1_Z
+7d8c_1_C
 6mkn_1_W
 7kl3_1_B
 4cxg_1_C
@@ -826,14 +885,7 @@
 4eya_1_F
 4eya_1_Q
 4eya_1_R
-1qzc_1_B
-1t1o_1_B
 1mvr_1_C
-1t1m_1_B
-1t1o_1_C
-1t1m_1_A
-1t1o_1_A
-2r1g_1_B
 4ht9_1_E
 6z1p_1_AB
 6z1p_1_AA
@@ -844,11 +896,9 @@
 5uk4_1_W
 5uk4_1_U
 5f6c_1_E
+7nwh_1_HH
 4rcj_1_B
 1xnr_1_W
-2agn_1_A
-2agn_1_C
-2agn_1_B
 6e0o_1_C
 6o75_1_D
 6o75_1_C
@@ -866,8 +916,7 @@
 1ibm_1_Z
 4dr5_1_V
 4d61_1_J
-1trj_1_B
+7nwg_1_Q3
-1trj_1_C
 5tbw_1_SR
 6hhq_1_SR
 6zvi_1_H
@@ -883,6 +932,8 @@
 5k8h_1_A
 5z4a_1_B
 3jbu_1_V
+4ts2_1_Y
+4ts0_1_Y
 1h2c_1_R
 1h2d_1_S
 1h2d_1_R
@@ -909,6 +960,7 @@
 6ppn_1_I
 5flx_1_Z
 6eri_1_AX
+7k5l_1_R
 7d80_1_Y
 1zc8_1_A
 1zc8_1_C
@@ -916,6 +968,7 @@
 1zc8_1_G
 1zc8_1_I
 1zc8_1_H
+6bfb_1_Y
 1zc8_1_J
 7du2_1_R
 4v8z_1_CX
@@ -951,6 +1004,8 @@
 4x9e_1_H
 6z1p_1_BB
 6z1p_1_BA
+3p22_1_C
+3p22_1_G
 2uxd_1_X
 6ywe_1_BB
 3ol9_1_D
@@ -973,8 +1028,6 @@
 3ol7_1_H
 3ol8_1_L
 3ol8_1_P
-1qzc_1_C
-1qzc_1_A
 6yrq_1_E
 6yrq_1_H
 6yrq_1_G
@@ -1054,6 +1107,7 @@
 3iy9_1_A
 4wtk_1_T
 4wtk_1_P
+6wlj_3_A
 1vqn_1_4
 4oav_1_C
 4oav_1_A
@@ -1070,18 +1124,13 @@
 3eq3_1_B
 3eq4_1_B
 4i67_1_B
-3pgw_1_R
+4jf2_1_A
-3pgw_1_N
-3cw1_1_X
-3cw1_1_W
-3cw1_1_V
-7b0y_1_A
 6k32_1_T
 6k32_1_P
 5mmj_1_A
 5x8r_1_A
-2agn_1_E
+3fu2_1_B
-2agn_1_D
+3fu2_1_A
 4v5z_1_BD
 6yw5_1_AA
 6ywe_1_AA
@@ -1117,6 +1166,17 @@
 3p6y_1_Q
 3p6y_1_W
 5dto_1_B
+6yml_1_A
+6ymm_1_A
+6ymi_1_M
+6ymi_1_F
+6ymi_1_A
+6ylb_1_F
+6ymi_1_C
+6ymj_1_C
+6ylb_1_C
+6ymj_1_I
+6ymj_1_O
 4cxh_1_X
 1uvj_1_F
 1uvj_1_D
@@ -1153,6 +1213,12 @@
 4v4f_1_B4
 4v4f_1_A6
 4v4f_1_B2
+7m4y_1_V
+7m4x_1_V
+6v3a_1_V
+6v39_1_V
+6ck5_1_A
+6ck5_1_B
 5it9_1_I
 7jqc_1_I
 5zsb_1_C
@@ -1162,6 +1228,8 @@
 1cwp_1_D
 3jcr_1_N
 6gfw_1_R
+3j6x_1_IR
+3j6y_1_IR
 2vaz_1_A
 6zm6_1_X
 6zm5_1_X
@@ -1177,11 +1245,11 @@
 5uh6_1_I
 6l74_1_I
 5uh9_1_I
+4v5z_1_BS
 2ftc_1_R
 7a5j_1_X
 6sag_1_R
 4udv_1_R
-2r1g_1_E
 5zsc_1_D
 5zsc_1_C
 6woy_1_I
@@ -1209,7 +1277,7 @@
 3m85_1_X
 3m85_1_Z
 3m85_1_Y
-1e8s_1_C
+5u34_1_B
 5wnp_1_B
 5wnv_1_B
 5yts_1_B
@@ -1232,8 +1300,11 @@
 6ij2_1_E
 3u2e_1_D
 3u2e_1_C
+7eh1_1_I
 5uef_1_C
 5uef_1_D
+7eh2_1_R
+7eh2_1_I
 4x4u_1_H
 4afy_1_D
 6oy5_1_I
@@ -1244,13 +1315,15 @@
 6s0m_1_C
 6ymw_1_C
 7a5g_1_J
+1m5k_1_B
+1m5o_1_E
+1m5v_1_B
 6gx6_1_B
 4k4s_1_D
 4k4s_1_H
 4k4t_1_H
 4k4t_1_D
 1zn1_1_C
-1zn0_1_C
 1xpu_1_G
 1xpu_1_L
 1xpr_1_L
@@ -1274,7 +1347,9 @@
 6gc5_1_F
 6gc5_1_H
 6gc5_1_G
+4rne_1_C
 1n1h_1_B
+7n2v_1_PT
 4ohz_1_B
 6t83_1_6B
 4gv6_1_C
@@ -1290,6 +1365,9 @@
 4v5z_1_BC
 5y88_1_X
 4v5z_1_BB
+5y85_1_D
+5y85_1_B
+5y87_1_D
 3j0o_1_H
 3j0l_1_H
 3j0p_1_H
@@ -1351,11 +1429,11 @@
 4e6b_1_A
 4e6b_1_B
 6a6l_1_D
-4v5z_1_BS
 4v8t_1_1
 1uvi_1_D
 1uvi_1_F
 1uvi_1_E
+3gs5_1_A
 4m7d_1_P
 4k4u_1_D
 4k4u_1_H
@@ -1376,8 +1454,8 @@
 6ip5_1_2M
 6ip6_1_2M
 6qcs_1_M
+7b5k_1_Z
 486d_1_G
-2r1g_1_C
 486d_1_F
 4v5z_1_B0
 4nia_1_O
@@ -1391,11 +1469,11 @@
 4oq9_1_F
 4oq9_1_L
 6r9q_1_B
+7m4u_1_A
 6v3a_1_SN1
 6v3b_1_SN1
 6v39_1_SN1
 6v3e_1_SN1
-1pn7_1_C
 1mj1_1_Q
 1mj1_1_R
 4dr6_1_V
@@ -1437,14 +1515,25 @@
 6ow3_1_I
 6ovy_1_I
 6oy6_1_I
-4bbl_1_Y
-4bbl_1_Z
 4qvd_1_H
 5gxi_1_B
 3iy8_1_A
-6tnu_1_M
+7n06_1_G
-5mc6_1_M
+7n06_1_H
+7n06_1_I
+7n06_1_J
+7n06_1_K
+7n06_1_L
+7n33_1_G
+7n33_1_H
+7n33_1_I
+7n33_1_J
+7n33_1_K
+7n33_1_L
 5mc6_1_N
+2qwy_1_C
+2qwy_1_A
+2qwy_1_B
 4eya_1_O
 4eya_1_P
 4eya_1_C
@@ -1453,8 +1542,6 @@
 6htq_1_W
 6htq_1_U
 6uu6_1_333
-6v3a_1_V
-6v39_1_V
 5a0v_1_F
 3avt_1_T
 6d1v_1_C
@@ -1497,6 +1584,7 @@
 6o78_1_E
 6xa1_1_BV
 6ha8_1_X
+3bnp_1_B
 1m8w_1_E
 1m8w_1_F
 5udi_1_B
@@ -1520,16 +1608,29 @@
 6een_1_H
 4wti_1_T
 4wti_1_P
+6dlr_1_A
+6dlt_1_A
+6dls_1_A
+6dlq_1_A
+6dnr_1_A
 5l3p_1_Y
 4hor_1_X
 3rzo_1_R
+5wlh_1_B
 2f4v_1_Z
+5ml7_1_B
 1qln_1_R
+3pgw_1_R
+3pgw_1_N
+3cw1_1_X
+3cw1_1_W
+3cw1_1_V
+7b0y_1_A
 6ogy_1_M
 6ogy_1_N
 6uej_1_B
+7kga_1_A
 6ywy_1_BB
-1x18_1_A
 5ytx_1_B
 4g0a_1_H
 6r9p_1_B
@@ -1572,12 +1673,8 @@
 5mre_1_AA
 5mrf_1_AA
 7jhy_1_Z
-2r1g_1_A
-2r1g_1_D
-2r1g_1_F
 3eq4_1_Y
 4wkr_1_C
-2r1g_1_X
 4v99_1_EC
 4v99_1_AC
 4v99_1_BH
@@ -1641,44 +1738,21 @@
 6rcl_1_C
 5jju_1_C
 4ejt_1_G
+1et4_1_A
+1et4_1_C
+1et4_1_B
+1et4_1_D
+1et4_1_E
+1ddy_1_C
+1ddy_1_A
+1ddy_1_E
 6lkq_1_W
+6r47_1_A
 3qsu_1_P
 3qsu_1_R
 2xs7_1_B
 1n38_1_B
 4qvc_1_G
-6q1h_1_D
-6q1h_1_H
-6p7p_1_F
-6p7p_1_E
-6p7p_1_D
-6vm6_1_J
-6vm6_1_G
-6wan_1_K
-6wan_1_H
-6wan_1_G
-6wan_1_L
-6wan_1_I
-6ywo_1_F
-6wan_1_J
-4oau_1_A
-6ywo_1_E
-6ywo_1_K
-6vm6_1_I
-6vm6_1_H
-6ywo_1_I
-2a1r_1_C
-6m6v_1_F
-6m6v_1_E
-2a1r_1_D
-3gpq_1_E
-3gpq_1_F
-6o79_1_C
-6vm6_1_K
-6m6v_1_G
-6hyu_1_D
-1laj_1_R
-6ybv_1_K
 6mpf_1_W
 6spc_1_A
 6spe_1_A
@@ -1687,14 +1761,12 @@
 6fti_1_V
 6ftj_1_V
 6ftg_1_V
+3npn_1_A
 4g0a_1_G
 4g0a_1_F
 4g0a_1_E
 2b2d_1_S
 5hkc_1_C
-4kzy_1_I
-4kzz_1_I
-4kzx_1_I
 1rmv_1_B
 4qu7_1_X
 4qu7_1_V
@@ -1710,25 +1782,3 @@
 6pmi_1_3
 6pmj_1_3
 5hjz_1_C
-7nrc_1_SM
-7nrc_1_SN
-7am2_1_R1
-7k5l_1_R
-7b5k_1_X
-7d8c_1_C
-7m4y_1_V
-7m4x_1_V
-7b5k_1_Z
-7m4u_1_A
-7n06_1_G
-7n06_1_H
-7n06_1_I
-7n06_1_J
-7n06_1_K
-7n06_1_L
-7n33_1_G
-7n33_1_H
-7n33_1_I
-7n33_1_J
-7n33_1_K
-7n33_1_L
--- a/known_issues_reasons.txt
View file @511a1a1
+++ b/known_issues_reasons.txt
View file @511a1a1
@@ -7,12 +7,6 @@ Could not find nucleotides of chain AA in annotation 6ydw.json. Either there is 
 2z9q_1_A_1-72
 DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A_1-72.
-1ml5_1_b_5-121
-Could not find nucleotides of chain b in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-1ml5_1_a_1-2914
-Could not find nucleotides of chain a in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
 3ep2_1_Y_1-72
 DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_Y_1-72.
@@ -22,8 +16,11 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_Y_1-72.
 4v48_1_A6_1-73
 DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A6_1-73.
-1ml5_1_A_2-1520
+1ml5_1_b_5-121
-Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Could not find nucleotides of chain b in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+1ml5_1_a_1-2914
+Could not find nucleotides of chain a in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 1qzb_1_B_1-73
 DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B_1-73.
@@ -34,6 +31,9 @@ DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B_1-73.
 1ls2_1_B_1-73
 DSSR warning 1ls2.json: no nucleotides found. Ignoring 1ls2_1_B_1-73.
+1ml5_1_A_2-1520
+Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 1gsg_1_T_1-72
 DSSR warning 1gsg.json: no nucleotides found. Ignoring 1gsg_1_T_1-72.
@@ -70,6 +70,12 @@ DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A9_3-118.
 4v47_1_A9_3-118
 DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A9_3-118.
+4v42_1_BA_1-2914
+Could not find nucleotides of chain BA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+4v42_1_BB_5-121
+Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 2ob7_1_A_10-319
 DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_A_10-319.
@@ -82,21 +88,6 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_Z_1-91.
 2ob7_1_D_1-130
 DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_D_1-130.
-4v42_1_BA_1-2914
-Could not find nucleotides of chain BA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-4v42_1_BB_5-121
-Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-1r2x_1_C_1-58
-DSSR warning 1r2x.json: no nucleotides found. Ignoring 1r2x_1_C_1-58.
-
-1r2w_1_C_1-58
-DSSR warning 1r2w.json: no nucleotides found. Ignoring 1r2w_1_C_1-58.
-
-1eg0_1_L_1-56
-DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_L_1-56.
-
 3dg2_1_A_1-1542
 DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A_1-1542.
@@ -142,8 +133,14 @@ DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B_1-2904.
 1eg0_1_O_1-73
 DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_O_1-73.
-1zc8_1_A_1-59
+1r2x_1_C_1-58
-DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_A_1-59.
+DSSR warning 1r2x.json: no nucleotides found. Ignoring 1r2x_1_C_1-58.
+
+1r2w_1_C_1-58
+DSSR warning 1r2w.json: no nucleotides found. Ignoring 1r2w_1_C_1-58.
+
+1eg0_1_L_1-56
+DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_L_1-56.
 1jgq_1_A_2-1520
 Could not find nucleotides of chain A in annotation 1jgq.json. Either there is a problem with 1jgq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -157,6 +154,9 @@ Could not find nucleotides of chain A in annotation 1jgo.json. Either there is a
 1jgp_1_A_2-1520
 Could not find nucleotides of chain A in annotation 1jgp.json. Either there is a problem with 1jgp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+1zc8_1_A_1-59
+DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_A_1-59.
+
 1mvr_1_D_1-59
 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_D_1-59.
@@ -187,24 +187,6 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_B_1-50.
 3eq4_1_B_1-50
 DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B_1-50.
-3pgw_1_R_1-164
-DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R_1-164.
-
-3pgw_1_N_1-164
-DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N_1-164.
-
-3cw1_1_x_1-138
-DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_x_1-138.
-
-3cw1_1_w_1-138
-DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_w_1-138.
-
-3cw1_1_V_1-138
-DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V_1-138.
-
-3cw1_1_v_1-138
-DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_v_1-138.
-
 2iy3_1_B_9-105
 DSSR warning 2iy3.json: no nucleotides found. Ignoring 2iy3_1_B_9-105.
@@ -238,6 +220,24 @@ DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BZ_1-70.
 4v5z_1_B1_2-123
 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B1_2-123.
+3pgw_1_R_1-164
+DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R_1-164.
+
+3pgw_1_N_1-164
+DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N_1-164.
+
+3cw1_1_x_1-138
+DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_x_1-138.
+
+3cw1_1_w_1-138
+DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_w_1-138.
+
+3cw1_1_V_1-138
+DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V_1-138.
+
+3cw1_1_v_1-138
+DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_v_1-138.
+
 1mvr_1_B_1-96
 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_B_1-96.
@@ -889,9 +889,24 @@ Could not find nucleotides of chain X in annotation 4w2e.json. Either there is a
 6ucq_1_2X
 Could not find nucleotides of chain 2X in annotation 6ucq.json. Either there is a problem with 6ucq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+7n1p_1_DT
+Could not find nucleotides of chain DT in annotation 7n1p.json. Either there is a problem with 7n1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7n2u_1_DT
+Could not find nucleotides of chain DT in annotation 7n2u.json. Either there is a problem with 7n2u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 6yss_1_W
 Could not find nucleotides of chain W in annotation 6yss.json. Either there is a problem with 6yss mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+7n30_1_DT
+Could not find nucleotides of chain DT in annotation 7n30.json. Either there is a problem with 7n30 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7n31_1_DT
+Could not find nucleotides of chain DT in annotation 7n31.json. Either there is a problem with 7n31 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7n2c_1_DT
+Could not find nucleotides of chain DT in annotation 7n2c.json. Either there is a problem with 7n2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 5afi_1_Y
 Could not find nucleotides of chain Y in annotation 5afi.json. Either there is a problem with 5afi mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -1003,6 +1018,54 @@ Could not find nucleotides of chain X in annotation 4v4i.json. Either there is a
 4v42_1_BB
 Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+4jrc_1_B
+Nucleotides not inserted !
+
+4jrc_1_A
+Nucleotides not inserted !
+
+6lkq_1_S
+Could not find nucleotides of chain S in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+5h5u_1_H
+Could not find nucleotides of chain H in annotation 5h5u.json. Either there is a problem with 5h5u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7d6z_1_F
+Could not find nucleotides of chain F in annotation 7d6z.json. Either there is a problem with 7d6z mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+5lze_1_Y
+Could not find nucleotides of chain Y in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+5lze_1_V
+Could not find nucleotides of chain V in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+5lze_1_X
+Could not find nucleotides of chain X in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+3jcj_1_G
+Could not find nucleotides of chain G in annotation 3jcj.json. Either there is a problem with 3jcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+6o7k_1_G
+Could not find nucleotides of chain G in annotation 6o7k.json. Either there is a problem with 6o7k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+3dg2_1_A
+DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A.
+
+3dg0_1_A
+DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A.
+
+4v48_1_BA
+DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_BA.
+
+4v47_1_BA
+DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_BA.
+
+3dg4_1_A
+DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A.
+
+3dg5_1_A
+DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A.
+
 6d30_1_C
 Sequence is too short. (< 5 resolved nts)
@@ -1315,62 +1378,53 @@ Sequence is too short. (< 5 resolved nts)
 6n6g_1_D
 Sequence is too short. (< 5 resolved nts)
-6lkq_1_S
+4b3r_1_W
-Could not find nucleotides of chain S in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Sequence is too short. (< 5 resolved nts)
-
-5h5u_1_H
-Could not find nucleotides of chain H in annotation 5h5u.json. Either there is a problem with 5h5u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-7d6z_1_F
-Could not find nucleotides of chain F in annotation 7d6z.json. Either there is a problem with 7d6z mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-5lze_1_Y
-Could not find nucleotides of chain Y in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-5lze_1_V
+4b3t_1_W
-Could not find nucleotides of chain V in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Sequence is too short. (< 5 resolved nts)
-5lze_1_X
+4b3s_1_W
-Could not find nucleotides of chain X in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Sequence is too short. (< 5 resolved nts)
-3jcj_1_G
+7b5k_1_X
-Could not find nucleotides of chain G in annotation 3jcj.json. Either there is a problem with 3jcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-6o7k_1_G
+5o2r_1_X
-Could not find nucleotides of chain G in annotation 6o7k.json. Either there is a problem with 6o7k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Could not find nucleotides of chain X in annotation 5o2r.json. Either there is a problem with 5o2r mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-3dg2_1_A
+5kcs_1_1X
-DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A.
+Could not find nucleotides of chain 1X in annotation 5kcs.json. Either there is a problem with 5kcs mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-3dg0_1_A
+7n1p_1_PT
-DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A.
+Could not find nucleotides of chain PT in annotation 7n1p.json. Either there is a problem with 7n1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-4v48_1_BA
+7n2u_1_PT
-DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_BA.
+Could not find nucleotides of chain PT in annotation 7n2u.json. Either there is a problem with 7n2u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-4v47_1_BA
+7n30_1_PT
-DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_BA.
+Could not find nucleotides of chain PT in annotation 7n30.json. Either there is a problem with 7n30 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-3dg4_1_A
+7n31_1_PT
-DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A.
+Could not find nucleotides of chain PT in annotation 7n31.json. Either there is a problem with 7n31 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-3dg5_1_A
+7n2c_1_PT
-DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A.
+Could not find nucleotides of chain PT in annotation 7n2c.json. Either there is a problem with 7n2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-4b3r_1_W
+6yl5_1_I
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-4b3t_1_W
+6yl5_1_E
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-4b3s_1_W
+6yl5_1_A
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-5o2r_1_X
+6yl5_1_K
-Could not find nucleotides of chain X in annotation 5o2r.json. Either there is a problem with 5o2r mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Nucleotides not inserted !
-5kcs_1_1X
+6yl5_1_G
-Could not find nucleotides of chain 1X in annotation 5kcs.json. Either there is a problem with 5kcs mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Nucleotides not inserted !
 6zvk_1_E2
 Could not find nucleotides of chain E2 in annotation 6zvk.json. Either there is a problem with 6zvk mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -1582,6 +1636,9 @@ Could not find nucleotides of chain V in annotation 6olf.json. Either there is a
 3erc_1_G
 Sequence is too short. (< 5 resolved nts)
+4qjd_1_D
+Nucleotides not inserted !
+
 6of1_1_1W
 Could not find nucleotides of chain 1W in annotation 6of1.json. Either there is a problem with 6of1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -1675,9 +1732,15 @@ DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A.
 4hot_1_X
 Sequence is too short. (< 5 resolved nts)
+5ns4_1_C
+Nucleotides not inserted !
+
 6d2z_1_C
 Sequence is too short. (< 5 resolved nts)
+7eh0_1_I
+Sequence is too short. (< 5 resolved nts)
+
 4tu0_1_F
 Sequence is too short. (< 5 resolved nts)
@@ -1738,18 +1801,15 @@ Could not find nucleotides of chain NB in annotation 6i7o.json. Either there is 
 1ml5_1_A
 Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+7nsq_1_V
+Could not find nucleotides of chain V in annotation 7nsq.json. Either there is a problem with 7nsq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 6swa_1_Q
 Could not find nucleotides of chain Q in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 6swa_1_R
 Could not find nucleotides of chain R in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-3j6x_1_IR
-Could not find nucleotides of chain IR in annotation 3j6x.json. Either there is a problem with 3j6x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-3j6y_1_IR
-Could not find nucleotides of chain IR in annotation 3j6y.json. Either there is a problem with 3j6y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
 6ole_1_T
 Could not find nucleotides of chain T in annotation 6ole.json. Either there is a problem with 6ole mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -1768,6 +1828,15 @@ Could not find nucleotides of chain T in annotation 6olf.json. Either there is a
 6w6l_1_T
 Could not find nucleotides of chain T in annotation 6w6l.json. Either there is a problem with 6w6l mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+6tnu_1_M
+Could not find nucleotides of chain M in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+5mc6_1_M
+Could not find nucleotides of chain M in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7nrc_1_SM
+Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 6tb3_1_N
 Could not find nucleotides of chain N in annotation 6tb3.json. Either there is a problem with 6tb3 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -1780,6 +1849,9 @@ Could not find nucleotides of chain SN in annotation 7b7d.json. Either there is 
 6tnu_1_N
 Could not find nucleotides of chain N in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+7nrc_1_SN
+Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 7nrd_1_SN
 Could not find nucleotides of chain SN in annotation 7nrd.json. Either there is a problem with 7nrd mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -1810,6 +1882,15 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_D.
 5o1y_1_B
 Sequence is too short. (< 5 resolved nts)
+4kzy_1_I
+Could not find nucleotides of chain I in annotation 4kzy.json. Either there is a problem with 4kzy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+4kzz_1_I
+Could not find nucleotides of chain I in annotation 4kzz.json. Either there is a problem with 4kzz mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+4kzx_1_I
+Could not find nucleotides of chain I in annotation 4kzx.json. Either there is a problem with 4kzx mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 3jcr_1_H
 DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_H.
@@ -2119,9 +2200,6 @@ Sequence is too short. (< 5 resolved nts)
 6uu1_1_333
 Sequence is too short. (< 5 resolved nts)
-1pn8_1_D
-DSSR warning 1pn8.json: no nucleotides found. Ignoring 1pn8_1_D.
-
 3er8_1_H
 Sequence is too short. (< 5 resolved nts)
@@ -2236,14 +2314,11 @@ Sequence is too short. (< 5 resolved nts)
 1xnq_1_W
 Sequence is too short. (< 5 resolved nts)
-1x18_1_C
+7n2v_1_DT
-DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_C.
+Could not find nucleotides of chain DT in annotation 7n2v.json. Either there is a problem with 7n2v mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-1x18_1_B
-DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_B.
-1x18_1_D
+4peh_1_Z
-DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_D.
+Sequence is too short. (< 5 resolved nts)
 1vq6_1_4
 Sequence is too short. (< 5 resolved nts)
@@ -2278,6 +2353,9 @@ Sequence is too short. (< 5 resolved nts)
 4xbf_1_D
 Sequence is too short. (< 5 resolved nts)
+5w1h_1_B
+Nucleotides not inserted !
+
 6n6d_1_D
 Sequence is too short. (< 5 resolved nts)
@@ -2296,52 +2374,148 @@ Sequence is too short. (< 5 resolved nts)
 6tz1_1_N
 Sequence is too short. (< 5 resolved nts)
-6sce_1_B
+6q1h_1_D
 Sequence is too short. (< 5 resolved nts)
-6xl1_1_C
+6q1h_1_H
 Sequence is too short. (< 5 resolved nts)
-6scf_1_I
+6p7p_1_F
 Sequence is too short. (< 5 resolved nts)
-6scf_1_K
+6p7p_1_E
 Sequence is too short. (< 5 resolved nts)
-6yud_1_K
+6p7p_1_D
 Sequence is too short. (< 5 resolved nts)
-6yud_1_O
+6vm6_1_J
 Sequence is too short. (< 5 resolved nts)
-6scf_1_M
+6vm6_1_G
 Sequence is too short. (< 5 resolved nts)
-6yud_1_P
+6wan_1_K
 Sequence is too short. (< 5 resolved nts)
-6scf_1_L
+6wan_1_H
 Sequence is too short. (< 5 resolved nts)
-6yud_1_M
+6wan_1_G
 Sequence is too short. (< 5 resolved nts)
-6yud_1_Q
+6wan_1_L
 Sequence is too short. (< 5 resolved nts)
-6w11_1_C
+6wan_1_I
 Sequence is too short. (< 5 resolved nts)
-6o6x_1_D
+6ywo_1_F
 Sequence is too short. (< 5 resolved nts)
-4ba2_1_R
+6wan_1_J
 Sequence is too short. (< 5 resolved nts)
-7bdv_1_F
+4oau_1_A
 Sequence is too short. (< 5 resolved nts)
-7bdv_1_H
+6ywo_1_E
+Sequence is too short. (< 5 resolved nts)
+
+6ywo_1_K
+Sequence is too short. (< 5 resolved nts)
+
+6vm6_1_I
+Sequence is too short. (< 5 resolved nts)
+
+6vm6_1_H
+Sequence is too short. (< 5 resolved nts)
+
+6ywo_1_I
+Sequence is too short. (< 5 resolved nts)
+
+2a1r_1_C
+Sequence is too short. (< 5 resolved nts)
+
+6m6v_1_F
+Sequence is too short. (< 5 resolved nts)
+
+6m6v_1_E
+Sequence is too short. (< 5 resolved nts)
+
+2a1r_1_D
+Sequence is too short. (< 5 resolved nts)
+
+3gpq_1_E
+Sequence is too short. (< 5 resolved nts)
+
+3gpq_1_F
+Sequence is too short. (< 5 resolved nts)
+
+6o79_1_C
+Sequence is too short. (< 5 resolved nts)
+
+6vm6_1_K
+Sequence is too short. (< 5 resolved nts)
+
+6m6v_1_G
+Sequence is too short. (< 5 resolved nts)
+
+6hyu_1_D
+Sequence is too short. (< 5 resolved nts)
+
+1laj_1_R
+Sequence is too short. (< 5 resolved nts)
+
+6ybv_1_K
+Could not find nucleotides of chain K in annotation 6ybv.json. Either there is a problem with 6ybv mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+6sce_1_B
+Sequence is too short. (< 5 resolved nts)
+
+6xl1_1_C
+Sequence is too short. (< 5 resolved nts)
+
+6scf_1_I
+Sequence is too short. (< 5 resolved nts)
+
+6scf_1_K
+Sequence is too short. (< 5 resolved nts)
+
+6yud_1_K
+Sequence is too short. (< 5 resolved nts)
+
+6yud_1_O
+Sequence is too short. (< 5 resolved nts)
+
+6scf_1_M
+Sequence is too short. (< 5 resolved nts)
+
+6yud_1_P
+Sequence is too short. (< 5 resolved nts)
+
+6scf_1_L
+Sequence is too short. (< 5 resolved nts)
+
+6yud_1_M
+Sequence is too short. (< 5 resolved nts)
+
+6yud_1_Q
+Sequence is too short. (< 5 resolved nts)
+
+6w11_1_C
+Sequence is too short. (< 5 resolved nts)
+
+6o6x_1_D
+Sequence is too short. (< 5 resolved nts)
+
+4ba2_1_R
+Sequence is too short. (< 5 resolved nts)
+
+7bdv_1_F
+Sequence is too short. (< 5 resolved nts)
+
+7bdv_1_H
 Sequence is too short. (< 5 resolved nts)
 6o6x_1_C
@@ -2423,7 +2597,7 @@ Sequence is too short. (< 5 resolved nts)
 Sequence is too short. (< 5 resolved nts)
 1y1y_1_P
-DSSR warning 1y1y.json: no nucleotides found. Ignoring 1y1y_1_P.
+Sequence is too short. (< 5 resolved nts)
 5zuu_1_I
 Sequence is too short. (< 5 resolved nts)
@@ -2431,6 +2605,9 @@ Sequence is too short. (< 5 resolved nts)
 5zuu_1_G
 Sequence is too short. (< 5 resolved nts)
+7am2_1_R1
+Sequence is too short. (< 5 resolved nts)
+
 4peh_1_W
 Sequence is too short. (< 5 resolved nts)
@@ -2443,7 +2620,7 @@ Sequence is too short. (< 5 resolved nts)
 4peh_1_Y
 Sequence is too short. (< 5 resolved nts)
-4peh_1_Z
+7d8c_1_C
 Sequence is too short. (< 5 resolved nts)
 6mkn_1_W
@@ -2482,30 +2659,9 @@ Could not find nucleotides of chain Q in annotation 4eya.json. Either there is a
 4eya_1_R
 Could not find nucleotides of chain R in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-1qzc_1_B
-DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_B.
-
-1t1o_1_B
-DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_B.
-
 1mvr_1_C
 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_C.
-1t1m_1_B
-DSSR warning 1t1m.json: no nucleotides found. Ignoring 1t1m_1_B.
-
-1t1o_1_C
-DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_C.
-
-1t1m_1_A
-DSSR warning 1t1m.json: no nucleotides found. Ignoring 1t1m_1_A.
-
-1t1o_1_A
-DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_A.
-
-2r1g_1_B
-DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_B.
-
 4ht9_1_E
 Sequence is too short. (< 5 resolved nts)
@@ -2536,21 +2692,15 @@ Could not find nucleotides of chain U in annotation 5uk4.json. Either there is a
 5f6c_1_E
 Sequence is too short. (< 5 resolved nts)
+7nwh_1_HH
+Could not find nucleotides of chain HH in annotation 7nwh.json. Either there is a problem with 7nwh mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 4rcj_1_B
 Sequence is too short. (< 5 resolved nts)
 1xnr_1_W
 Sequence is too short. (< 5 resolved nts)
-2agn_1_A
-DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_A.
-
-2agn_1_C
-DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_C.
-
-2agn_1_B
-DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_B.
-
 6e0o_1_C
 Sequence is too short. (< 5 resolved nts)
@@ -2602,11 +2752,8 @@ Sequence is too short. (< 5 resolved nts)
 4d61_1_J
 Could not find nucleotides of chain J in annotation 4d61.json. Either there is a problem with 4d61 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-1trj_1_B
+7nwg_1_Q3
-DSSR warning 1trj.json: no nucleotides found. Ignoring 1trj_1_B.
+Could not find nucleotides of chain Q3 in annotation 7nwg.json. Either there is a problem with 7nwg mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-1trj_1_C
-DSSR warning 1trj.json: no nucleotides found. Ignoring 1trj_1_C.
 5tbw_1_SR
 Could not find nucleotides of chain SR in annotation 5tbw.json. Either there is a problem with 5tbw mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -2653,6 +2800,12 @@ Sequence is too short. (< 5 resolved nts)
 3jbu_1_V
 Could not find nucleotides of chain V in annotation 3jbu.json. Either there is a problem with 3jbu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+4ts2_1_Y
+Nucleotides not inserted !
+
+4ts0_1_Y
+Nucleotides not inserted !
+
 1h2c_1_R
 Sequence is too short. (< 5 resolved nts)
@@ -2731,6 +2884,9 @@ Could not find nucleotides of chain Z in annotation 5flx.json. Either there is a
 6eri_1_AX
 Could not find nucleotides of chain AX in annotation 6eri.json. Either there is a problem with 6eri mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+7k5l_1_R
+Sequence is too short. (< 5 resolved nts)
+
 7d80_1_Y
 Could not find nucleotides of chain Y in annotation 7d80.json. Either there is a problem with 7d80 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -2752,6 +2908,9 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_I.
 1zc8_1_H
 DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_H.
+6bfb_1_Y
+Nucleotides not inserted !
+
 1zc8_1_J
 DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_J.
@@ -2857,6 +3016,12 @@ Could not find nucleotides of chain BB in annotation 6z1p.json. Either there is 
 6z1p_1_BA
 Could not find nucleotides of chain BA in annotation 6z1p.json. Either there is a problem with 6z1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+3p22_1_C
+Nucleotides not inserted !
+
+3p22_1_G
+Nucleotides not inserted !
+
 2uxd_1_X
 Sequence is too short. (< 5 resolved nts)
@@ -2923,12 +3088,6 @@ Sequence is too short. (< 5 resolved nts)
 3ol8_1_P
 Sequence is too short. (< 5 resolved nts)
-1qzc_1_C
-DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_C.
-
-1qzc_1_A
-DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_A.
-
 6yrq_1_E
 Sequence is too short. (< 5 resolved nts)
@@ -3166,6 +3325,9 @@ Sequence is too short. (< 5 resolved nts)
 4wtk_1_P
 Sequence is too short. (< 5 resolved nts)
+6wlj_3_A
+Nucleotides not inserted !
+
 1vqn_1_4
 Sequence is too short. (< 5 resolved nts)
@@ -3214,23 +3376,8 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B.
 4i67_1_B
 Sequence is too short. (< 5 resolved nts)
-3pgw_1_R
+4jf2_1_A
-DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R.
+Nucleotides not inserted !
-
-3pgw_1_N
-DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N.
-
-3cw1_1_X
-DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_X.
-
-3cw1_1_W
-DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_W.
-
-3cw1_1_V
-DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V.
-
-7b0y_1_A
-Could not find nucleotides of chain A in annotation 7b0y.json. Either there is a problem with 7b0y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
 6k32_1_T
 Could not find nucleotides of chain T in annotation 6k32.json. Either there is a problem with 6k32 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -3244,11 +3391,11 @@ Could not find nucleotides of chain A in annotation 5mmj.json. Either there is a
 5x8r_1_A
 Could not find nucleotides of chain A in annotation 5x8r.json. Either there is a problem with 5x8r mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-2agn_1_E
+3fu2_1_B
-DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_E.
+Nucleotides not inserted !
-2agn_1_D
+3fu2_1_A
-DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_D.
+Nucleotides not inserted !
 4v5z_1_BD
 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BD.
@@ -3355,6 +3502,39 @@ Sequence is too short. (< 5 resolved nts)
 5dto_1_B
 Sequence is too short. (< 5 resolved nts)
+6yml_1_A
+Nucleotides not inserted !
+
+6ymm_1_A
+Nucleotides not inserted !
+
+6ymi_1_M
+Nucleotides not inserted !
+
+6ymi_1_F
+Nucleotides not inserted !
+
+6ymi_1_A
+Nucleotides not inserted !
+
+6ylb_1_F
+Nucleotides not inserted !
+
+6ymi_1_C
+Nucleotides not inserted !
+
+6ymj_1_C
+Nucleotides not inserted !
+
+6ylb_1_C
+Nucleotides not inserted !
+
+6ymj_1_I
+Nucleotides not inserted !
+
+6ymj_1_O
+Nucleotides not inserted !
+
 4cxh_1_X
 Could not find nucleotides of chain X in annotation 4cxh.json. Either there is a problem with 4cxh mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -3463,6 +3643,24 @@ Sequence is too short. (< 5 resolved nts)
 4v4f_1_B2
 Sequence is too short. (< 5 resolved nts)
+7m4y_1_V
+Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+7m4x_1_V
+Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+6v3a_1_V
+Could not find nucleotides of chain V in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+6v39_1_V
+Could not find nucleotides of chain V in annotation 6v39.json. Either there is a problem with 6v39 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+6ck5_1_A
+Nucleotides not inserted !
+
+6ck5_1_B
+Nucleotides not inserted !
+
 5it9_1_I
 Could not find nucleotides of chain I in annotation 5it9.json. Either there is a problem with 5it9 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -3490,6 +3688,12 @@ DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_N.
 6gfw_1_R
 Sequence is too short. (< 5 resolved nts)
+3j6x_1_IR
+Could not find nucleotides of chain IR in annotation 3j6x.json. Either there is a problem with 3j6x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
+3j6y_1_IR
+Could not find nucleotides of chain IR in annotation 3j6y.json. Either there is a problem with 3j6y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 2vaz_1_A
 DSSR warning 2vaz.json: no nucleotides found. Ignoring 2vaz_1_A.
@@ -3535,6 +3739,9 @@ Sequence is too short. (< 5 resolved nts)
 5uh9_1_I
 Sequence is too short. (< 5 resolved nts)
+4v5z_1_BS
+DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BS.
+
 2ftc_1_R
 DSSR warning 2ftc.json: no nucleotides found. Ignoring 2ftc_1_R.
@@ -3547,9 +3754,6 @@ Sequence is too short. (< 5 resolved nts)
 4udv_1_R
 Sequence is too short. (< 5 resolved nts)
-2r1g_1_E
-DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_E.
-
 5zsc_1_D
 Sequence is too short. (< 5 resolved nts)
@@ -3631,8 +3835,8 @@ Sequence is too short. (< 5 resolved nts)
 3m85_1_Y
 Sequence is too short. (< 5 resolved nts)
-1e8s_1_C
+5u34_1_B
-DSSR warning 1e8s.json: no nucleotides found. Ignoring 1e8s_1_C.
+Nucleotides not inserted !
 5wnp_1_B
 Could not find nucleotides of chain B in annotation 5wnp.json. Either there is a problem with 5wnp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -3700,12 +3904,21 @@ Sequence is too short. (< 5 resolved nts)
 3u2e_1_C
 Sequence is too short. (< 5 resolved nts)
+7eh1_1_I
+Sequence is too short. (< 5 resolved nts)
+
 5uef_1_C
 Sequence is too short. (< 5 resolved nts)
 5uef_1_D
 Sequence is too short. (< 5 resolved nts)
+7eh2_1_R
+Sequence is too short. (< 5 resolved nts)
+
+7eh2_1_I
+Sequence is too short. (< 5 resolved nts)
+
 4x4u_1_H
 Sequence is too short. (< 5 resolved nts)
@@ -3736,6 +3949,15 @@ Sequence is too short. (< 5 resolved nts)
 7a5g_1_J
 Could not find nucleotides of chain J in annotation 7a5g.json. Either there is a problem with 7a5g mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+1m5k_1_B
+Nucleotides not inserted !
+
+1m5o_1_E
+Nucleotides not inserted !
+
+1m5v_1_B
+Nucleotides not inserted !
+
 6gx6_1_B
 Sequence is too short. (< 5 resolved nts)
@@ -3754,9 +3976,6 @@ Sequence is too short. (< 5 resolved nts)
 1zn1_1_C
 DSSR warning 1zn1.json: no nucleotides found. Ignoring 1zn1_1_C.
-1zn0_1_C
-DSSR warning 1zn0.json: no nucleotides found. Ignoring 1zn0_1_C.
-
 1xpu_1_G
 Sequence is too short. (< 5 resolved nts)
@@ -3826,9 +4045,15 @@ Sequence is too short. (< 5 resolved nts)
 6gc5_1_G
 Sequence is too short. (< 5 resolved nts)
+4rne_1_C
+Nucleotides not inserted !
+
 1n1h_1_B
 Sequence is too short. (< 5 resolved nts)
+7n2v_1_PT
+Could not find nucleotides of chain PT in annotation 7n2v.json. Either there is a problem with 7n2v mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 4ohz_1_B
 Sequence is too short. (< 5 resolved nts)
@@ -3874,6 +4099,15 @@ Could not find nucleotides of chain X in annotation 5y88.json. Either there is a
 4v5z_1_BB
 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BB.
+5y85_1_D
+Nucleotides not inserted !
+
+5y85_1_B
+Nucleotides not inserted !
+
+5y87_1_D
+Nucleotides not inserted !
+
 3j0o_1_H
 Could not find nucleotides of chain H in annotation 3j0o.json. Either there is a problem with 3j0o mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4057,9 +4291,6 @@ Sequence is too short. (< 5 resolved nts)
 6a6l_1_D
 Sequence is too short. (< 5 resolved nts)
-4v5z_1_BS
-DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BS.
-
 4v8t_1_1
 Could not find nucleotides of chain 1 in annotation 4v8t.json. Either there is a problem with 4v8t mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4072,6 +4303,9 @@ Sequence is too short. (< 5 resolved nts)
 1uvi_1_E
 Sequence is too short. (< 5 resolved nts)
+3gs5_1_A
+Nucleotides not inserted !
+
 4m7d_1_P
 Sequence is too short. (< 5 resolved nts)
@@ -4132,12 +4366,12 @@ Could not find nucleotides of chain 2M in annotation 6ip6.json. Either there is 
 6qcs_1_M
 Sequence is too short. (< 5 resolved nts)
+7b5k_1_Z
+Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 486d_1_G
 Could not find nucleotides of chain G in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-2r1g_1_C
-DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_C.
-
 486d_1_F
 Could not find nucleotides of chain F in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4177,6 +4411,9 @@ Could not find nucleotides of chain L in annotation 4oq9.json. Either there is a
 6r9q_1_B
 Sequence is too short. (< 5 resolved nts)
+7m4u_1_A
+Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 6v3a_1_SN1
 Could not find nucleotides of chain SN1 in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4189,9 +4426,6 @@ Could not find nucleotides of chain SN1 in annotation 6v39.json. Either there is
 6v3e_1_SN1
 Could not find nucleotides of chain SN1 in annotation 6v3e.json. Either there is a problem with 6v3e mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-1pn7_1_C
-DSSR warning 1pn7.json: no nucleotides found. Ignoring 1pn7_1_C.
-
 1mj1_1_Q
 Could not find nucleotides of chain Q in annotation 1mj1.json. Either there is a problem with 1mj1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4315,12 +4549,6 @@ Sequence is too short. (< 5 resolved nts)
 6oy6_1_I
 Sequence is too short. (< 5 resolved nts)
-4bbl_1_Y
-DSSR warning 4bbl.json: no nucleotides found. Ignoring 4bbl_1_Y.
-
-4bbl_1_Z
-DSSR warning 4bbl.json: no nucleotides found. Ignoring 4bbl_1_Z.
-
 4qvd_1_H
 Sequence is too short. (< 5 resolved nts)
@@ -4330,15 +4558,54 @@ Sequence is too short. (< 5 resolved nts)
 3iy8_1_A
 DSSR warning 3iy8.json: no nucleotides found. Ignoring 3iy8_1_A.
-6tnu_1_M
+7n06_1_G
-Could not find nucleotides of chain M in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Sequence is too short. (< 5 resolved nts)
-5mc6_1_M
+7n06_1_H
-Could not find nucleotides of chain M in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_I
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_J
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_K
+Sequence is too short. (< 5 resolved nts)
+
+7n06_1_L
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_G
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_H
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_I
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_J
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_K
+Sequence is too short. (< 5 resolved nts)
+
+7n33_1_L
+Sequence is too short. (< 5 resolved nts)
 5mc6_1_N
 Could not find nucleotides of chain N in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+2qwy_1_C
+Nucleotides not inserted !
+
+2qwy_1_A
+Nucleotides not inserted !
+
+2qwy_1_B
+Nucleotides not inserted !
+
 4eya_1_O
 Could not find nucleotides of chain O in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4363,12 +4630,6 @@ Could not find nucleotides of chain U in annotation 6htq.json. Either there is a
 6uu6_1_333
 Sequence is too short. (< 5 resolved nts)
-6v3a_1_V
-Could not find nucleotides of chain V in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-6v39_1_V
-Could not find nucleotides of chain V in annotation 6v39.json. Either there is a problem with 6v39 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
 5a0v_1_F
 Sequence is too short. (< 5 resolved nts)
@@ -4495,6 +4756,9 @@ Could not find nucleotides of chain BV in annotation 6xa1.json. Either there is 
 6ha8_1_X
 Could not find nucleotides of chain X in annotation 6ha8.json. Either there is a problem with 6ha8 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+3bnp_1_B
+Nucleotides not inserted !
+
 1m8w_1_E
 Could not find nucleotides of chain E in annotation 1m8w.json. Either there is a problem with 1m8w mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4564,6 +4828,21 @@ Sequence is too short. (< 5 resolved nts)
 4wti_1_P
 Sequence is too short. (< 5 resolved nts)
+6dlr_1_A
+Nucleotides not inserted !
+
+6dlt_1_A
+Nucleotides not inserted !
+
+6dls_1_A
+Nucleotides not inserted !
+
+6dlq_1_A
+Nucleotides not inserted !
+
+6dnr_1_A
+Nucleotides not inserted !
+
 5l3p_1_Y
 Could not find nucleotides of chain Y in annotation 5l3p.json. Either there is a problem with 5l3p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4573,12 +4852,36 @@ Sequence is too short. (< 5 resolved nts)
 3rzo_1_R
 Sequence is too short. (< 5 resolved nts)
+5wlh_1_B
+Nucleotides not inserted !
+
 2f4v_1_Z
 Sequence is too short. (< 5 resolved nts)
+5ml7_1_B
+Nucleotides not inserted !
+
 1qln_1_R
 Sequence is too short. (< 5 resolved nts)
+3pgw_1_R
+DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R.
+
+3pgw_1_N
+DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N.
+
+3cw1_1_X
+DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_X.
+
+3cw1_1_W
+DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_W.
+
+3cw1_1_V
+DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V.
+
+7b0y_1_A
+Could not find nucleotides of chain A in annotation 7b0y.json. Either there is a problem with 7b0y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+
 6ogy_1_M
 Sequence is too short. (< 5 resolved nts)
@@ -4588,12 +4891,12 @@ Sequence is too short. (< 5 resolved nts)
 6uej_1_B
 Sequence is too short. (< 5 resolved nts)
+7kga_1_A
+Nucleotides not inserted !
+
 6ywy_1_BB
 Could not find nucleotides of chain BB in annotation 6ywy.json. Either there is a problem with 6ywy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-1x18_1_A
-DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_A.
-
 5ytx_1_B
 Sequence is too short. (< 5 resolved nts)
@@ -4720,24 +5023,12 @@ Could not find nucleotides of chain AA in annotation 5mrf.json. Either there is 
 7jhy_1_Z
 Could not find nucleotides of chain Z in annotation 7jhy.json. Either there is a problem with 7jhy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-2r1g_1_A
-DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_A.
-
-2r1g_1_D
-DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_D.
-
-2r1g_1_F
-DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_F.
-
 3eq4_1_Y
 DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_Y.
 4wkr_1_C
 Sequence is too short. (< 5 resolved nts)
-2r1g_1_X
-DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_X.
-
 4v99_1_EC
 Could not find nucleotides of chain EC in annotation 4v99.json. Either there is a problem with 4v99 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
@@ -4927,120 +5218,51 @@ Sequence is too short. (< 5 resolved nts)
 4ejt_1_G
 Sequence is too short. (< 5 resolved nts)
-6lkq_1_W
+1et4_1_A
-Could not find nucleotides of chain W in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+Nucleotides not inserted !
-
-3qsu_1_P
-Sequence is too short. (< 5 resolved nts)
-
-3qsu_1_R
-Sequence is too short. (< 5 resolved nts)
-
-2xs7_1_B
-Sequence is too short. (< 5 resolved nts)
-
-1n38_1_B
-Sequence is too short. (< 5 resolved nts)
-
-4qvc_1_G
-Sequence is too short. (< 5 resolved nts)
-
-6q1h_1_D
-Sequence is too short. (< 5 resolved nts)
-
-6q1h_1_H
-Sequence is too short. (< 5 resolved nts)
-
-6p7p_1_F
-Sequence is too short. (< 5 resolved nts)
-
-6p7p_1_E
-Sequence is too short. (< 5 resolved nts)
-
-6p7p_1_D
-Sequence is too short. (< 5 resolved nts)
-
-6vm6_1_J
-Sequence is too short. (< 5 resolved nts)
-6vm6_1_G
+1et4_1_C
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-6wan_1_K
+1et4_1_B
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-6wan_1_H
+1et4_1_D
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-6wan_1_G
+1et4_1_E
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-6wan_1_L
+1ddy_1_C
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-6wan_1_I
+1ddy_1_A
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-6ywo_1_F
+1ddy_1_E
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-
-6wan_1_J
-Sequence is too short. (< 5 resolved nts)
-
-4oau_1_A
-Sequence is too short. (< 5 resolved nts)
-6ywo_1_E
+6lkq_1_W
-Sequence is too short. (< 5 resolved nts)
+Could not find nucleotides of chain W in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-6ywo_1_K
-Sequence is too short. (< 5 resolved nts)
-
-6vm6_1_I
-Sequence is too short. (< 5 resolved nts)
-
-6vm6_1_H
-Sequence is too short. (< 5 resolved nts)
-
-6ywo_1_I
-Sequence is too short. (< 5 resolved nts)
-
-2a1r_1_C
-Sequence is too short. (< 5 resolved nts)
-
-6m6v_1_F
-Sequence is too short. (< 5 resolved nts)
-
-6m6v_1_E
-Sequence is too short. (< 5 resolved nts)
-
-2a1r_1_D
-Sequence is too short. (< 5 resolved nts)
-
-3gpq_1_E
-Sequence is too short. (< 5 resolved nts)
-3gpq_1_F
+6r47_1_A
-Sequence is too short. (< 5 resolved nts)
+Nucleotides not inserted !
-6o79_1_C
+3qsu_1_P
 Sequence is too short. (< 5 resolved nts)
-6vm6_1_K
+3qsu_1_R
 Sequence is too short. (< 5 resolved nts)
-6m6v_1_G
+2xs7_1_B
 Sequence is too short. (< 5 resolved nts)
-6hyu_1_D
+1n38_1_B
 Sequence is too short. (< 5 resolved nts)
-1laj_1_R
+4qvc_1_G
 Sequence is too short. (< 5 resolved nts)
-6ybv_1_K
-Could not find nucleotides of chain K in annotation 6ybv.json. Either there is a problem with 6ybv mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
 6mpf_1_W
 Sequence is too short. (< 5 resolved nts)
@@ -5065,6 +5287,9 @@ Could not find nucleotides of chain V in annotation 6ftj.json. Either there is a
 6ftg_1_V
 Could not find nucleotides of chain V in annotation 6ftg.json. Either there is a problem with 6ftg mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
+3npn_1_A
+Nucleotides not inserted !
+
 4g0a_1_G
 Sequence is too short. (< 5 resolved nts)
@@ -5080,15 +5305,6 @@ Sequence is too short. (< 5 resolved nts)
 5hkc_1_C
 Sequence is too short. (< 5 resolved nts)
-4kzy_1_I
-Could not find nucleotides of chain I in annotation 4kzy.json. Either there is a problem with 4kzy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-4kzz_1_I
-Could not find nucleotides of chain I in annotation 4kzz.json. Either there is a problem with 4kzz mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-4kzx_1_I
-Could not find nucleotides of chain I in annotation 4kzx.json. Either there is a problem with 4kzx mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
 1rmv_1_B
 Sequence is too short. (< 5 resolved nts)
@@ -5134,69 +5350,3 @@ Sequence is too short. (< 5 resolved nts)
 5hjz_1_C
 Sequence is too short. (< 5 resolved nts)
-7nrc_1_SM
-Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-7nrc_1_SN
-Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-7am2_1_R1
-Sequence is too short. (< 5 resolved nts)
-
-7k5l_1_R
-Sequence is too short. (< 5 resolved nts)
-
-7b5k_1_X
-Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-7d8c_1_C
-Sequence is too short. (< 5 resolved nts)
-
-7m4y_1_V
-Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-7m4x_1_V
-Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-7b5k_1_Z
-Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-7m4u_1_A
-Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
-
-7n06_1_G
-Sequence is too short. (< 5 resolved nts)
-
-7n06_1_H
-Sequence is too short. (< 5 resolved nts)
-
-7n06_1_I
-Sequence is too short. (< 5 resolved nts)
-
-7n06_1_J
-Sequence is too short. (< 5 resolved nts)
-
-7n06_1_K
-Sequence is too short. (< 5 resolved nts)
-
-7n06_1_L
-Sequence is too short. (< 5 resolved nts)
-
-7n33_1_G
-Sequence is too short. (< 5 resolved nts)
-
-7n33_1_H
-Sequence is too short. (< 5 resolved nts)
-
-7n33_1_I
-Sequence is too short. (< 5 resolved nts)
-
-7n33_1_J
-Sequence is too short. (< 5 resolved nts)
-
-7n33_1_K
-Sequence is too short. (< 5 resolved nts)
-
-7n33_1_L
-Sequence is too short. (< 5 resolved nts)
-
--- a/statistics.py
View file @511a1a1
+++ b/statistics.py
View file @511a1a1
@@ -7,38 +7,27 @@
 # Run this file if you want the base counts, pair-type counts, identity percents, etc
 # in the database.
-import getopt, os, pickle, sqlite3, shlex, subprocess, sys, warnings
+import getopt, glob, json, os, sqlite3, shlex, subprocess, sys, warnings
 import numpy as np
 import pandas as pd
-import threading as th
 import scipy.stats as st
 import matplotlib
 import matplotlib.pyplot as plt
 import matplotlib.cm as cm
 import matplotlib.patches as mpatches
 import scipy.cluster.hierarchy as sch
-import sklearn
-import json
-import glob
-import pickle
-import Bio
 from scipy.spatial.distance import squareform
 from mpl_toolkits.mplot3d import axes3d
 from Bio import AlignIO, SeqIO
 from Bio.PDB.MMCIFParser import MMCIFParser
-from Bio.PDB.vectors import Vector, calc_angle, calc_dihedral
 from functools import partial
-from multiprocessing import Pool, Manager
+from multiprocessing import Pool, Manager, Value
 from os import path
 from tqdm import tqdm
 from collections import Counter
 from setproctitle import setproctitle
 from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker, trace_unhandled_exceptions
-from sklearn.mixture import GaussianMixture
+from geometric_stats import *
-import warnings
-from pandas.core.common import SettingWithCopyWarning
-from joblib import Parallel, delayed
-
 np.set_printoptions(threshold=sys.maxsize, linewidth=np.inf, precision=8)
 path_to_3D_data = "tobedefinedbyoptions"
@@ -928,6 +917,7 @@ def general_stats():
     fig.savefig(runDir + "/results/figures/Nfamilies.png")
     plt.close()
+@trace_unhandled_exceptions
 def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
     # Identify the right 3D file
@@ -1135,11 +1125,6 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False):
         setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
     return 0
-def log_to_pbar(pbar):
-    def update(r):
-        pbar.update(1)
-    return update
-
 def family_order(f):
     # sort the RNA families so that the plots are readable
@@ -1154,70 +1139,6 @@ def family_order(f):
     else:
         return 2
-def conversion_angles(bdd): 
-    """
-    Convert database torsion angles to degrees
-    and put them in a list to reuse for statistics
-    """
-    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-    db_path = os.path.join(BASE_DIR, bdd)
-    baseDeDonnees = sqlite3.connect(db_path)
-    curseur = baseDeDonnees.cursor()
-    curseur.execute("SELECT chain_id, nt_name, alpha, beta, gamma, delta, epsilon, zeta, chi FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U' ;")
-    liste=[]
-    for nt in curseur.fetchall(): # retrieve the angle measurements and put them in a list
-        liste.append(nt)
-    angles_torsion=[]
-    for nt in liste :
-        angles_deg=[]
-        angles_deg.append(nt[0]) #chain_id
-        angles_deg.append(nt[1]) #nt_name
-        for i in range (2,9): # on all angles
-            angle=0
-            if nt[i] == None : 
-                angle=None
-            elif nt[i]<=np.pi: #if angle value <pi, positive
-                angle=(180/np.pi)*nt[i]
-            elif np.pi < nt[i] <= 2*np.pi : #if value of the angle between pi and 2pi, negative
-                angle=((180/np.pi)*nt[i])-360
-            else :
-                angle=nt[i] # in case some angles still in degrees
-            angles_deg.append(angle)
-        angles_torsion.append(angles_deg)
-    return angles_torsion
-
-def conversion_eta_theta(bdd):
-    """
-    Convert database pseudotorsion angles to degrees
-    and put them in a list to reuse for statistics
-    """
-    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-    db_path = os.path.join(BASE_DIR, bdd)
-    baseDeDonnees = sqlite3.connect(db_path)
-    curseur = baseDeDonnees.cursor()
-    curseur.execute("SELECT chain_id, nt_name, eta, theta, eta_prime, theta_prime, eta_base, theta_base FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U';")
-    liste=[]
-    for nt in curseur.fetchall(): 
-        liste.append(nt)
-    angles_virtuels=[]
-    for nt in liste :
-        angles_deg=[]
-        angles_deg.append(nt[0]) #chain_id
-        angles_deg.append(nt[1]) #nt_name
-        for i in range (2,8): 
-            angle=0
-            if nt[i] == None : 
-                angle=None
-            elif nt[i]<=np.pi:
-                angle=(180/np.pi)*nt[i]
-            elif np.pi < nt[i] <= 2*np.pi : 
-                angle=((180/np.pi)*nt[i])-360
-            else :
-                angle=nt[i] 
-            angles_deg.append(angle)
-        angles_virtuels.append(angles_deg)
-    return angles_virtuels
-
 def nt_3d_centers(cif_file, consider_all_atoms):
     """Return the nucleotides' coordinates, summarizing a nucleotide by only one point.
     If consider_all_atoms : barycentre is used
@@ -1252,1674 +1173,30 @@ def nt_3d_centers(cif_file, consider_all_atoms):
                     result.append(res)
     return(result)
-def liste_repres(fpath):
+def representatives_from_nrlist(res):
-    repres=[]
+    nr_code = min([i for i in [1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 20.0] if i >= res])
-    df=pd.read_csv(os.path.abspath(fpath))
+    fpath = f"/home/data/RNA/3D/latest_nr_list_{nr_code}A.csv"
+    repres = []
+    df = pd.read_csv(os.path.abspath(fpath))
     for i in range(df.shape[0]):
-        up_name=df["representative"][i]
+        up_name = df["representative"][i]
         if '+' in up_name:
-            up_name=up_name.split('+')
+            up_name = up_name.split('+')
             for i in range(len(up_name)):
-                chain=up_name[i].split('|')
+                chain = up_name[i].split('|')
-                chain=chain[0].lower()+'_'+chain[1]+'_'+chain[2]
+                chain = chain[0].lower() + '_' + chain[1] + '_' + chain[2]
-                repres.append(chain+'.cif')
+                repres.append(chain + '.cif')
         else :
-            up_name=up_name.split('|')
+            up_name = up_name.split('|')
-            low_name=up_name[0].lower()+'_'+up_name[1]+'_'+up_name[2]
+            low_name = up_name[0].lower() + '_' + up_name[1] + '_' + up_name[2]
-            repres.append(low_name+'.cif') 
+            repres.append(low_name + '.cif') 
     return repres
-
+def log_to_pbar(pbar):
-def get_euclidian_distance(L1, L2):
+    def update(r):
-    """
-    Returns the distance between two points (coordinates in lists)
-    """
-
-    if len(L1)*len(L2) == 0:
-        return np.nan
-    
-    if len(L1) == 1:
-        L1 = L1[0]
-    if len(L2) == 1:
-        L2 = L2[0]
-
-    e = 0
-    for i in range(len(L1)):
-        try:
-            e += float(L1[i] - L2[i])**2
-        except TypeError:
-            print("Terms: ", L1, L2)
-        except IndexError:
-            print("Terms: ", L1, L2)
-
-    return np.sqrt(e)
-
-def get_flat_angle(L1, L2, L3):
-    if len(L1)*len(L2)*len(L3) == 0:
-        return np.nan
-
-    return calc_angle(Vector(L1[0]), Vector(L2[0]), Vector(L3[0]))*(180/np.pi)
-
-def get_torsion_angle(L1, L2, L3, L4):
-    if len(L1)*len(L2)*len(L3)*len(L4) == 0:
-        return np.nan
-    
-    return calc_dihedral(Vector(L1[0]), Vector(L2[0]), Vector(L3[0]), Vector(L4[0]))*(180/np.pi)
-
-def pos_b1(res):
-    """
-    Returns the coordinates of virtual atom B1 (center of the first aromatic cycle)
-    """
-    coordb1=[]
-    somme_x_b1=0
-    somme_y_b1=0
-    somme_z_b1=0
-    moy_x_b1=0
-    moy_y_b1=0
-    moy_z_b1=0
-    #different cases
-    #some residues have 2 aromatic cycles 
-    if res.get_resname() in ['A', 'G', '2MG', '7MG', 'MA6', '6IA', 'OMG' , '2MA', 'B9B', 'A2M', '1MA', 'E7G', 'P7G', 'B8W', 'B8K', 'BGH', '6MZ', 'E6G', 'MHG', 'M7A', 'M2G', 'P5P', 'G7M', '1MG', 'T6A', 'MIA', 'YG', 'YYG', 'I', 'DG', 'N79', '574', 'DJF', 'AET', '12A', 'ANZ', 'UY4'] :  
-        c=0
-        names=[]
-        for atom in res : 
-            if (atom.get_fullname() in ['N9', 'C8', 'N7', 'C4', 'C5']) :
-                c=c+1
-                names.append(atom.get_name())
-                coord=atom.get_vector()
-                somme_x_b1=somme_x_b1+coord[0]
-                somme_y_b1=somme_y_b1+coord[1]
-                somme_z_b1=somme_z_b1+coord[2]
-            else : 
-                c=c
-        #calcul coord B1
-        if c != 0 :
-            moy_x_b1=somme_x_b1/c
-            moy_y_b1=somme_y_b1/c
-            moy_z_b1=somme_z_b1/c
-            coordb1.append(moy_x_b1)
-            coordb1.append(moy_y_b1)
-            coordb1.append(moy_z_b1)
-    #others have only one cycle
-    if res.get_resname() in ['C', 'U', 'AG9', '70U', '1RN', 'RSP', '3AU', 'CM0', 'U8U', 'IU', 'E3C', '4SU', '5HM', 'LV2', 'LHH', '4AC', 'CH', 'Y5P', '2MU', '4OC', 'B8T', 'JMH', 'JMC', 'DC', 'B9H', 'UR3', 'I4U', 'B8Q', 'P4U', 'OMU', 'OMC', '5MU', 'H2U', 'CBV', 'M1Y', 'B8N', '3TD', 'B8H'] :
-        c=0
-        for atom in res :
-            if (atom.get_fullname() in ['C6', 'N3', 'N1', 'C2', 'C4', 'C5']):
-                c=c+1
-                coord=atom.get_vector()
-                somme_x_b1=somme_x_b1+coord[0]
-                somme_y_b1=somme_y_b1+coord[1]
-                somme_z_b1=somme_z_b1+coord[2]
-        #calcul coord B1
-        if c != 0 :
-            moy_x_b1=somme_x_b1/c
-            moy_y_b1=somme_y_b1/c
-            moy_z_b1=somme_z_b1/c
-            coordb1.append(moy_x_b1)
-            coordb1.append(moy_y_b1)
-            coordb1.append(moy_z_b1)
-
-    if len(coordb1):
-        return [coordb1]
-    else:
-        return []
-
-def pos_b2(res):
-    """
-    Returns the coordinates of virtual atom B2 (center of the second aromatic cycle, if exists)
-    """
-    coordb2=[]
-    somme_x_b2=0
-    somme_y_b2=0
-    somme_z_b2=0
-    moy_x_b2=0
-    moy_y_b2=0
-    moy_z_b2=0
-
-    if res.get_resname() in ['A', 'G', '2MG', '7MG', 'MA6', '6IA', 'OMG' , '2MA', 'B9B', 'A2M', '1MA', 'E7G', 'P7G', 'B8W', 'B8K', 'BGH', '6MZ', 'E6G', 'MHG', 'M7A', 'M2G', 'P5P', 'G7M', '1MG', 'T6A', 'MIA', 'YG', 'YYG', 'I', 'DG', 'N79', '574', 'DJF', 'AET', '12A', 'ANZ', 'UY4'] :  #2 cycles aromatiques
-        c=0
-        for atom in res :
-            if atom.get_fullname() in ['C6', 'N3', 'N1', 'C2', 'C4', 'C5'] :
-                c=c+1
-                coord=atom.get_vector()
-                somme_x_b2=somme_x_b2+coord[0]
-                somme_y_b2=somme_y_b2+coord[1]
-                somme_z_b2=somme_z_b2+coord[2]
-        #calcul coord B2
-        if c!=0 :
-            moy_x_b2=somme_x_b2/c
-            moy_y_b2=somme_y_b2/c
-            moy_z_b2=somme_z_b2/c
-            coordb2.append(moy_x_b2)
-            coordb2.append(moy_y_b2)
-            coordb2.append(moy_z_b2)
-    if len(coordb2):
-        return [coordb2]
-    else:
-        return []
-
-@trace_unhandled_exceptions
-def basepair_measures(res, pair):
-    """
-    measurement of the flat angles describing a basepair in the HiRE-RNA model
-    """
-    if res.get_resname()=='C' or res.get_resname()=='U' :
-        atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] 
-        atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
-        atom_b1_res = pos_b1(res)
-        if not len(atom_c4_res) or not len(atom_c1p_res) or not len(atom_b1_res):
-            return
-        a3_res = Vector(atom_c4_res[0])
-        a2_res = Vector(atom_c1p_res[0])
-        a1_res = Vector(atom_b1_res[0])
-    if res.get_resname()=='A' or res.get_resname()=='G' :
-        atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
-        atom_b1_res = pos_b1(res)
-        atom_b2_res = pos_b2(res)
-        if not len(atom_c1p_res) or not len(atom_b1_res) or not len(atom_b2_res): 
-            return
-        a3_res = Vector(atom_c1p_res[0])
-        a2_res = Vector(atom_b1_res[0])
-        a1_res = Vector(atom_b2_res[0])
-        
-    if pair.get_resname()=='C' or pair.get_resname()=='U' :
-        atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ]
-        atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
-        atom_b1_pair = pos_b1(pair)
-        if not len(atom_c4_pair) or not len(atom_c1p_pair) or not len(atom_b1_pair):
-            return
-        a3_pair = Vector(atom_c4_pair[0])
-        a2_pair = Vector(atom_c1p_pair[0])
-        a1_pair = Vector(atom_b1_pair[0])
-    if pair.get_resname()=='A' or pair.get_resname()=='G' :
-        atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
-        atom_b1_pair = pos_b1(pair)
-        atom_b2_pair = pos_b2(pair)
-        if not len(atom_c1p_pair) or not len(atom_b1_pair) or not len(atom_b2_pair): # No C1' atom in the paired nucleotide, skip measures.
-            return
-        a3_pair = Vector(atom_c1p_pair[0])
-        a2_pair = Vector(atom_b1_pair[0])
-        a1_pair = Vector(atom_b2_pair[0])
-
-    # Bond vectors
-    res_32 = a3_res - a2_res
-    res_12 = a1_res - a2_res
-    pair_32 = a3_pair - a2_pair
-    pair_12 = a1_pair - a2_pair
-    rho = a1_res - a1_pair # from pair to res
-
-    # dist
-    dist = rho.norm()
-
-    # we calculate the 2 plane angles
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore', RuntimeWarning)
-        b = res_12.angle(rho)*(180/np.pi)   # equal to the previous implementation
-        c = pair_12.angle(-rho)*(180/np.pi) #
-    # a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi)    # not required
-    # b = calc_angle(a2_res, a1_res, a1_pair)*(180/np.pi)
-    # c = calc_angle(a1_res, a1_pair, a2_pair)*(180/np.pi)
-    # d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi)   # not required
-
-    # Compute plane vectors
-    n1 = (res_32**res_12).normalized() # ** between vectors, is the cross product
-    n2 = (pair_32**pair_12).normalized()
-
-    # Distances between base tip and the other base's plane (orthogonal projection)
-    # if angle(rho, n) > pi/2 the distance is negative (signed following n)
-    d1 = rho*n1 # projection of rho on axis n1
-    d2 = rho*n2
-
-    # Now the projection of rho in the planes. It's just a sum of the triangles' two other edges.
-    p1 = (-rho+n1**d1).normalized() # between vector and scalar, ** is the multiplication by a scalar
-    p2 = (rho-n2**d2).normalized()
-
-    # Measure tau, the dihedral
-    u = (res_12**rho).normalized()
-    v = (rho**pair_12).normalized()
-    cosTau1 = n1*u
-    cosTau2 = v*n2 
-    
-    # cosTau is enough to compute alpha, but we can't distinguish
-    # yet betwwen tau and -tau. If the full computation if required, then:
-    tau1 = np.arccos(cosTau1)*(180/np.pi)
-    tau2 = np.arccos(cosTau2)*(180/np.pi)
-    w1 = u**n1
-    w2 = v**n2
-    if res_12*w1 < 0:
-        tau1 = -tau1
-    if pair_12*w2 < 0:
-        tau2 = -tau2
-
-    # And finally, the a1 and a2 angles between res_12 and p1 / pair_12 and p2
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore', RuntimeWarning)
-        a1 = (-res_12).angle(p1)*(180/np.pi)
-        a2 = (-pair_12).angle(p2)*(180/np.pi)
-    if cosTau1 > 0:
-        # CosTau > 0 (Tau < 90 or Tau > 270) implies that alpha > 180.
-        a1 = -a1
-    if cosTau2 > 0:
-        a2 = -a2
-
-    return [dist, b, c, d1, d2, a1, a2, tau1, tau2]
-
-@trace_unhandled_exceptions
-def measure_from_structure(f):
-    """
-    Do geometric measures required on a given filename
-    """
-
-    name = f.split('.')[0]
-
-    global idxQueue
-    thr_idx = idxQueue.get()
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measure_from_structure({f})")
-
-    # Open the structure 
-    with warnings.catch_warnings():
-        # Ignore the PDB problems. This mostly warns that some chain is discontinuous.
-        warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.PDBConstructionWarning)
-        warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.BiopythonWarning)
-        parser=MMCIFParser()
-        s = parser.get_structure(f, os.path.abspath(path_to_3D_data+ "rna_only/" + f))
-    
-    #pyle_measures(name, s, thr_idx)
-    #measures_aa(name, s, thr_idx)
-    if DO_HIRE_RNA_MEASURES:
-        measures_hrna(name, s, thr_idx)
-        measures_hrna_basepairs(name, s, thr_idx)
-    if DO_WADLEY_ANALYSIS:
-        #measures_wadley(name, s, thr_idx)
-        pyle_measures(name, s, thr_idx)
-    
-    idxQueue.put(thr_idx) # replace the thread index in the queue
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
-
-@trace_unhandled_exceptions
-def measures_wadley(name, s, thr_idx):
-    """
-    Measures the distances and plane angles involving C1' and P atoms 
-    Saves the results in a dataframe
-    """
-
-    # do not recompute something already computed
-    if (path.isfile(runDir + '/results/geometry/Pyle/angles/flat_angles_pyle_' + name + '.csv') and
-        path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv")):
-        return
-
-    liste_dist = []
-    liste_angl = []
-    last_p = []
-    last_c1p = []
-    last_c4p = []
-
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_wadley({name})")
-
-    chain = next(s[0].get_chains())
-    for res in tqdm(chain, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_wadley", unit="res", leave=False):
-        p_c1p_psuiv = np.nan
-        c1p_psuiv_c1psuiv = np.nan
-        if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"] :
-            atom_p = [ atom.get_coord() for atom in res if atom.get_name() ==  "P"]
-            atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
-            atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ]
-            if len(atom_c1p) > 1:
-                for atom in res:
-                    if "C1'" in atom.get_fullname():
-                        print("\n", atom.get_fullname(), "-", res.get_resname(), "\n")
-
-            p_c1p_psuiv = get_flat_angle(last_p, last_c1p, atom_p)
-            c1p_psuiv_c1psuiv = get_flat_angle(last_c1p, atom_p, atom_c1p)
-            c1p_psuiv = get_euclidian_distance(last_c1p, atom_p)
-            p_c1p = get_euclidian_distance(atom_p, atom_c1p)
-            c4p_psuiv = get_euclidian_distance(last_c4p, atom_p)
-            p_c4p = get_euclidian_distance(atom_p, atom_c4p)
-
-            last_p = atom_p
-            last_c1p = atom_c1p
-            last_c4p = atom_c4p
-
-            liste_dist.append([res.get_resname(), c1p_psuiv, p_c1p, c4p_psuiv, p_c4p])
-            liste_angl.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv])
-
-    df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"])
-    df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv")
-    df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"])
-    df.to_csv(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle_"+name+".csv")
-
-@trace_unhandled_exceptions
-def measures_aa(name, s, thr_idx):
-    """
-    Measures the distance between atoms linked by covalent bonds
-    """
-
-    # do not recompute something already computed
-    if path.isfile(runDir+"/results/geometry/all-atoms/distances/dist_atoms_"+name+".csv"):
-        return
-    
-    last_o3p = [] # o3 'of the previous nucleotide linked to the P of the current nucleotide
-    liste_common = []
-    liste_purines = []
-    liste_pyrimidines = []
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measure_aa_dists({name})")
-
-    chain = next(s[0].get_chains()) # 1 chain per file
-    residues = list(chain.get_residues())
-    pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measure_aa_dists", unit="res", leave=False)
-    pbar.update(0)
-    for res in chain :
-        
-        # for residues A, G, C, U
-        op3_p=[]
-        p_op1=[]
-        p_op2=[]
-        p_o5p=[]
-        o5p_c5p=[]
-        c5p_c4p=[]
-        c4p_o4p=[]
-        o4p_c1p=[]
-        c1p_c2p=[]
-        c2p_o2p=[]
-        c2p_c3p=[]
-        c3p_o3p=[]
-        c4p_c3p=[]
-        
-        #if res = A or G
-        c1p_n9=None
-        n9_c8=None
-        c8_n7=None
-        n7_c5=None
-        c5_c6=None
-        c6_n1=None
-        n1_c2=None
-        c2_n3=None
-        n3_c4=None
-        c4_n9=None
-        c4_c5=None
-        #if res=G
-        c6_o6=None
-        c2_n2=None
-        #if res = A
-        c6_n6=None
-
-        #if res = C or U
-        c1p_n1=None
-        n1_c6=None
-        c6_c5=None
-        c5_c4=None
-        c4_n3=None
-        n3_c2=None
-        c2_n1=None
-        c2_o2=None
-        #if res =C
-        c4_n4=None
-        #if res=U
-        c4_o4=None
-        last_o3p_p=None
-
-
-        if res.get_resname()=='A' or res.get_resname()=='G' or res.get_resname()=='C' or res.get_resname()=='U' :
-            #get the coordinates of the atoms
-            atom_p = [ atom.get_coord() for atom in res if atom.get_name() ==  "P"]
-            atom_op3 = [ atom.get_coord() for atom in res if "OP3" in atom.get_fullname() ]
-            atom_op1 = [ atom.get_coord() for atom in res if "OP1" in atom.get_fullname() ]
-            atom_op2 = [ atom.get_coord() for atom in res if "OP2" in atom.get_fullname() ]
-            atom_o5p= [ atom.get_coord() for atom in res if "O5'" in atom.get_fullname() ]
-            atom_c5p = [ atom.get_coord() for atom in res if "C5'" in atom.get_fullname() ]
-            atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ]
-            atom_o4p = [ atom.get_coord() for atom in res if "O4'" in atom.get_fullname() ]
-            atom_c3p = [ atom.get_coord() for atom in res if "C3'" in atom.get_fullname() ]
-            atom_o3p = [ atom.get_coord() for atom in res if "O3'" in atom.get_fullname() ]
-            atom_c2p = [ atom.get_coord() for atom in res if "C2'" in atom.get_fullname() ]
-            atom_o2p = [ atom.get_coord() for atom in res if "O2'" in atom.get_fullname() ]
-            atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
-            atom_n9 = [ atom.get_coord() for atom in res if "N9" in atom.get_fullname() ]
-            atom_c8 = [ atom.get_coord() for atom in res if "C8" in atom.get_fullname() ]
-            atom_n7 = [ atom.get_coord() for atom in res if "N7" in atom.get_fullname() ]
-            atom_c5 = [ atom.get_coord() for atom in res if atom.get_name() ==  "C5"]
-            atom_c6 = [ atom.get_coord() for atom in res if "C6" in atom.get_fullname() ]
-            atom_o6 = [ atom.get_coord() for atom in res if "O6" in atom.get_fullname() ]
-            atom_n6 = [ atom.get_coord() for atom in res if "N6" in atom.get_fullname() ]
-            atom_n1 = [ atom.get_coord() for atom in res if "N1" in atom.get_fullname() ]
-            atom_c2 = [ atom.get_coord() for atom in res if atom.get_name() ==  "C2"]
-            atom_n2 = [ atom.get_coord() for atom in res if "N2" in atom.get_fullname() ]
-            atom_o2 = [ atom.get_coord() for atom in res if atom.get_name() ==  "O2"]
-            atom_n3 = [ atom.get_coord() for atom in res if "N3" in atom.get_fullname() ]
-            atom_c4 = [ atom.get_coord() for atom in res if atom.get_name() ==  "C4" ]
-            atom_n4 = [ atom.get_coord() for atom in res if "N4" in atom.get_fullname() ]
-            atom_o4 = [ atom.get_coord() for atom in res if atom.get_name() == "O4"]
-            
-            op3_p = get_euclidian_distance(atom_op3, atom_p)
-            last_o3p_p = get_euclidian_distance(last_o3p, atom_p) # link with the previous nucleotide
-            p_op1 = get_euclidian_distance(atom_op1, atom_p)
-            p_op2 = get_euclidian_distance(atom_op2, atom_p)
-            p_o5p = get_euclidian_distance(atom_o5p, atom_p)
-            o5p_c5p = get_euclidian_distance(atom_o5p, atom_c5p)
-            c5p_c4p = get_euclidian_distance(atom_c5p, atom_c4p)
-            c4p_o4p = get_euclidian_distance(atom_c4p, atom_o4p)
-            c4p_c3p = get_euclidian_distance(atom_c4p, atom_c3p)
-            o4p_c1p = get_euclidian_distance(atom_o4p, atom_c1p)
-            c1p_c2p = get_euclidian_distance(atom_c1p, atom_c2p)
-            c2p_o2p = get_euclidian_distance(atom_c2p, atom_o2p)
-            c2p_c3p = get_euclidian_distance(atom_c2p, atom_c3p)
-            c3p_o3p = get_euclidian_distance(atom_c3p, atom_o3p)
-
-            last_o3p=atom_o3p # o3' of this residue becomes the previous o3' of the following
-            
-            #different cases for the aromatic cycles
-            if res.get_resname()=='A' or res.get_resname()=='G': 
-                # computes the distances between atoms of aromatic cycles
-                c1p_n9 = get_euclidian_distance(atom_c1p, atom_n9)
-                n9_c8 = get_euclidian_distance(atom_n9, atom_c8)
-                c8_n7 = get_euclidian_distance(atom_c8, atom_n7)
-                n7_c5 = get_euclidian_distance(atom_n7, atom_c5)
-                c5_c6 = get_euclidian_distance(atom_c5, atom_c6)
-                c6_o6 = get_euclidian_distance(atom_c6, atom_o6)
-                c6_n6 = get_euclidian_distance(atom_c6, atom_n6)
-                c6_n1 = get_euclidian_distance(atom_c6, atom_n1)
-                n1_c2 = get_euclidian_distance(atom_n1, atom_c2)
-                c2_n2 = get_euclidian_distance(atom_c2, atom_n2)
-                c2_n3 = get_euclidian_distance(atom_c2, atom_n3)
-                n3_c4 = get_euclidian_distance(atom_n3, atom_c4)
-                c4_n9 = get_euclidian_distance(atom_c4, atom_n9)
-                c4_c5 = get_euclidian_distance(atom_c4, atom_c5)
-            if res.get_resname()=='C' or res.get_resname()=='U' :
-                c1p_n1 = get_euclidian_distance(atom_c1p, atom_n1)
-                n1_c6 = get_euclidian_distance(atom_n1, atom_c6)
-                c6_c5 = get_euclidian_distance(atom_c6, atom_c5)
-                c5_c4 = get_euclidian_distance(atom_c5, atom_c4)
-                c4_n3 = get_euclidian_distance(atom_c4, atom_n3)
-                n3_c2 = get_euclidian_distance(atom_n3, atom_c2)
-                c2_o2 = get_euclidian_distance(atom_c2, atom_o2)
-                c2_n1 = get_euclidian_distance(atom_c2, atom_n1)
-                c4_n4 = get_euclidian_distance(atom_c4, atom_n4)
-                c4_o4 = get_euclidian_distance(atom_c4, atom_o4)
-
-            liste_common.append([res.get_resname(), last_o3p_p, op3_p, p_op1, p_op2, p_o5p, o5p_c5p, c5p_c4p, c4p_o4p, c4p_c3p, o4p_c1p, c1p_c2p, c2p_o2p, c2p_c3p, c3p_o3p] )
-            liste_purines.append([c1p_n9, n9_c8, c8_n7, n7_c5, c5_c6, c6_o6, c6_n6, c6_n1, n1_c2, c2_n2, c2_n3, n3_c4, c4_n9, c4_c5])
-            liste_pyrimidines.append([c1p_n1, n1_c6, c6_c5, c5_c4, c4_n3, n3_c2, c2_o2, c2_n1, c4_n4, c4_o4])
-            pbar.update(1)
-
-    df_comm=pd.DataFrame(liste_common, columns=["Residu", "O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'"])
-    df_pur=pd.DataFrame(liste_purines, columns=["C1'-N9", "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5" ])
-    df_pyr=pd.DataFrame(liste_pyrimidines, columns=["C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"])
-    df=pd.concat([df_comm, df_pur, df_pyr], axis = 1)
-    pbar.close()
-    
-    df.to_csv(runDir + "/results/geometry/all-atoms/distances/dist_atoms_" + name + ".csv")
-
-@trace_unhandled_exceptions
-def measures_hrna(name, s, thr_idx):
-    """
-    Measures the distance/angles between the atoms of the HiRE-RNA model linked by covalent bonds
-    """
-    
-    # do not recompute something already computed
-    if (path.isfile(runDir + '/results/geometry/HiRE-RNA/distances/dist_atoms_hire_RNA '+name+'.csv') and 
-       path.isfile(runDir + '/results/geometry/HiRE-RNA/angles/angles_hire_RNA '+name+'.csv') and 
-       path.isfile(runDir + '/results/geometry/HiRE-RNA/torsions/angles_torsion_hire_RNA '+name+'.csv')):
-        return
-
-    liste_dist=[]
-    liste_angl = []
-    liste_tors = []
-    last_c4p = []
-    last_c5p = []
-    last_c1p = []
-    last_o5p = []
-
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_hrna({name})")
-
-    chain = next(s[0].get_chains())
-    residues=list(chain.get_residues())
-    for res in tqdm(chain, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna", unit="res", leave=False):
-        # distances
-        p_o5p = None
-        o5p_c5p = None
-        c5p_c4p = None
-        c4p_c1p = None
-        c1p_b1 = None
-        b1_b2 = None
-        last_c4p_p = np.nan
-        
-        # angles
-        p_o5p_c5p = None
-        o5p_c5p_c4p = None
-        c5p_c4p_c1p = None
-        c4p_c1p_b1 = None
-        c1p_b1_b2 = None
-        lastc4p_p_o5p = None
-        lastc5p_lastc4p_p = None
-        lastc1p_lastc4p_p = None
-
-        # torsions
-        p_o5_c5_c4 = np.nan
-        o5_c5_c4_c1 = np.nan
-        c5_c4_c1_b1 = np.nan
-        c4_c1_b1_b2 = np.nan
-        o5_c5_c4_psuiv = np.nan
-        c5_c4_psuiv_o5suiv = np.nan
-        c4_psuiv_o5suiv_c5suiv = np.nan
-        c1_c4_psuiv_o5suiv = np.nan
-
-        if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"] : # several phosphate groups, ignore
-            atom_p = [ atom.get_coord() for atom in res if atom.get_name() ==  "P"]
-            atom_o5p= [ atom.get_coord() for atom in res if "O5'" in atom.get_fullname() ]
-            atom_c5p = [ atom.get_coord() for atom in res if "C5'" in atom.get_fullname() ]
-            atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ]
-            atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
-            atom_b1 = pos_b1(res) # position b1 to be calculated, depending on the case
-            atom_b2 = pos_b2(res) # position b2 to be calculated only for those with 2 cycles
-
-            # Distances. If one of the atoms is empty, the euclidian distance returns NaN.
-            last_c4p_p = get_euclidian_distance(last_c4p, atom_p)
-            p_o5p = get_euclidian_distance(atom_p, atom_o5p)
-            o5p_c5p = get_euclidian_distance(atom_o5p, atom_c5p)
-            c5p_c4p = get_euclidian_distance(atom_c5p, atom_c4p)
-            c4p_c1p = get_euclidian_distance(atom_c4p, atom_c1p)
-            c1p_b1 = get_euclidian_distance(atom_c1p, atom_b1)
-            b1_b2 = get_euclidian_distance(atom_b1, atom_b2)
-
-            # flat angles. Same.
-            lastc4p_p_o5p = get_flat_angle(last_c4p, atom_p, atom_o5p)
-            lastc1p_lastc4p_p = get_flat_angle(last_c1p, last_c4p, atom_p)
-            lastc5p_lastc4p_p = get_flat_angle(last_c5p, last_c4p, atom_p)
-            p_o5p_c5p = get_flat_angle(atom_p, atom_o5p, atom_c5p)
-            o5p_c5p_c4p = get_flat_angle(atom_o5p, atom_c5p, atom_c4p)
-            c5p_c4p_c1p = get_flat_angle(atom_c5p, atom_c4p, atom_c1p)
-            c4p_c1p_b1 = get_flat_angle(atom_c4p, atom_c1p, atom_b1)
-            c1p_b1_b2 = get_flat_angle(atom_c1p, atom_b1, atom_b2)
-
-            # torsions. Idem.
-            p_o5_c5_c4 = get_torsion_angle(atom_p, atom_o5p, atom_c5p, atom_c4p)
-            o5_c5_c4_c1 = get_torsion_angle(atom_o5p, atom_c5p, atom_c4p, atom_c1p)
-            c5_c4_c1_b1 = get_torsion_angle(atom_c5p, atom_c4p, atom_c1p, atom_b1)
-            c4_c1_b1_b2 = get_torsion_angle(atom_c4p, atom_c1p, atom_b1, atom_b2)
-            o5_c5_c4_psuiv = get_torsion_angle(last_o5p, last_c5p, last_c4p, atom_p)
-            c5_c4_psuiv_o5suiv = get_torsion_angle(last_c5p, last_c4p, atom_p, atom_o5p)
-            c4_psuiv_o5suiv_c5suiv = get_torsion_angle(last_c4p, atom_p, atom_o5p, atom_c5p)
-            c1_c4_psuiv_o5suiv = get_torsion_angle(last_c1p, last_c4p, atom_p, atom_o5p)
-
-            last_c4p = atom_c4p
-            last_c5p = atom_c5p
-            last_c1p = atom_c1p
-            last_o5p = atom_o5p
-            liste_dist.append([res.get_resname(), last_c4p_p, p_o5p, o5p_c5p, c5p_c4p, c4p_c1p, c1p_b1, b1_b2])
-            liste_angl.append([res.get_resname(), lastc4p_p_o5p, lastc1p_lastc4p_p, lastc5p_lastc4p_p, p_o5p_c5p, o5p_c5p_c4p, c5p_c4p_c1p, c4p_c1p_b1, c1p_b1_b2])
-            liste_tors.append([res.get_resname(), p_o5_c5_c4, o5_c5_c4_c1, c5_c4_c1_b1, c4_c1_b1_b2, o5_c5_c4_psuiv, c5_c4_psuiv_o5suiv, c4_psuiv_o5suiv_c5suiv, c1_c4_psuiv_o5suiv])
-    df = pd.DataFrame(liste_dist, columns=["Residu", "C4'-P", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2"])
-    df.to_csv(runDir + '/results/geometry/HiRE-RNA/distances/dist_atoms_hire_RNA '+name+'.csv')
-    df = pd.DataFrame(liste_angl, columns=["Residu", "C4'-P-O5'", "C1'-C4'-P", "C5'-C4'-P", "P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2"])
-    df.to_csv(runDir + '/results/geometry/HiRE-RNA/angles/angles_hire_RNA ' + name + ".csv")
-    df=pd.DataFrame(liste_tors, columns=["Residu", "P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "O5'-C5'-C4'-P°", "C5'-C4'-P°-O5'°", "C4'-P°-O5'°-C5'°", "C1'-C4'-P°-O5'°"])
-    df.to_csv(runDir + '/results/geometry/HiRE-RNA/torsions/angles_torsion_hire_RNA '+name+'.csv')
-
-@trace_unhandled_exceptions
-def measures_hrna_basepairs(name, s, thr_idx):
-    """
-    Open a rna_only/ file, and run measures_hrna_basepairs_chain() on every chain
-    """  
-
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_hrna_basepairs({name})")
-    
-    l=[]
-    chain = next(s[0].get_chains())
-            
-    # do not recompute something already computed
-    if path.isfile(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs_"+name+".csv"):
-        return
-
-    df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name))
-
-    if df['index_chain'][0] == 1: # ignore files with numbering errors : TODO : remove when we get DSSR Pro, there should not be numbering errors anymore
-        l = measures_hrna_basepairs_chain(name, chain, df, thr_idx)
-    df_calc = pd.DataFrame(l, columns=["type_LW", "nt1_idx", "nt1_res", "nt2_idx", "nt2_res", "Distance", 
-                                        "211_angle", "112_angle", "dB1", "dB2", "alpha1", "alpha2", "3211_torsion", "1123_torsion"])
-    df_calc.to_csv(runDir + "/results/geometry/HiRE-RNA/basepairs/"+'basepairs_' + name + '.csv', float_format="%.3f")
-
-@trace_unhandled_exceptions
-def measures_hrna_basepairs_chain(name, chain, df, thr_idx):
-    """
-    Cleanup of the dataset
-    measurements of distances and angles between paired nucleotides in the chain
-    """
-
-    results = []
-    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
-
-    pairs = df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep
-    for i in range(pairs.shape[0]): # we remove the lines where no pairing (NaN in paired)
-        index_with_nan = pairs.index[pairs.iloc[:,2].isnull()]
-        pairs.drop(index_with_nan, 0, inplace=True)
-
-    paired_int = []
-    for i in pairs.index:   # convert values from paired to integers or lists of integers
-        paired = pairs.at[i, 'paired']
-        if type(paired) is np.int64 or type(paired) is np.float64:
-            paired_int.append(int(paired))
-        else :  #strings
-            if len(paired) < 3: # a single pairing
-                paired_int.append(int(paired))         
-            else : # several pairings
-                paired = paired.split(',')
-                l = [ int(i) for i in paired ]
-                paired_int.append(l)
-
-    pair_type_LW_bis = []
-    for j in pairs.index:
-        pair_type_LW = pairs.at[j, 'pair_type_LW']
-        if len(pair_type_LW) < 4 : # a single pairing
-            pair_type_LW_bis.append(pair_type_LW)
-        else : # several pairings
-            pair_type_LW = pair_type_LW.split(',')
-            l = [ i for i in pair_type_LW ]
-            pair_type_LW_bis.append(pair_type_LW)
-
-    # addition of these new columns
-    pairs.insert(4, "paired_int", paired_int, True)
-    pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True)
-    
-    indexNames = pairs[pairs['paired_int'] == 0].index
-    pairs.drop(indexNames, inplace=True) # deletion of lines with a 0 in paired_int (matching to another RNA chain)
-
-    for i in tqdm(pairs.index, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna_basepairs_chain", unit="res", leave=False):
-        # calculations for each row of the pairs dataset
-        index = pairs.at[i, 'index_chain'] 
-        res1 =  chain[(' ', index, ' ')].get_resname()
-        if res1 not in ['A','C','G','U']:
-            continue
-        type_LW = pairs.at[i, 'pair_type_LW_bis'] # pairing type
-        num_paired = pairs.at[i, 'paired_int'] # number (index_chain) of the paired nucleotide
-        
-        if type(num_paired) is int or type(num_paired) is np.int64:
-            res2 =  chain[(' ', num_paired, ' ')].get_resname()
-            if res2 not in ["A","C","G","U"]:
-                continue
-            measures = basepair_measures(chain[(' ', index, ' ')], chain[(' ', num_paired, ' ')])
-            if measures is not None:
-                results.append([type_LW, index, res1, num_paired, res2] + measures)
-        else:
-            for j in range(len(num_paired)): # if several pairings, process them one by one
-                if num_paired[j] != 0:
-                    res2 =  chain[(' ', num_paired[j], ' ')].get_resname()
-                    if res2 not in ["A","C","G","U"]:
-                        continue
-                    measures = basepair_measures(chain[(' ', index, ' ')], chain[(' ', num_paired[j], ' ')])
-                    if measures is not None:
-                        results.append([type_LW[j], index, res1, num_paired[j], res2] + measures)
-
-    return results
-
-@trace_unhandled_exceptions
-def pyle_measures(name, s, thr_idx):
-
-    if (path.isfile(runDir + '/results/geometry/Pyle/distances/distances_pyle_'+name+'.csv')):
-        return
-
-    liste_dist=[]
-    #classes=[]
-    #for i in range(0, 150, 5):
-        #classes.append([i, i+5])
-    #classes.append([150, 300])
-    #occur_p_p=len(classes)*[0]
-    #occur_p_c1=len(classes)*[0]
-    #occur_p_c4=len(classes)*[0]
-    #occur_c1_c1=len(classes)*[0]
-    #occur_c4_c4=len(classes)*[0]
-    #nb_occurs=[]
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} pyle_measures({name})")
-
-    chain = next(s[0].get_chains())
-    #residues=list(chain.get_residues())
-    for res1 in tqdm(chain, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} pyle_measures", unit="res", leave=False):
-        #res1=chain[i]
-        if res1.get_resname() in ["A", "C", "G", "U"]:
-            resnum1=list(res1.get_id())[1]
-            atom_p_1 = [ atom.get_coord() for atom in res1 if atom.get_name() ==  "P"]
-            atom_c1p_1 = [ atom.get_coord() for atom in res1 if "C1'" in atom.get_fullname() ]
-            atom_c4p_1 = [ atom.get_coord() for atom in res1 if "C4'" in atom.get_fullname() ]
-            for res2 in chain:
-                resnum2=list(res2.get_id())[1]
-                if resnum2-resnum1 < 4 :
-                    continue
-                p_p=np.nan
-                p_c4p=np.nan
-                p_c1p=np.nan
-                c4p_c4p=np.nan
-                c1p_c1p=np.nan
-                #res2=chain[j]
-                if res2.get_resname() in ["A", "C", "G", "U"]:
-                    
-                    atom_p_2 = [ atom.get_coord() for atom in res2 if atom.get_name() ==  "P"]
-                    atom_c1p_2 = [ atom.get_coord() for atom in res2 if "C1'" in atom.get_fullname() ]
-                    atom_c4p_2 = [ atom.get_coord() for atom in res2 if "C4'" in atom.get_fullname() ]
-
-                    p_p = get_euclidian_distance(atom_p_1, atom_p_2)
-                    p_c4p= get_euclidian_distance(atom_p_1, atom_c4p_2)
-                    p_c1p= get_euclidian_distance(atom_p_1, atom_c1p_2)
-                    c4p_c4p= get_euclidian_distance(atom_c4p_1, atom_c4p_2)
-                    c1p_c1p= get_euclidian_distance(atom_c1p_1, atom_c1p_2)
-
-                    liste_dist.append([res1.get_resname(), int(resnum1), res2.get_resname(), int(resnum2), p_p, p_c4p, p_c1p, c4p_c4p, c1p_c1p])
-                    '''
-                    for x in range(len(classes)):
-                        if classes[x][0] <= p_p <= classes[x][1]:
-                            occur_p_p[x]=occur_p_p[x]+1
-                        if classes[x][0] <= p_c4p <= classes[x][1]:
-                            occur_p_c4[x]=occur_p_c4[x]+1                        
-                        if classes[x][0] <= p_c1p <= classes[x][1]:
-                            occur_p_c1[x]=occur_p_c1[x]+1
-                        if classes[x][0] <= c4p_c4p <= classes[x][1]:
-                            occur_c4_c4[x]=occur_c4_c4[x]+1
-                        if classes[x][0] <= c1p_c1p <= classes[x][1]:
-                            occur_c1_c1[x]=occur_c1_c1[x]+1
-                    '''
-    #for x in range(len(classes)):
-        # for i in range(len(liste_dist)):
-        #     if classes[x][0] <= liste_dist[i][4] <= classes[x][1]:
-        #         occur_p_p[x]=occur_p_p[x]+1
-        #     if classes[x][0] <= liste_dist[i][5] <= classes[x][1]:
-        #         occur_p_c4[x]=occur_p_c4[x]+1                        
-        #     if classes[x][0] <= liste_dist[i][6] <= classes[x][1]:
-        #         occur_p_c1[x]=occur_p_c1[x]+1
-        #     if classes[x][0] <= liste_dist[i][7] <= classes[x][1]:
-        #         occur_c4_c4[x]=occur_c4_c4[x]+1
-        #     if classes[x][0] <= liste_dist[i][8] <= classes[x][1]:
-        #         occur_c1_c1[x]=occur_c1_c1[x]+1
-        #nb_occurs.append([classes[x], occur_p_p[x], occur_p_c1[x], occur_p_c4[x], occur_c1_c1[x], occur_c4_c4[x]])
-    #df = pd.DataFrame(nb_occurs, columns=["classe", "P-P", "P-C1'", "P-C4'", "C1'-C1'", "C4'-C4'"])
-    # return df
-    # nb_occurs.append([classes, occur_p_p, occur_p_c1, occur_p_c4, occur_c1_c1, occur_c4_c4])
-    # print(nb_occurs)
-    # return nb_occurs
-                    
-
-    df = pd.DataFrame(liste_dist, columns=["res1", "resnum1", "res2", "resnum2", "P-P", "P-C4'", "P-C1'", "C4'-C4'", "C1'-C1'"])
-    df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_pyle_" + name + ".csv")
-
-@trace_unhandled_exceptions
-def count_occur_pyle_dist(fpath):
-    
-    global idxQueue
-    thr_idx = idxQueue.get()
-    setproctitle(f"Worker {thr_idx+1} : Extract occurences of {fpath}")    
-
-    liste=os.listdir(fpath)
-    pbar = tqdm(total=len(liste), position=thr_idx, desc="Preparing ", leave=False)
-    df = pd.read_csv(os.path.abspath(fpath + liste.pop()))
-    occur_p_p=list(df["P-P"])
-    occur_p_c1=list(df["P-C1'"])
-    occur_p_c4=list(df["P-C4'"])
-    occur_c1_c1=list(df["C1'-C1'"])
-    occur_c4_c4=list(df["C4'-C4'"])
-    nb_occurs=[]
-    for f in range(len(liste)):
-        df = pd.read_csv(os.path.abspath(fpath + liste.pop()))
-        # print(liste[f])
-        for k in range(df.shape[0]):
-            occur_p_p[k]=occur_p_p[k]+df["P-P"][k]
-            occur_p_c1[k]=occur_p_c1[k]+df["P-C1'"][k]
-            occur_p_c4[k]=occur_p_c4[k]+df["P-C4'"][k]
-            occur_c1_c1[k]=occur_c1_c1[k]+df["C1'-C1'"][k]
-            occur_c4_c4[k]=occur_c4_c4[k]+df["C4'-C4'"][k]
-        pbar.update(1)
-    nb_occurs=[occur_p_p, occur_p_c1, occur_p_c4, occur_c1_c1, occur_c4_c4]
-    # return(list(df["classe"]), occur_p_p, occur_p_c1, occur_p_c4, occur_c1_c1, occur_c4_c4)
-    df = pd.DataFrame(nb_occurs, columns=list(df["classe"]))
-
-    df.to_csv(runDir + "/results/geometry/Pyle/classes_dist/occurences_dist.csv")
-    idxQueue.put(thr_idx) # replace the thread index in the queue
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
-
-
-@trace_unhandled_exceptions
-def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) :
-    """
-    Plot Gaussian-Mixture-Model (with or without histograms)
-    """
-    data_ori = np.array(data_ori)
-
-    if toric:
-        # Extend the data on the right and on the left (for angles)
-        data = np.concatenate([data_ori, data_ori-360.0, data_ori+360.0])
-    else:
-        data = data_ori
-
-    # chooses the number of components based on the maximum likelihood value (maxlogv)
-    n_components_range = np.arange(8)+1
-    # aic = []
-    # bic = []
-    maxlogv=[]
-    md = np.array(data).reshape(-1,1)
-    nb_components = 1
-    nb_log_max = n_components_range[0]
-    log_max = 0
-    for n_comp in n_components_range:
-        gmm = GaussianMixture(n_components=n_comp).fit(md)
-        # aic.append(abs(gmm.aic(md)))
-        # bic.append(abs(gmm.bic(md)))
-        maxlogv.append(gmm.lower_bound_)
-        if gmm.lower_bound_== max(maxlogv) : # takes the maximum
-            nb_components = n_comp
-            # if there is convergence, keep the first maximum found
-            if abs(gmm.lower_bound_-log_max) < 0.02 : #threshold=0.02
-                nb_components = nb_log_max
-                break
-        log_max = max(maxlogv)
-        nb_log_max = n_comp
-
-    
-    # Now compute the final GMM
-    obs = np.array(data).reshape(-1,1) # still on extended data
-    g = GaussianMixture(n_components=nb_components)
-    g.fit(obs)
-
-    if toric:
-        # Now decide which to keep
-        keep = []
-        weights = []
-        means = []
-        covariances = []
-        sum_weights = 0.0
-        for m in g.means_:
-            keep.append(m > -180 and m <= 180)
-        for i, w in enumerate(g.weights_):
-            if not keep[i]:
-                continue
-            sum_weights += w
-        for i in range(nb_components):
-            if not keep[i]:
-                continue
-            means.append(g.means_[i])
-            covariances.append(g.covariances_[i])
-            weights.append(g.weights_[i]/sum_weights)
-        nb_components = len(means)
-    else:
-        weights = g.weights_
-        means = g.means_
-        covariances = g.covariances_
-
-    # plot histograms if asked, with the appropriate number of components
-    if hist:
-        plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True)
-    if toric:
-        plt.xlabel("Angle (Degrees)")
-    else:
-        plt.xlabel("Distance (Angströms)")
-    plt.ylabel("Density")
-
-    # Prepare the GMM curve with some absciss points
-    if toric:
-        x = np.linspace(-360.0,360.0,721)
-    else:
-        D = obs.ravel()
-        xmin = D.min()
-        #xmax = min(10.0, D.max())
-        xmax = D.max()
-        x = np.linspace(xmin,xmax,1000)
-    colors=['red', 'blue', 'gold', 'cyan', 'magenta', 'white', 'black', 'green']
-
-    # prepare the dictionary to save the parameters
-    summary_data = {}
-    summary_data["measure"] = name_data
-    summary_data["weights"] = []
-    summary_data["means"] = []
-    summary_data["std"] = []
-
-    # plot
-    curves = []
-    for i in range(nb_components):
-
-        # store the parameters
-        mean = means[i]
-        sigma = np.sqrt(covariances[i])
-        weight = weights[i]
-        summary_data["means"].append("{:.2f}".format(float(str(mean).strip("[]"))))
-        summary_data["std"].append("{:.2f}".format(float(str(sigma).strip("[]"))))
-        summary_data["weights"].append("{:.2f}".format(float(str(weight).strip("[]"))))
-
-        # compute the right x and y data to plot
-        y = weight*st.norm.pdf(x, mean, sigma)
-        if toric:
-            y_mod = (((y[0]+180.0)%360.0)-180.0)
-            x_mod = (((x+180.0)%360.0)-180.0)
-            s = sorted(zip(x_mod,y_mod))
-            newx = []
-            newy = []
-            for k in range(0, len(s), 2):
-                if k == 362.0:
-                    continue # this value is dealt with when k = 360.0
-                # print(k, "summing: ", s[k-int(k>360)], s[k+1-int(k>360)])
-                newx.append(s[k-int(k>360)][0])
-                if k == 360.0:
-                    newy.append(s[k][1]+s[k+1][1]+s[k+2][1])
-                else:
-                    newy.append(s[k-int(k>360)][1]+s[k+1-int(k>360)][1])
-        else:
-            newx = x
-            newy = y[0]
-
-        if hist:
-            # plot on top of the histograms
-            plt.plot(newx, newy, c=colors[i])
-        else:
-            # store for later summation
-            curves.append(np.array(newy))
-
-    if hist:
-        plt.title(f"Histogram of {name_data} with GMM of {nb_components} components (" + str(len(data_ori))+" values)")
-        if save:
-            plt.savefig(f"Histogram_{name_data}_{nb_components}_comps.png")
-            plt.close()
-    else:
-        # Plot their sum, do not save figure yet
-        try:
-            plt.plot(newx, sum(curves), c=col, label=name_data)
-        except TypeError:
-            print("N curves:", len(curves))
-            for c in curves:
-                print(c)
-        plt.legend()
-
-        # Save the json
-        with open(runDir + "/results/geometry/json/" +name_data + ".json", 'w', encoding='utf-8') as f:
-            json.dump(summary_data, f, indent=4)
-
-@trace_unhandled_exceptions
-def gmm_aa_dists():
-    """
-    Draw the figures representing the data on the measurements of distances between atoms
-    """
-
-    setproctitle("GMM (all atoms, distances)")
-
-    df=pd.read_csv(os.path.abspath(runDir + "/results/geometry/all-atoms/distances/dist_atoms.csv"))
-
-    last_o3p_p=list(df["O3'-P"][~ np.isnan(df["O3'-P"])])
-    #print(last_o3p_p)
-    op3_p=list(df["OP3-P"][~ np.isnan(df["OP3-P"])])
-    p_op1=list(df["P-OP1"][~ np.isnan(df["P-OP1"])])
-    p_op2=list(df["P-OP2"][~ np.isnan(df["P-OP2"])])
-    p_o5p=list(df["P-O5'"][~ np.isnan(df["P-O5'"])])
-    o5p_c5p=list(df["O5'-C5'"][~ np.isnan(df["O5'-C5'"])])
-    c5p_c4p=list(df["C5'-C4'"][~ np.isnan(df["C5'-C4'"])])
-    c4p_o4p=list(df["C4'-O4'"][~ np.isnan(df["C4'-O4'"])])
-    o4p_c1p=list(df["O4'-C1'"][~ np.isnan(df["O4'-C1'"])])
-    c1p_c2p=list(df["C1'-C2'"][~ np.isnan(df["C1'-C2'"])])
-    c2p_o2p=list(df["C2'-O2'"][~ np.isnan(df["C2'-O2'"])])
-    c2p_c3p=list(df["C2'-C3'"][~ np.isnan(df["C2'-C3'"])])
-    c3p_o3p=list(df["C3'-O3'"][~ np.isnan(df["C3'-O3'"])])
-    c4p_c3p=list(df["C4'-C3'"][~ np.isnan(df["C4'-C3'"])])
-    
-    #if res = A ou G
-    c1p_n9=list(df["C1'-N9"][~ np.isnan(df["C1'-N9"])])
-    n9_c8=list(df["N9-C8"][~ np.isnan(df["N9-C8"])])
-    c8_n7=list(df["C8-N7"][~ np.isnan(df["C8-N7"])])
-    n7_c5=list(df["N7-C5"][~ np.isnan(df["N7-C5"])])
-    c5_c6=list(df["C5-C6"][~ np.isnan(df["C5-C6"])])
-    c6_n1=list(df["C6-N1"][~ np.isnan(df["C6-N1"])])
-    n1_c2=list(df["N1-C2"][~ np.isnan(df["N1-C2"])])
-    c2_n3=list(df["C2-N3"][~ np.isnan(df["C2-N3"])])
-    n3_c4=list(df["N3-C4"][~ np.isnan(df["N3-C4"])])
-    c4_n9=list(df["C4-N9"][~ np.isnan(df["C4-N9"])])
-    c4_c5=list(df["C4-C5"][~ np.isnan(df["C4-C5"])])
-    #if res=G
-    c6_o6=list(df["C6-O6"][~ np.isnan(df["C6-O6"])])
-    c2_n2=list(df["C2-N2"][~ np.isnan(df["C2-N2"])])
-    #if res = A
-    c6_n6=list(df["C6-N6"][~ np.isnan(df["C6-N6"])])
-
-    #if res = C ou U
-    c1p_n1=list(df["C1'-N1"][~ np.isnan(df["C1'-N1"])])
-    n1_c6=list(df["N1-C6"][~ np.isnan(df["N1-C6"])])
-    c6_c5=list(df["C6-C5"][~ np.isnan(df["C6-C5"])])
-    c5_c4=list(df["C5-C4"][~ np.isnan(df["C5-C4"])])
-    c4_n3=list(df["C4-N3"][~ np.isnan(df["C4-N3"])])
-    n3_c2=list(df["N3-C2"][~ np.isnan(df["N3-C2"])])
-    c2_n1=list(df["C2-N1"][~ np.isnan(df["C2-N1"])])
-    c2_o2=list(df["C2-O2"][~ np.isnan(df["C2-O2"])])
-    #if res =C
-    c4_n4=list(df["C4-N4"][~ np.isnan(df["C4-N4"])])
-    #if res=U
-    c4_o4=list(df["C4-O4"][~ np.isnan(df["C4-O4"])])
-
-    os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/commun/", exist_ok=True)
-    os.chdir(runDir+"/results/figures/GMM/all-atoms/distances/commun/")
-    # draw figures for atoms common to all nucleotides
-    GMM_histo(last_o3p_p, "O3'-P")
-    if len(op3_p) > 0 :
-        GMM_histo(op3_p, "OP3-P")
-    GMM_histo(p_op1, "P-OP1")
-    GMM_histo(p_op2, "P-OP2")
-    
-    GMM_histo(p_o5p, "P-O5'")
-    GMM_histo(o5p_c5p, "O5'-C5'")
-    GMM_histo(c5p_c4p, "C5'-C4'")
-    GMM_histo(c4p_o4p, "C4'-O4'")
-    GMM_histo(c4p_c3p, "C4'-C3'")
-    GMM_histo(c3p_o3p, "C3'-O3'")
-    GMM_histo(o4p_c1p, "O4'-C1'")
-    GMM_histo(c1p_c2p, "C1'-C2'")
-    GMM_histo(c2p_c3p, "C2'-C3'")
-    GMM_histo(c2p_o2p, "C2'-O2'")
-
-    if len(op3_p) > 0 :
-        GMM_histo(op3_p, "OP3-P", toric=False, hist=False, col= 'lightcoral')
-    GMM_histo(p_op1, "P-OP1", toric=False, hist=False, col='gold')
-    GMM_histo(p_op2, "P-OP2", toric=False, hist=False, col='lightseagreen')
-    GMM_histo(last_o3p_p, "O3'-P", toric=False, hist=False, col='saddlebrown')
-    GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkturquoise')
-    GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='darkkhaki')
-    GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='indigo')
-    GMM_histo(c4p_o4p, "C4'-O4'", toric=False, hist=False, col='maroon')
-    GMM_histo(c4p_c3p, "C4'-C3'", toric=False, hist=False, col='burlywood')
-    GMM_histo(c3p_o3p, "C3'-O3'", toric=False, hist=False, col='steelblue')
-    GMM_histo(o4p_c1p, "O4'-C1'", toric=False, hist=False, col='tomato')
-    GMM_histo(c1p_c2p, "C1'-C2'", toric=False, hist=False, col='darkolivegreen')
-    GMM_histo(c2p_c3p, "C2'-C3'", toric=False, hist=False, col='orchid')
-    GMM_histo(c2p_o2p, "C2'-O2'", toric=False, hist=False, col='deeppink')
-    axes=plt.gca()
-    axes.set_ylim(0, 100)
-    plt.xlabel("Distance (Angströms)")
-    plt.title("GMM of distances between common atoms ")
-    plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/commun/" + "GMM_distances_common_atoms.png")
-    plt.close()
-
-    os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/purines/", exist_ok=True)
-    os.chdir(runDir+"/results/figures/GMM/all-atoms/distances/purines/")
-    # purines
-    GMM_histo(c1p_n9, "C1'-N9")
-    GMM_histo(n9_c8, "N9-C8")
-    GMM_histo(c8_n7, "C8-N7")
-    GMM_histo(n7_c5, "N7-C5")
-    GMM_histo(c5_c6, "C5-C6")
-    GMM_histo(c6_o6, "C6-O6")
-    GMM_histo(c6_n6, "C6-N6")
-    GMM_histo(c6_n1, "C6-N1")
-    GMM_histo(n1_c2, "N1-C2")
-    GMM_histo(c2_n2, "C2-N2")
-    GMM_histo(c2_n3, "C2-N3")
-    GMM_histo(n3_c4, "N3-C4")
-    GMM_histo(c4_n9, "C4-N9")
-    GMM_histo(c4_c5, "C4-C5")
-
-    GMM_histo(c1p_n9, "C1'-N9", hist=False, col='lightcoral')
-    GMM_histo(n9_c8, "N9-C8", hist=False, col='gold')
-    GMM_histo(c8_n7, "C8-N7", hist=False, col='lightseagreen')
-    GMM_histo(n7_c5, "N7-C5", hist=False, col='saddlebrown')
-    GMM_histo(c5_c6, "C5-C6", hist=False, col='darkturquoise')
-    GMM_histo(c6_o6, "C6-O6", hist=False, col='darkkhaki')
-    GMM_histo(c6_n6, "C6-N6", hist=False, col='indigo')
-    GMM_histo(c6_n1, "C6-N1", hist=False, col='maroon')
-    GMM_histo(n1_c2, "N1-C2", hist=False, col='burlywood')
-    GMM_histo(c2_n2, "C2-N2", hist=False, col='steelblue')
-    GMM_histo(c2_n3, "C2-N3", hist=False, col='tomato')
-    GMM_histo(n3_c4, "N3-C4", hist=False, col='darkolivegreen')
-    GMM_histo(c4_n9, "C4-N9", hist=False, col='orchid')
-    GMM_histo(c4_c5, "C4-C5", hist=False, col='deeppink')
-    axes=plt.gca()
-    axes.set_ylim(0, 100)
-    plt.xlabel("Distance (Angströms)")
-    plt.title("GMM of distances between atoms of the purine cycles", fontsize=10)
-    plt.savefig(runDir+ "/results/figures/GMM/all-atoms/distances/purines/" + "GMM_distances_purine_cycles.png")
-    plt.close()
-
-    os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/", exist_ok=True)
-    os.chdir(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/")
-    # pyrimidines
-
-    GMM_histo(c1p_n1, "C1'-N1")
-    GMM_histo(n1_c6, "N1-C6")
-    GMM_histo(c6_c5, "C6-C5")
-    GMM_histo(c5_c4, "C5-C4")
-    GMM_histo(c4_n3, "C4-N3")
-    GMM_histo(n3_c2, "N3-C2")
-    GMM_histo(c2_o2, "C2-O2")
-    GMM_histo(c2_n1, "C2-N1")
-    GMM_histo(c4_n4, "C4-N4")
-    GMM_histo(c4_o4, "C4-O4")
-
-    GMM_histo(c1p_n1, "C1'-N1", hist=False, col='lightcoral')
-    GMM_histo(n1_c6, "N1-C6", hist=False, col='gold')
-    GMM_histo(c6_c5, "C6-C5", hist=False, col='lightseagreen')
-    GMM_histo(c5_c4, "C5-C4", hist=False, col='deeppink')
-    GMM_histo(c4_n3, "C4-N3", hist=False, col='red')
-    GMM_histo(n3_c2, "N3-C2", hist=False, col='lime')
-    GMM_histo(c2_o2, "C2-O2", hist=False, col='indigo')
-    GMM_histo(c2_n1, "C2-N1", hist=False, col='maroon')
-    GMM_histo(c4_n4, "C4-N4", hist=False, col='burlywood')
-    GMM_histo(c4_o4, "C4-O4", hist=False, col='steelblue')
-    axes=plt.gca()
-    #axes.set_xlim(1, 2)
-    axes.set_ylim(0, 100)
-    plt.xlabel("Distance (Angströms")
-    plt.title("GMM of distances between atoms of the pyrimidine cycles", fontsize=10)
-    plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/pyrimidines/" + "GMM_distances_pyrimidine_cycles.png")
-    plt.close()
-
-    os.chdir(runDir)
-    setproctitle("GMM (all atoms, distances) finished")
-
-@trace_unhandled_exceptions
-def gmm_aa_torsions():
-    """
-    Separates the torsion angle measurements by angle type and plots the figures representing the data
-    """
-    setproctitle("GMM (all atoms, torsions)")
-
-    # we create lists to store the values of each angle
-    alpha=[]
-    beta=[]
-    gamma=[]
-    delta=[]
-    epsilon=[]
-    zeta=[]
-    chi = []
-    for angles_deg in conversion_angles(runDir + "/results/RNANet.db"): 
-        alpha.append(angles_deg[2])
-        beta.append(angles_deg[3])
-        gamma.append(angles_deg[4])
-        delta.append(angles_deg[5])
-        epsilon.append(angles_deg[6])
-        zeta.append(angles_deg[7])
-        chi.append(angles_deg[8])
-
-    # we remove the null values
-    alpha=[i for i in alpha if i != None]
-    beta=[i for i in beta if i != None]
-    gamma=[i for i in gamma if i != None]
-    delta=[i for i in delta if i != None]
-    epsilon=[i for i in epsilon if i != None]
-    zeta=[i for i in zeta if i != None]
-    chi=[i for i in chi if i != None]
-
-    os.makedirs(runDir + "/results/figures/GMM/all-atoms/torsions/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/all-atoms/torsions/")
-
-    """
-    We plot the GMMs with histogram for each angle
-    We create the corresponding json with the means and standard deviations of each Gaussian
-    We draw the figure grouping the GMMs of all angles without histogram to compare them with each other
-    """
-
-    GMM_histo(alpha, "Alpha", toric=True)
-    GMM_histo(beta, "Beta", toric=True)
-    GMM_histo(gamma, "Gamma", toric=True)
-    GMM_histo(delta, "Delta", toric=True)
-    GMM_histo(epsilon, "Epsilon", toric=True)
-    GMM_histo(zeta, "Zeta", toric=True)
-    GMM_histo(chi, "Xhi", toric=True)
-
-    GMM_histo(alpha, "Alpha", toric=True, hist=False, col='red')
-    GMM_histo(beta, "Beta", toric=True, hist=False, col='firebrick')
-    GMM_histo(gamma, "Gamma", toric=True, hist=False, col='limegreen')
-    GMM_histo(delta, "Delta", toric=True, hist=False, col='darkslateblue')
-    GMM_histo(epsilon, "Epsilon", toric=True, hist=False, col='goldenrod')
-    GMM_histo(zeta, "Zeta", toric=True, hist=False, col='teal')
-    GMM_histo(chi, "Xhi", toric=True, hist=False, col='hotpink')
-    plt.xlabel("Angle (Degrees)")
-    plt.title("GMM of torsion angles")
-    plt.savefig("GMM_torsions.png")
-    plt.close()
-
-    os.chdir(runDir)
-    setproctitle("GMM (all atoms, torsions) finished")
-
-@trace_unhandled_exceptions
-def gmm_wadley():
-
-    setproctitle("GMM (Pyle model)")
-
-    # Distances
-    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/distances/distances_wadley.csv"))  
-
-    p_c1p = list(df["C1'-P"][~ np.isnan(df["C1'-P"])])
-    c1p_p = list(df["P-C1'"][~ np.isnan(df["P-C1'"])])
-    p_c4p = list(df["C4'-P"][~ np.isnan(df["C4'-P"])])
-    c4p_p = list(df["P-C4'"][~ np.isnan(df["P-C4'"])])
-
-    os.makedirs(runDir + "/results/figures/GMM/Pyle/distances/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/Pyle/distances/")
-
-    GMM_histo(p_c1p, "P-C1'")
-    GMM_histo(c1p_p, "C1'-P")
-    GMM_histo(p_c4p, "P-C4'")
-    GMM_histo(c4p_p, "C4'-P")
-
-    GMM_histo(p_c4p, "P-C4'", toric=False, hist=False, col='gold')
-    GMM_histo(c4p_p, "C4'-P", toric=False, hist=False, col='indigo')
-    GMM_histo(p_c1p, "P-C1'", toric=False, hist=False, col='firebrick')
-    GMM_histo(c1p_p, "C1'-P", toric=False, hist=False, col='seagreen')
-    plt.xlabel("Distance (Angströms)")
-    plt.title("GMM of distances (Pyle model)")
-    plt.savefig("GMM_distances_pyle_model.png")
-    plt.close()
-
-    # Flat Angles
-    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle.csv"))  
-
-    p_c1p_psuiv = list(df["P-C1'-P°"][~ np.isnan(df["P-C1'-P°"])])
-    c1p_psuiv_c1psuiv = list(df["C1'-P°-C1'°"][~ np.isnan(df["C1'-P°-C1'°"])])
-
-
-    os.makedirs(runDir + "/results/figures/GMM/Pyle/angles/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/Pyle/angles/")
-
-    GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True)
-    GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True)
-
-    GMM_histo(p_c1p_psuiv, "P-C1'-P°", toric=True, hist=False, col='firebrick')
-    GMM_histo(c1p_psuiv_c1psuiv, "C1'-P°-C1'°", toric=True, hist=False, col='seagreen')
-    plt.xlabel("Angle (Degrees)")
-    plt.title("GMM of flat angles (Pyle model)")
-    plt.savefig("GMM_flat_angles_pyle_model.png")
-    plt.close()
-
-    # Torsion angles    
-    eta=[]
-    theta=[]
-    eta_prime=[]
-    theta_prime=[]
-    eta_base=[]
-    theta_base=[]
-
-    for angles_deg in conversion_eta_theta(runDir + "/results/RNANet.db"): 
-        eta.append(angles_deg[2])
-        theta.append(angles_deg[3])
-        eta_prime.append(angles_deg[4])
-        theta_prime.append(angles_deg[5])
-        eta_base.append(angles_deg[6])
-        theta_base.append(angles_deg[7])
-
-    eta=[i for i in eta if i != None]
-    theta=[i for i in theta if i != None]
-    eta_prime=[i for i in eta_prime if i != None]
-    theta_prime=[i for i in theta_prime if i != None]
-    eta_base=[i for i in eta_base if i != None]
-    theta_base=[i for i in theta_base if i != None]
-
-
-    os.makedirs(runDir + "/results/figures/GMM/Pyle/pseudotorsions/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/Pyle/pseudotorsions/")
-
-    GMM_histo(eta, "Eta", toric=True)
-    GMM_histo(theta, "Theta", toric=True)
-    GMM_histo(eta_prime, "Eta'", toric=True)
-    GMM_histo(theta_prime, "Theta'", toric=True)
-    GMM_histo(eta_base, "Eta''", toric=True)
-    GMM_histo(theta_base, "Theta''", toric=True)
-
-    GMM_histo(eta, "Eta", toric=True, hist=False, col='mediumaquamarine')
-    GMM_histo(theta, "Theta", toric=True, hist=False, col='darkorchid')
-    GMM_histo(eta_prime, "Eta'", toric=True, hist=False, col='cyan')
-    GMM_histo(theta_prime, "Theta'", toric=True, hist=False, col='crimson')
-    GMM_histo(eta_base, "Eta''", toric=True, hist=False, col='royalblue')
-    GMM_histo(theta_base, "Theta''", toric=True, hist=False, col='palevioletred')
-    plt.xlabel("Angle (Degrees)")
-    plt.title("GMM of pseudo-torsion angles (Pyle Model)")
-    plt.savefig("GMM_pseudotorsion_angles_pyle_model.png")
-    plt.close()
-    
-    os.chdir(runDir)
-    setproctitle("GMM (Pyle model) finished")   
-
-def gmm_pyle_type(ntpair, data):
-
-    setproctitle(f"GMM (Pyle {ntpair} )")
-    
-    os.makedirs(runDir + "/results/figures/GMM/Pyle/distances/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/Pyle/distances/")
-
-    p_p=list(data["P-P"][~ np.isnan(data["P-P"])])
-    p_c4p=list(data["P-C4'"][~ np.isnan(data["P-C4'"])])
-    p_c1p=list(data["P-C1'"][~ np.isnan(data["P-C1'"])])
-    c4p_c4p=list(data["C4'-C4'"][~ np.isnan(data["C4'-C4'"])])
-    c1p_c1p=list(data["C1'-C1'"][~ np.isnan(data["C1'-C1'"])])
-    print(len(p_p))
-    # res2=list(data["resnum2"])
-    # res1=list(data["resnum1"])
-    # diff=[]
-    # for i in range(len(res1)):
-    #     diff.append(res2[i]-res1[i])
-    # print(diff[:100])
-    
-    GMM_histo(p_p, f"Distance P-P between {ntpair} tips for {str(len(p_p))} values", toric=False, hist=False, col="cyan")
-    GMM_histo(p_c4p, f"Distance P-C4' between {ntpair} tips", toric=False, hist=False, col="tomato")
-    GMM_histo(p_c1p, f"Distance P-C1' between {ntpair} tips", toric=False, hist=False, col="goldenrod")
-    GMM_histo(c4p_c4p, f"Distance C4'-C4' between {ntpair} tips", toric=False, hist=False, col="magenta")
-    GMM_histo(c1p_c1p, f"Distance C1'-C1' between {ntpair} tips", toric=False, hist=False, col="black")
-    # GMM_histo(diff, f"Gap between {ntpair} tips", toric=False, hist=False, col="tomato")
-    plt.xlabel("Distance (Angströms)")
-    
-    # plt.xlabel("Number of residues")
-    plt.ylabel("Distance (Angströms)")
-    plt.title(f"GMM of distances for {ntpair} ", fontsize=10)
-
-    # plt.savefig(f"Longueurs_Pyle_{ntpair}.png" )
-    plt.savefig(f"Distances_Pyle_{ntpair}.png" )
-    plt.close()
-    setproctitle(f"GMM (Pyle {ntpair} distances) finished")
-
-def gmm_pyle():
-
-    setproctitle("GMM (Pyle model)")
-
-    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/distances/distances.csv"))
-
-    # dist = ["P-P", "P-C4'", "P-C1'", "C4'-C4'", "C1'-C1'"]
-    data=df
-    if len(data):
-        for b1 in ['A','C','G','U']:
-            for b2 in ['A','C','G','U']:
-                thisbases = data[(data.res1 == b1)&(data.res2 == b2)]
-                if len(thisbases):
-                    gmm_pyle_type(b1+b2, thisbases)
-
-@trace_unhandled_exceptions
-def gmm_hrna():
-    """
-    Draw the figures representing the data on the measurements between atoms of the HiRE-RNA model
-    """
-
-    setproctitle("GMM (HiRE-RNA)")
-
-    # Distances
-    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/distances/dist_atoms_hire_RNA.csv"))  
-
-    last_c4p_p = list(df["C4'-P"][~ np.isnan(df["C4'-P"])])
-    p_o5p = list(df["P-O5'"][~ np.isnan(df["P-O5'"])])
-    o5p_c5p = list(df["O5'-C5'"][~ np.isnan(df["O5'-C5'"])])
-    c5p_c4p = list(df["C5'-C4'"][~ np.isnan(df["C5'-C4'"])])
-    c4p_c1p = list(df["C4'-C1'"][~ np.isnan(df["C4'-C1'"])])
-    c1p_b1 = list(df["C1'-B1"][~ np.isnan(df["C1'-B1"])])
-    b1_b2 = list(df["B1-B2"][~ np.isnan(df["B1-B2"])])
-
-    os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/distances/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/distances/")
-
-    GMM_histo(o5p_c5p, "O5'-C5'")
-    GMM_histo(b1_b2, "B1-B2")
-    GMM_histo(c1p_b1, "C1'-B1")
-    GMM_histo(c5p_c4p, "C5'-C4'")
-    GMM_histo(c4p_c1p, "C4'-C1'")
-    GMM_histo(p_o5p, "P-O5'")
-    GMM_histo(last_c4p_p, "C4'-P")
-    
-    GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='lightcoral')
-    GMM_histo(b1_b2, "B1-B2", toric=False, hist=False, col='limegreen')
-    GMM_histo(c1p_b1, "C1'-B1", toric=False, hist=False, col='tomato')
-    GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='aquamarine')
-    GMM_histo(c4p_c1p, "C4'-C1'", toric=False, hist=False, col='goldenrod')
-    GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkcyan')
-    GMM_histo(last_c4p_p, "C4'-P", toric=False, hist=False, col='deeppink')
-    axes = plt.gca()
-    axes.set_ylim(0, 100)
-    plt.xlabel("Distance (Angströms)")
-    plt.title("GMM of distances between HiRE-RNA beads")
-    plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/distances/GMM_distances_HiRE_RNA.png")
-    plt.close()
-
-    # Angles
-    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/angles/angles_hire_RNA.csv"))  
-
-    lastc4p_p_o5p = list(df["C4'-P-O5'"][~ np.isnan(df["C4'-P-O5'"])])
-    lastc1p_lastc4p_p = list(df["C1'-C4'-P"][~ np.isnan(df["C1'-C4'-P"])])
-    lastc5p_lastc4p_p = list(df["C5'-C4'-P"][~ np.isnan(df["C5'-C4'-P"])])
-    p_o5p_c5p = list(df["P-O5'-C5'"][~ np.isnan(df["P-O5'-C5'"])])
-    o5p_c5p_c4p = list(df["O5'-C5'-C4'"][~ np.isnan(df["O5'-C5'-C4'"])])
-    c5p_c4p_c1p = list(df["C5'-C4'-C1'"][~ np.isnan(df["C5'-C4'-C1'"])])
-    c4p_c1p_b1 = list(df["C4'-C1'-B1"][~ np.isnan(df["C4'-C1'-B1"])])
-    c1p_b1_b2 = list(df["C1'-B1-B2"][~ np.isnan(df["C1'-B1-B2"])])
-
-    os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/angles/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/angles/")
-
-    GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True)
-    GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True)
-    GMM_histo(lastc5p_lastc4p_p, "C5'-C4'-P", toric=True)
-    GMM_histo(p_o5p_c5p, "P-O5'-C5'", toric=True)
-    GMM_histo(o5p_c5p_c4p, "O5'-C5'-C4'", toric=True)
-    GMM_histo(c5p_c4p_c1p, "C5'-C4'-C1'", toric=True)
-    GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True)
-    GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True)
-    
-    GMM_histo(lastc4p_p_o5p, "C4'-P-O5'", toric=True, hist=False, col='lightcoral')
-    GMM_histo(lastc1p_lastc4p_p, "C1'-C4'-P", toric=True, hist=False, col='limegreen')
-    GMM_histo(lastc5p_lastc4p_p, "C5'-C4'-P", toric=True, hist=False, col='tomato')
-    GMM_histo(p_o5p_c5p, "P-O5'-C5'", toric=True, hist=False, col='aquamarine')
-    GMM_histo(o5p_c5p_c4p, "O5'-C5'-C4'", toric=True, hist=False, col='goldenrod')
-    GMM_histo(c5p_c4p_c1p, "C5'-C4'-C1'", toric=True, hist=False, col='darkcyan')
-    GMM_histo(c4p_c1p_b1, "C4'-C1'-B1", toric=True, hist=False, col='deeppink')
-    GMM_histo(c1p_b1_b2, "C1'-B1-B2", toric=True, hist=False, col='indigo')
-    axes = plt.gca()
-    axes.set_ylim(0, 100)
-    plt.xlabel("Angle (Degres)")
-    plt.title("GMM of angles between HiRE-RNA beads")
-    plt.savefig(runDir + "/results/figures/GMM/HiRE-RNA/angles/GMM_angles_HiRE_RNA.png")
-    plt.close()
-
-    # Torsions    
-    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/torsions/angles_torsion_hire_RNA.csv"))  
-
-    p_o5_c5_c4 = list(df["P-O5'-C5'-C4'"][~ np.isnan(df["P-O5'-C5'-C4'"])])
-    o5_c5_c4_c1 = list(df["O5'-C5'-C4'-C1'"][~ np.isnan(df["O5'-C5'-C4'-C1'"])])
-    c5_c4_c1_b1 = list(df["C5'-C4'-C1'-B1"][~ np.isnan(df["C5'-C4'-C1'-B1"])])
-    c4_c1_b1_b2 = list(df["C4'-C1'-B1-B2"][~ np.isnan(df["C4'-C1'-B1-B2"])])
-    o5_c5_c4_psuiv = list(df["O5'-C5'-C4'-P°"][~ np.isnan(df["O5'-C5'-C4'-P°"])])
-    c5_c4_psuiv_o5suiv = list(df["C5'-C4'-P°-O5'°"][~ np.isnan(df["C5'-C4'-P°-O5'°"])])
-    c4_psuiv_o5suiv_c5suiv = list(df["C4'-P°-O5'°-C5'°"][~ np.isnan(df["C4'-P°-O5'°-C5'°"])])
-    c1_c4_psuiv_o5suiv = list(df["C1'-C4'-P°-O5'°"][~ np.isnan(df["C1'-C4'-P°-O5'°"])])
-
-    os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/torsions/")
-
-    GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True)
-    GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True)
-    GMM_histo(c5_c4_c1_b1, "C5'-C4'-C1'-B1", toric=True)
-    GMM_histo(c4_c1_b1_b2, "C4'-C1'-B1-B2", toric=True)
-    GMM_histo(o5_c5_c4_psuiv, "O5'-C5'-C4'-P°", toric=True)
-    GMM_histo(c5_c4_psuiv_o5suiv, "C5'-C4'-P°-O5'°", toric=True)
-    GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True)
-    GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True)
-
-    GMM_histo(p_o5_c5_c4, "P-O5'-C5'-C4'", toric=True, hist=False, col='darkred')
-    GMM_histo(o5_c5_c4_c1, "O5'-C5'-C4'-C1'", toric=True, hist=False, col='chocolate')
-    GMM_histo(c5_c4_c1_b1, "C5'-C4'-C1'-B1", toric=True, hist=False, col='mediumvioletred')
-    GMM_histo(c4_c1_b1_b2, "C4'-C1'-B1-B2", toric=True, hist=False, col='cadetblue')
-    GMM_histo(o5_c5_c4_psuiv, "O5'-C5'-C4'-P°", toric=True, hist=False, col='darkkhaki')
-    GMM_histo(c5_c4_psuiv_o5suiv, "C5'-C4'-P°-O5'°", toric=True, hist=False, col='springgreen')
-    GMM_histo(c4_psuiv_o5suiv_c5suiv, "C4'-P°-O5'°-C5'°", toric=True, hist=False, col='indigo')
-    GMM_histo(c1_c4_psuiv_o5suiv, "C1'-C4'-P°-O5'°", toric=True, hist=False, col='gold')
-    plt.xlabel("Angle (Degrees)")
-    plt.title("GMM of torsion angles between HiRE-RNA beads")
-    plt.savefig("GMM_torsions_HiRE_RNA.png")
-    plt.close()
-
-    os.chdir(runDir)
-    setproctitle("GMM (HiRE-RNA) finished")
-
-@trace_unhandled_exceptions
-def gmm_hrna_basepair_type(type_LW, ntpair, data):
-    """
-    function to plot the statistical figures you want
-    By type of pairing:
-    Superposition of GMMs of plane angles
-    Superposition of the histogram and the GMM of the distances
-    all in the same window
-    """
-
-    setproctitle(f"GMM (HiRE-RNA {type_LW} basepairs)")
-
-    figure = plt.figure(figsize = (10, 10))
-    plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5)
-   
-    plt.subplot(2, 1, 1)
-    GMM_histo(data["211_angle"], f"{type_LW}_{ntpair}_C1'-B1-B1pair", toric=True, hist=False, col='cyan' )
-    GMM_histo(data["112_angle"], f"{type_LW}_{ntpair}_B1-B1pair-C1'pair", toric=True, hist=False, col='magenta')
-    GMM_histo(data["3211_torsion"], f"{type_LW}_{ntpair}_C4'-C1'-B1-B1pair", toric=True, hist=False, col='black' )
-    GMM_histo(data["1123_torsion"], f"{type_LW}_{ntpair}_B1-B1pair-C1'pair-C4'pair", toric=True, hist=False, col='maroon')
-    GMM_histo(data["alpha1"], f"{type_LW}_{ntpair}_alpha_1", toric=True, hist=False, col="yellow")
-    GMM_histo(data["alpha2"], f"{type_LW}_{ntpair}_alpha_2", toric=True, hist=False, col='olive')
-    plt.xlabel("Angle (degree)")
-    plt.title(f"GMM of plane angles for {type_LW} {ntpair} basepairs", fontsize=10)
-
-    plt.subplot(2, 1, 2)
-    GMM_histo(data["Distance"], f"Distance between {type_LW} {ntpair} tips", toric=False, hist=False, col="cyan")
-    GMM_histo(data["dB1"], f"{type_LW} {ntpair} dB1", toric=False, hist=False, col="tomato")
-    GMM_histo(data["dB2"], f"{type_LW} {ntpair} dB2", toric=False, hist=False, col="goldenrod")
-    plt.xlabel("Distance (Angströms)")
-    plt.title(f"GMM of distances for {type_LW} {ntpair} basepairs", fontsize=10)
-    
-    plt.savefig(f"{type_LW}_{ntpair}_basepairs.png" )
-    plt.close()
-    setproctitle(f"GMM (HiRE-RNA {type_LW} {ntpair} basepairs) finished")
-
-@trace_unhandled_exceptions
-def gmm_hrna_basepairs():
-
-    setproctitle("GMM (HiRE-RNA basepairs)")
-
-    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv"))
-
-    lw = ["cWW", "tWW", "cWH", "tWH", "cHW", "tHW", "cWS", "tWS", "cSW", "tSW", "cHH", "tHH", "cSH", "tSH", "cHS", "tHS", "cSS", "tSS"]
-
-    os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True)
-    os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/")
-
-    for lw_type in lw:
-        data = df[df['type_LW'] == lw_type ]
-        if len(data):
-            for b1 in ['A','C','G','U']:
-                for b2 in ['A','C','G','U']:
-                    thisbases = data[(data.nt1_res == b1)&(data.nt2_res == b2)]
-                    if len(thisbases):
-                        gmm_hrna_basepair_type(lw_type, b1+b2, thisbases)
-
-    # colors = ['lightcoral', "lightseagreen", "black", "goldenrod", "olive", "steelblue", "silver", "deeppink", "navy",
-    #           "sienna", "maroon", "orange", "mediumaquamarine", "tomato", "indigo", "orchid", "tan", "lime"]
-    # for lw_type, col in zip(lw, colors):
-    #     data = df[df['type LW'] == lw_type]
-    #     GMM_histo(data.Distance, lw_type, toric=False, hist=False, col=col)
-    # plt.xlabel('Distance (Angströms)')
-    # plt.title("GMM of distances between base tips ("+str(nt)+ " values)", fontsize=8)
-    # plt.savefig("distances_between_tips.png")
-    # plt.close()
-
-    os.chdir(runDir)
-    setproctitle(f"GMM (HiRE-RNA basepairs) finished")
-
-def merge_jsons():
-    # All atom distances
-    bonds = ["O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'", "C1'-N9",
-             "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5", 
-             "C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"]
-    bonds = [ runDir + "/results/geometry/json/" + x + ".json" for x in bonds ]
-    concat_jsons(bonds, runDir + "/results/geometry/json/all_atom_distances.json")
-    
-
-    # All atom torsions
-    torsions = ["Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Xhi", "Zeta"]
-    torsions = [ runDir + "/results/geometry/json/" + x + ".json" for x in torsions ]
-    concat_jsons(torsions, runDir + "/results/geometry/json/all_atom_torsions.json")
- 
-    # HiRE-RNA distances
-    hrnabonds = ["P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2", "C4'-P"]
-    hrnabonds = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnabonds ]
-    concat_jsons(hrnabonds, runDir + "/results/geometry/json/hirerna_distances.json")
-
-    # HiRE-RNA angles
-    hrnaangles = ["P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2", "C4'-P-O5'", "C5'-C4'-P", "C1'-C4'-P"]
-    hrnaangles = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnaangles ]
-    concat_jsons(hrnaangles, runDir + "/results/geometry/json/hirerna_angles.json")
-
-    # HiRE-RNA torsions
-    hrnators = ["P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "C4'-P°-O5'°-C5'°", "C5'-C4'-P°-O5'°", "C1'-C4'-P°-O5'°", "O5'-C5'-C4'-P°"]
-    hrnators = [ runDir + "/results/geometry/json/" + x + ".json" for x in hrnators ]
-    concat_jsons(hrnators, runDir + "/results/geometry/json/hirerna_torsions.json")
-
-    # HiRE-RNA basepairs
-    for nt1 in ['A', 'C', 'G', 'U']:
-        for nt2 in ['A', 'C', 'G', 'U']:
-            bps = glob.glob(runDir + f"/results/geometry/json/*{nt1}{nt2}*.json")
-            concat_jsons(bps, runDir + f"/results/geometry/json/hirerna_{nt1}{nt2}_basepairs.json")
-
-    # Delete previous files
-    for f in bonds + torsions + hrnabonds + hrnaangles + hrnators:
-        try:
-            os.remove(f)
-        except FileNotFoundError:
-            pass
-    for f in glob.glob(runDir + "/results/geometry/json/t*.json"):
-        try:
-            os.remove(f)
-        except FileNotFoundError:
-            pass
-    for f in glob.glob(runDir + "/results/geometry/json/c*.json"):
-        try:
-            os.remove(f)
-        except FileNotFoundError:
-            pass
-    for f in glob.glob(runDir + "/results/geometry/json/Distance*.json"):
-        try:
-            os.remove(f)
-        except FileNotFoundError:
-            pass
-
-@trace_unhandled_exceptions
-def loop(f):
-    return pd.read_csv(f)
-
-@trace_unhandled_exceptions
-def concat_dataframes(fpath, outfilename):
-    """
-    Concatenates the dataframes containing measures 
-    and creates a new dataframe gathering all
-    """
-    global idxQueue
-    thr_idx = idxQueue.get()
-    setproctitle(f"Worker {thr_idx+1} : Concatenation of {fpath}")
-
-    liste = os.listdir(fpath)
-    pbar = tqdm(total=len(liste), position=thr_idx, desc="Preparing "+outfilename, leave=False)
-    df_tot = pd.read_csv(os.path.abspath(fpath + liste.pop()), engine="python")
-    #df=Parallel(n_jobs=-1, verbose=20)(delayed(loop)(os.path.abspath(fpath+liste[f])) for f in range (len(liste)))
-    #except :
-        # print(liste[f])
-         
-    pbar.update(1)
-    for f in range(len(liste)):
-    #     try :
-        df = pd.read_csv(os.path.abspath(fpath + liste.pop()), engine='python')
-    #     except :
-    #         print(liste[f])
-    #         continue
-        df_tot = pd.concat([df_tot, df], ignore_index=True)
         pbar.update(1)
-    #df = pd.concat(df, ignore_index=True)
+    return update
-    #pbar.update(1)
-    #df.to_csv(fpath + outfilename)
-    df_tot.to_csv(fpath + outfilename)
-    idxQueue.put(thr_idx) # replace the thread index in the queue
-    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
-
-def concat_jsons(flist, outfilename):
-    """
-    Reads JSON files computed by the geometry jobs and merge them into a smaller
-    number of files
-    """
-   
-    result = []
-    for f in flist:
-        # if not path.isfile(f):
-        #     continue:
-        with open(f, "rb") as infile:
-            result.append(json.load(infile))
-
-    # write the files
-    with open(outfilename, 'w', encoding='utf-8') as f:
-        json.dump(result, f, indent=4)
 def process_jobs(joblist):
     """
@@ -2952,13 +1229,16 @@ if __name__ == "__main__":
     DO_WADLEY_ANALYSIS = False
     DO_AVG_DISTANCE_MATRIX = False
     DO_HIRE_RNA_MEASURES = False
+    RESCAN_GMM_COMP_NUM = False
     try:
-        opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", "3d-folder=", "seq-folder=", "hire-rna" ])
+        opts, _ = getopt.getopt( sys.argv[1:], "r:h", 
+                    [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", 
+                      "3d-folder=", "seq-folder=", "hire-rna", "rescan-nmodes" ])
     except getopt.GetoptError as err:
         print(err)
         sys.exit(2)
-    for opt, arg in opts:
+    for opt, arg in opts:
         if opt == "-h" or opt == "--help":
             print(  "RNANet statistics, a script to build a multiscale RNA dataset from public data\n"
                     "Developed by Louis Becquey, Khodor Hannoush, and Aglaé Tabot 2019/2021")
@@ -2975,28 +1255,28 @@ if __name__ == "__main__":
             print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.")
             print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.")
             print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data.")
-
+            print("--rescan-nmodes\t\tDo not assume the number of modes in distances and angles distributions, measure it.")
             sys.exit()
-        elif opt == '--version':
+        elif opt == "--version":
             print("RNANet statistics 1.6 beta")
             sys.exit()
         elif opt == "-r" or opt == "--resolution":
             assert float(arg) > 0.0 and float(arg) <= 20.0 
             res_thr = float(arg)
-        elif opt=='--3d-folder':
+        elif opt == "--3d-folder":
             path_to_3D_data = path.abspath(arg)
             if path_to_3D_data[-1] != '/':
                 path_to_3D_data += '/'
-        elif opt=='--seq-folder':
+        elif opt == "--seq-folder":
             path_to_seq_data = path.abspath(arg)
             if path_to_seq_data[-1] != '/':
                 path_to_seq_data += '/'
-        elif opt=='--from-scratch':
+        elif opt == "--from-scratch":
             DELETE_OLD_DATA = True
             DO_WADLEY_ANALYSIS = True
-        elif opt=="--distance-matrices":
+        elif opt == "--distance-matrices":
             DO_AVG_DISTANCE_MATRIX = True
-        elif opt=='--wadley':
+        elif opt == "--wadley":
             DO_WADLEY_ANALYSIS = True
             os.makedirs(runDir+"/results/geometry/Pyle/distances/", exist_ok=True)
             os.makedirs(runDir+"/results/geometry/Pyle/classes_dist/", exist_ok=True)
@@ -3005,7 +1285,7 @@ if __name__ == "__main__":
             os.makedirs(runDir+"/results/figures/GMM/Pyle/distances/", exist_ok=True)
             os.makedirs(runDir+"/results/figures/GMM/Pyle/angles/", exist_ok=True)
             os.makedirs(runDir+"/results/figures/GMM/Pyle/pseudotorsions/", exist_ok=True)
-        elif opt=='--hire-rna':
+        elif opt == "--hire-rna":
             DO_HIRE_RNA_MEASURES = True
             os.makedirs(runDir + "/results/geometry/HiRE-RNA/distances/", exist_ok=True)
             os.makedirs(runDir + "/results/geometry/HiRE-RNA/angles/", exist_ok=True)
@@ -3015,7 +1295,8 @@ if __name__ == "__main__":
             os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/angles/", exist_ok=True)
             os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True)
             os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True)
-    
+        elif opt == "rescan-nmodes":
+            RESCAN_GMM_COMP_NUM = True
     # Load mappings. famlist will contain only families with structures at this resolution threshold.
@@ -3053,7 +1334,8 @@ if __name__ == "__main__":
         print("Old data deleted.")
     # Prepare the multiprocessing execution environment
-    nworkers = min(read_cpu_number()-1, 50)
+    global nworkers
+    nworkers = read_cpu_number()-1
     print("Using", nworkers, "threads...")
     thr_idx_mgr = Manager()
     idxQueue = thr_idx_mgr.Queue()
@@ -3063,26 +1345,25 @@ if __name__ == "__main__":
     # Define the tasks
     joblist = []
-    # Do eta/theta plots
+    # # Do eta/theta plots
-    #if n_unmapped_chains and DO_WADLEY_ANALYSIS:    
+    # if n_unmapped_chains and DO_WADLEY_ANALYSIS:    
     #    joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr)))
     #    joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr)))
-    # Do distance matrices for each family excl. LSU/SSU (will be processed later)
+    # # Do distance matrices for each family excl. LSU/SSU (will be processed later)
-    if DO_AVG_DISTANCE_MATRIX:  
+    # if DO_AVG_DISTANCE_MATRIX:  
-        extracted_chains = []
+    #     extracted_chains = []
-        for file in os.listdir(path_to_3D_data + "rna_mapped_to_Rfam"):
+    #     for file in os.listdir(path_to_3D_data + "rna_mapped_to_Rfam"):
-            if os.path.isfile(os.path.join(path_to_3D_data + "rna_mapped_to_Rfam", file)):
+    #         if os.path.isfile(os.path.join(path_to_3D_data + "rna_mapped_to_Rfam", file)):
-                e1 = file.split('_')[0]
+    #             e1 = file.split('_')[0]
-                e2 = file.split('_')[1]
+    #             e2 = file.split('_')[1]
-                e3 = file.split('_')[2]
+    #             e3 = file.split('_')[2]
-                extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3)
+    #             extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3)
-        for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]:    # Process the rRNAs later only 3 by 3
+    #     for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]:    # Process the rRNAs later only 3 by 3
-            joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False)))
+    #         joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False)))
-            joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False)))
+    #         joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False)))
-
+
-    # Do general family statistics
+    # # Do general family statistics
-    
     # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths
     # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families)
     # for f in famlist:
@@ -3091,25 +1372,18 @@ if __name__ == "__main__":
     #         joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families)
-    # Do geometric measures on all chains
+    # Do geometric measures
-    #print(liste_repres('/home/data/RNA/3D/latest_nr_list_4.0A.csv'))
-    
-    # print(measure_from_structure(os.listdir(path_to_3D_data + "rna_only")[0]))
     if n_unmapped_chains:
-    #     os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True)
+        os.makedirs(runDir + "/results/geometry/all-atoms/distances/", exist_ok=True)
-    #    liste_struct = os.listdir(path_to_3D_data + "rna_only")
+        # structure_list = os.listdir(path_to_3D_data + "rna_only")
-        liste_struct=liste_repres('/home/data/RNA/3D/latest_nr_list_4.0A.csv')
+        structure_list = representatives_from_nrlist(res_thr)
-    #     if '4zdo_1_E.cif' in liste_struct:
+        for f in structure_list:
-    #         liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now
-    #     if '4zdp_1_E.cif' in liste_struct:
-    #         liste_struct.remove('4zdp_1_E.cif')
-        for f in liste_struct:
             if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]):
                 joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers))   # All-atom distances
-    process_jobs(joblist)
+    # process_jobs(joblist)
-    #count_occur_pyle_dist(runDir + '/results/geometry/Pyle/classes_dist/')
+
     # Now process the memory-heavy tasks family by family
     if DO_AVG_DISTANCE_MATRIX:
         for f in LSU_set:
@@ -3124,36 +1398,31 @@ if __name__ == "__main__":
     # finish the work after the parallel portions
-    # per_chain_stats()   # per chain base frequencies en basepair types
+    # per_chain_stats()   # per chain base frequencies and basepair types
     # seq_idty()          # identity matrices from pre-computed .npy matrices
     # stats_pairs()
-    concat_dataframes(runDir + '/results/geometry/Pyle/distances/', 'distances.csv')
     if n_unmapped_chains:
         # general_stats()
         os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True)
         os.makedirs(runDir+"/results/geometry/json/", exist_ok=True)
+        concat_dataframes(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')
+        if DO_HIRE_RNA_MEASURES:
+            concat_dataframes(runDir + '/results/geometry/HiRE-RNA/distances/', 'distances_HiRERNA.csv')
+            concat_dataframes(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_HiRERNA.csv')
+            concat_dataframes(runDir + '/results/geometry/HiRE-RNA/torsions/', 'torsions_HiRERNA.csv')
+            concat_dataframes(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs_HiRERNA.csv')
+        if DO_WADLEY_ANALYSIS:
+            concat_dataframes(runDir + '/results/geometry/Pyle/distances/', 'distances_pyle.csv')
+            concat_dataframes(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv')
         joblist = []
-        # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances.csv')))
+        joblist.append(Job(function=gmm_aa_dists, args=(RESCAN_GMM_COMP_NUM)))
-        # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')))
+        joblist.append(Job(function=gmm_aa_torsions, args=(RESCAN_GMM_COMP_NUM)))
-        # if DO_HIRE_RNA_MEASURES:
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv')))
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv')))
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv')))
-        # if DO_WADLEY_ANALYSIS:
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv')))
-        #     joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv')))
-        # process_jobs(joblist)
-        joblist = []
-        # joblist.append(Job(function=gmm_aa_dists, args=()))
-        # joblist.append(Job(function=gmm_aa_torsions, args=()))
         if DO_HIRE_RNA_MEASURES:
-        #     joblist.append(Job(function=gmm_hrna, args=()))
+            joblist.append(Job(function=gmm_hrna, args=(RESCAN_GMM_COMP_NUM)))
-            joblist.append(Job(function=gmm_hrna_basepairs, args=()))
+            joblist.append(Job(function=gmm_hrna_basepairs, args=(RESCAN_GMM_COMP_NUM)))
-        # if DO_WADLEY_ANALYSIS:
+        if DO_WADLEY_ANALYSIS:
-        #    joblist.append(Job(function=gmm_wadley, args=()))
+            joblist.append(Job(function=gmm_pyle, args=(RESCAN_GMM_COMP_NUM)))
-            # joblist.append(Job(function=gmm_pyle, args=()))
         if len(joblist):
             process_jobs(joblist)
-        #merge_jsons()
+        merge_jsons()