Louis BECQUEY

Split Aglaé's code into a separate stats file

...@@ -31,7 +31,7 @@ import time ...@@ -31,7 +31,7 @@ import time
31 import traceback 31 import traceback
32 import warnings 32 import warnings
33 from functools import partial, wraps 33 from functools import partial, wraps
34 -from multiprocessing import Pool, Manager 34 +from multiprocessing import Pool, Manager, Value
35 from time import sleep 35 from time import sleep
36 from tqdm import tqdm 36 from tqdm import tqdm
37 from setproctitle import setproctitle 37 from setproctitle import setproctitle
...@@ -45,6 +45,12 @@ from Bio.PDB.PDBIO import Select ...@@ -45,6 +45,12 @@ from Bio.PDB.PDBIO import Select
45 runDir = os.getcwd() 45 runDir = os.getcwd()
46 46
47 def trace_unhandled_exceptions(func): 47 def trace_unhandled_exceptions(func):
48 + """
49 + Captures exceptions even in parallel sections of the code and child processes,
50 + and throws logs in red to stderr and to errors.txt.
51 +
52 + Should be defined before the classes that use it.
53 + """
48 @wraps(func) 54 @wraps(func)
49 def wrapped_func(*args, **kwargs): 55 def wrapped_func(*args, **kwargs):
50 try: 56 try:
...@@ -60,27 +66,27 @@ def trace_unhandled_exceptions(func): ...@@ -60,27 +66,27 @@ def trace_unhandled_exceptions(func):
60 print(s) 66 print(s)
61 return wrapped_func 67 return wrapped_func
62 68
63 -
64 pd.set_option('display.max_rows', None) 69 pd.set_option('display.max_rows', None)
65 sqlite3.enable_callback_tracebacks(True) 70 sqlite3.enable_callback_tracebacks(True)
66 sqlite3.register_adapter(np.int64, lambda val: int(val)) # Tell Sqlite what to do with <class numpy.int64> objects ---> convert to int 71 sqlite3.register_adapter(np.int64, lambda val: int(val)) # Tell Sqlite what to do with <class numpy.int64> objects ---> convert to int
67 sqlite3.register_adapter(np.float64, lambda val: float(val)) # Tell Sqlite what to do with <class numpy.float64> objects ---> convert to float 72 sqlite3.register_adapter(np.float64, lambda val: float(val)) # Tell Sqlite what to do with <class numpy.float64> objects ---> convert to float
68 73
69 -m = Manager() 74 +# m = Manager()
70 -running_stats = m.list() 75 +# running_stats = m.list()
71 -running_stats.append(0) # n_launched 76 +# running_stats.append(0) # n_launched
72 -running_stats.append(0) # n_finished 77 +# running_stats.append(0) # n_finished
73 -running_stats.append(0) # n_skipped 78 +# running_stats.append(0) # n_skipped
79 +n_launched = Value('i', 0)
80 +n_finished = Value('i', 0)
81 +n_skipped = Value('i', 0)
74 path_to_3D_data = "tobedefinedbyoptions" 82 path_to_3D_data = "tobedefinedbyoptions"
75 path_to_seq_data = "tobedefinedbyoptions" 83 path_to_seq_data = "tobedefinedbyoptions"
76 python_executable = "python"+".".join(platform.python_version().split('.')[:2]) # Cuts python3.8.1 into python3.8 for example. 84 python_executable = "python"+".".join(platform.python_version().split('.')[:2]) # Cuts python3.8.1 into python3.8 for example.
77 validsymb = '\U00002705' 85 validsymb = '\U00002705'
78 warnsymb = '\U000026A0' 86 warnsymb = '\U000026A0'
79 errsymb = '\U0000274C' 87 errsymb = '\U0000274C'
80 -LSU_set = {"RF00002", "RF02540", "RF02541", 88 +LSU_set = {"RF00002", "RF02540", "RF02541", "RF02543", "RF02546"} # From Rfam CLAN 00112
81 - "RF02543", "RF02546"} # From Rfam CLAN 00112 89 +SSU_set = {"RF00177", "RF02542", "RF02545", "RF01959", "RF01960"} # From Rfam CLAN 00111
82 -SSU_set = {"RF00177", "RF02542", "RF02545",
83 - "RF01959", "RF01960"} # From Rfam CLAN 00111
84 90
85 no_nts_set = set() 91 no_nts_set = set()
86 weird_mappings = set() 92 weird_mappings = set()
...@@ -103,17 +109,15 @@ class MutableFastaIterator(FastaIterator): ...@@ -103,17 +109,15 @@ class MutableFastaIterator(FastaIterator):
103 first_word = title.split(None, 1)[0] 109 first_word = title.split(None, 1)[0]
104 except IndexError: 110 except IndexError:
105 assert not title, repr(title) 111 assert not title, repr(title)
106 - # Should we use SeqRecord default for no ID?
107 first_word = "" 112 first_word = ""
108 - yield SeqRecord( 113 + yield SeqRecord(MutableSeq(sequence), id=first_word, name=first_word, description=title)
109 - MutableSeq(sequence), id=first_word, name=first_word, description=title,
110 - )
111 114
112 115
113 class SelectivePortionSelector(object): 116 class SelectivePortionSelector(object):
114 """Class passed to MMCIFIO to select some chain portions in an MMCIF file. 117 """Class passed to MMCIFIO to select some chain portions in an MMCIF file.
115 118
116 Validates every chain, residue, nucleotide, to say if it is in the selection or not. 119 Validates every chain, residue, nucleotide, to say if it is in the selection or not.
120 + The primary use is to select the portion of a chain which is mapped to a family.
117 """ 121 """
118 122
119 def __init__(self, model_id, chain_id, valid_resnums, khetatm): 123 def __init__(self, model_id, chain_id, valid_resnums, khetatm):
...@@ -156,123 +160,6 @@ class SelectivePortionSelector(object): ...@@ -156,123 +160,6 @@ class SelectivePortionSelector(object):
156 return 1 160 return 1
157 161
158 162
159 -_select=Select()
160 -
161 -def save_mmcif(ioobj, out_file, select=_select, preserve_atom_numbering=False):
162 - # reuse and modification of the source code of Biopython
163 - # to have the 2 columns of numbering of residues numbered with the index_chain of DSSR
164 - if isinstance(out_file, str):
165 - fp = open(out_file, "w")
166 - close_file = True
167 - else:
168 - fp = out_file
169 - close_file = False
170 - atom_dict = defaultdict(list)
171 -
172 - for model in ioobj.structure.get_list():
173 - if not select.accept_model(model):
174 - continue
175 - # mmCIF files with a single model have it specified as model 1
176 - if model.serial_num == 0:
177 - model_n = "1"
178 - else:
179 - model_n = str(model.serial_num)
180 - # This is used to write label_entity_id and label_asym_id and
181 - # increments from 1, changing with each molecule
182 - entity_id = 0
183 - if not preserve_atom_numbering:
184 - atom_number = 1
185 - for chain in model.get_list():
186 - if not select.accept_chain(chain):
187 - continue
188 - chain_id = chain.get_id()
189 - if chain_id == " ":
190 - chain_id = "."
191 - # This is used to write label_seq_id,
192 - # remaining blank for hetero residues
193 -
194 - prev_residue_type = ""
195 - prev_resname = ""
196 - for residue in chain.get_unpacked_list():
197 - if not select.accept_residue(residue):
198 - continue
199 - hetfield, resseq, icode = residue.get_id()
200 - if hetfield == " ":
201 - residue_type = "ATOM"
202 - label_seq_id = str(resseq)
203 -
204 - else:
205 - residue_type = "HETATM"
206 - label_seq_id = "."
207 - resseq = str(resseq)
208 - if icode == " ":
209 - icode = "?"
210 - resname = residue.get_resname()
211 - # Check if the molecule changes within the chain
212 - # This will always increment for the first residue in a
213 - # chain due to the starting values above
214 - if residue_type != prev_residue_type or (
215 - residue_type == "HETATM" and resname != prev_resname
216 - ):
217 - entity_id += 1
218 - prev_residue_type = residue_type
219 - prev_resname = resname
220 - label_asym_id = ioobj._get_label_asym_id(entity_id)
221 - for atom in residue.get_unpacked_list():
222 - if select.accept_atom(atom):
223 - atom_dict["_atom_site.group_PDB"].append(residue_type)
224 - if preserve_atom_numbering:
225 - atom_number = atom.get_serial_number()
226 - atom_dict["_atom_site.id"].append(str(atom_number))
227 - if not preserve_atom_numbering:
228 - atom_number += 1
229 - element = atom.element.strip()
230 - if element == "":
231 - element = "?"
232 - atom_dict["_atom_site.type_symbol"].append(element)
233 - atom_dict["_atom_site.label_atom_id"].append(
234 - atom.get_name().strip()
235 - )
236 - altloc = atom.get_altloc()
237 - if altloc == " ":
238 - altloc = "."
239 - atom_dict["_atom_site.label_alt_id"].append(altloc)
240 - atom_dict["_atom_site.label_comp_id"].append(
241 - resname.strip()
242 - )
243 - atom_dict["_atom_site.label_asym_id"].append(label_asym_id)
244 - # The entity ID should be the same for similar chains
245 - # However this is non-trivial to calculate so we write "?"
246 - atom_dict["_atom_site.label_entity_id"].append("?")
247 - atom_dict["_atom_site.label_seq_id"].append(label_seq_id)
248 - atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode)
249 - coord = atom.get_coord()
250 - atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0])
251 - atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1])
252 - atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2])
253 - atom_dict["_atom_site.occupancy"].append(
254 - str(atom.get_occupancy())
255 - )
256 - atom_dict["_atom_site.B_iso_or_equiv"].append(
257 - str(atom.get_bfactor())
258 - )
259 - atom_dict["_atom_site.auth_seq_id"].append(resseq)
260 - atom_dict["_atom_site.auth_asym_id"].append(chain_id)
261 - atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n)
262 -
263 - # Data block name is the structure ID with special characters removed
264 - structure_id = ioobj.structure.id
265 - for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]:
266 - structure_id = structure_id.replace(c, "")
267 - atom_dict["data_"] = structure_id
268 -
269 - # Set the dictionary and write out using the generic dictionary method
270 - ioobj.dic = atom_dict
271 - ioobj._save_dict(fp)
272 - if close_file:
273 - fp.close()
274 -
275 -
276 class Chain: 163 class Chain:
277 """ 164 """
278 The object which stores all our data and the methods to process it. 165 The object which stores all our data and the methods to process it.
...@@ -424,13 +311,11 @@ class Chain: ...@@ -424,13 +311,11 @@ class Chain:
424 for atom in list(res.get_atoms()): 311 for atom in list(res.get_atoms()):
425 # rename the remaining phosphate group to P, OP1, OP2, OP3 312 # rename the remaining phosphate group to P, OP1, OP2, OP3
426 if atom.get_name() in ['PA', 'O1A', 'O2A', 'O3A'] and res_name != 'RIA': 313 if atom.get_name() in ['PA', 'O1A', 'O2A', 'O3A'] and res_name != 'RIA':
427 - 314 + # RIA is a residue made up of 2 riboses and 2 phosphates,
428 - # RIA is a residue made up of 2 riboses and 2 phosphates, 315 + # so it has an O2A atom between the C2A and C1 'atoms,
429 - # so it has an O2A atom between the C2A and C1 'atoms, 316 + # and it also has an OP2 atom attached to one of its phosphates
430 - # and it also has an OP2 atom attached to one of its phosphates 317 + # (see chains 6fyx_1_1, 6zu9_1_1, 6fyy_1_1, 6gsm_1_1 , 3jaq_1_1 and 1yfg_1_A)
431 - # (see chains 6fyx_1_1, 6zu9_1_1, 6fyy_1_1, 6gsm_1_1 , 3jaq_1_1 and 1yfg_1_A) 318 + # we do not modify the atom names of RIA residue
432 - # we do not modify the atom names of RIA residue
433 -
434 if atom.get_name() == 'PA': 319 if atom.get_name() == 'PA':
435 atom_name = 'P' 320 atom_name = 'P'
436 if atom.get_name() == 'O1A': 321 if atom.get_name() == 'O1A':
...@@ -440,7 +325,7 @@ class Chain: ...@@ -440,7 +325,7 @@ class Chain:
440 if atom.get_name() == 'O3A': 325 if atom.get_name() == 'O3A':
441 atom_name = 'OP3' 326 atom_name = 'OP3'
442 new_atom_t = pdb.Atom.Atom(atom_name, atom.get_coord(), atom.get_bfactor(), atom.get_occupancy(), atom.get_altloc(), atom_name, atom.get_serial_number()) 327 new_atom_t = pdb.Atom.Atom(atom_name, atom.get_coord(), atom.get_bfactor(), atom.get_occupancy(), atom.get_altloc(), atom_name, atom.get_serial_number())
443 - else : 328 + else:
444 new_atom_t=atom.copy() 329 new_atom_t=atom.copy()
445 new_residu_t.add(new_atom_t) 330 new_residu_t.add(new_atom_t)
446 new_chain_t.add(new_residu_t) 331 new_chain_t.add(new_residu_t)
...@@ -787,7 +672,8 @@ class Chain: ...@@ -787,7 +672,8 @@ class Chain:
787 return df 672 return df
788 673
789 def register_chain(self, df): 674 def register_chain(self, df):
790 - """Saves the extracted 3D data to the database. 675 + """
676 + Saves the extracted 3D data to the database.
791 """ 677 """
792 678
793 setproctitle(f"RNANet.py {self.chain_label} register_chain()") 679 setproctitle(f"RNANet.py {self.chain_label} register_chain()")
...@@ -920,6 +806,10 @@ class Monitor: ...@@ -920,6 +806,10 @@ class Monitor:
920 806
921 807
922 class Downloader: 808 class Downloader:
809 + """
810 + An object with methods to download public data from the internet.
811 + """
812 +
923 def download_Rfam_PDB_mappings(self): 813 def download_Rfam_PDB_mappings(self):
924 """Query the Rfam public MySQL database for mappings between their RNA families and PDB structures. 814 """Query the Rfam public MySQL database for mappings between their RNA families and PDB structures.
925 815
...@@ -1170,6 +1060,10 @@ class Mapping: ...@@ -1170,6 +1060,10 @@ class Mapping:
1170 1060
1171 1061
1172 class Pipeline: 1062 class Pipeline:
1063 + """
1064 + The RNANet pipeline steps.
1065 + """
1066 +
1173 def __init__(self): 1067 def __init__(self):
1174 self.dl = Downloader() 1068 self.dl = Downloader()
1175 self.known_issues = [] # list of chain_labels to ignore 1069 self.known_issues = [] # list of chain_labels to ignore
...@@ -1189,6 +1083,7 @@ class Pipeline: ...@@ -1189,6 +1083,7 @@ class Pipeline:
1189 self.REUSE_ALL = False 1083 self.REUSE_ALL = False
1190 self.REDUNDANT = False 1084 self.REDUNDANT = False
1191 self.ALIGNOPTS = None 1085 self.ALIGNOPTS = None
1086 + self.RRNAALIGNOPTS = "--mxsize 8192 --cpu 10 --maxtau 0.1"
1192 self.STATSOPTS = None 1087 self.STATSOPTS = None
1193 self.USESINA = False 1088 self.USESINA = False
1194 self.SELECT_ONLY = None 1089 self.SELECT_ONLY = None
...@@ -1207,7 +1102,7 @@ class Pipeline: ...@@ -1207,7 +1102,7 @@ class Pipeline:
1207 1102
1208 try: 1103 try:
1209 opts, _ = getopt.getopt(sys.argv[1:], "r:fhs", ["help", "resolution=", "3d-folder=", "seq-folder=", "keep-hetatm=", 1104 opts, _ = getopt.getopt(sys.argv[1:], "r:fhs", ["help", "resolution=", "3d-folder=", "seq-folder=", "keep-hetatm=",
1210 - "only=", "cmalign-opts=", "stats-opts=", "maxcores=", "sina", "from-scratch", 1105 + "only=", "cmalign-opts=", "cmalign-rrna-opts=", "stats-opts=", "maxcores=", "sina", "from-scratch",
1211 "full-inference", "no-homology", "redundant", "ignore-issues", "extract", 1106 "full-inference", "no-homology", "redundant", "ignore-issues", "extract",
1212 "all", "no-logs", "archive", "update-homologous", "version"]) 1107 "all", "no-logs", "archive", "update-homologous", "version"])
1213 except getopt.GetoptError as err: 1108 except getopt.GetoptError as err:
...@@ -1323,6 +1218,8 @@ class Pipeline: ...@@ -1323,6 +1218,8 @@ class Pipeline:
1323 self.REUSE_ALL = True 1218 self.REUSE_ALL = True
1324 elif opt == "cmalign-opts": 1219 elif opt == "cmalign-opts":
1325 self.ALIGNOPTS = arg 1220 self.ALIGNOPTS = arg
1221 + elif opt == "cmalign-rrna-opts":
1222 + self.RRNAALIGNOPTS = arg
1326 elif opt == "stats-opts": 1223 elif opt == "stats-opts":
1327 self.STATSOPTS = " ".split(arg) 1224 self.STATSOPTS = " ".split(arg)
1328 elif opt == "--all": 1225 elif opt == "--all":
...@@ -1382,7 +1279,7 @@ class Pipeline: ...@@ -1382,7 +1279,7 @@ class Pipeline:
1382 # If self.FULLINFERENCE is False, the extended list is already filtered to remove 1279 # If self.FULLINFERENCE is False, the extended list is already filtered to remove
1383 # the chains which already are in the database. 1280 # the chains which already are in the database.
1384 print("> Building list of structures...", flush=True) 1281 print("> Building list of structures...", flush=True)
1385 - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores) 1282 + p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=ncores)
1386 try: 1283 try:
1387 1284
1388 pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, 1285 pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1,
...@@ -1491,7 +1388,7 @@ class Pipeline: ...@@ -1491,7 +1388,7 @@ class Pipeline:
1491 else: 1388 else:
1492 mmcif_list = sorted(set([c.pdb_id for c in self.update])) 1389 mmcif_list = sorted(set([c.pdb_id for c in self.update]))
1493 try: 1390 try:
1494 - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=int(coeff_ncores*ncores)) 1391 + p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=int(coeff_ncores*ncores))
1495 pbar = tqdm(mmcif_list, maxinterval=1.0, miniters=1, desc="mmCIF files") 1392 pbar = tqdm(mmcif_list, maxinterval=1.0, miniters=1, desc="mmCIF files")
1496 for _ in p.imap_unordered(work_mmcif, mmcif_list, chunksize=1): 1393 for _ in p.imap_unordered(work_mmcif, mmcif_list, chunksize=1):
1497 pbar.update(1) # Everytime the iteration finishes, update the global progress bar 1394 pbar.update(1) # Everytime the iteration finishes, update the global progress bar
...@@ -1634,7 +1531,11 @@ class Pipeline: ...@@ -1634,7 +1531,11 @@ class Pipeline:
1634 joblist = [] 1531 joblist = []
1635 for f in self.fam_list: 1532 for f in self.fam_list:
1636 # the function already uses all CPUs so launch them one by one (how_many_in_parallel=1) 1533 # the function already uses all CPUs so launch them one by one (how_many_in_parallel=1)
1637 - joblist.append(Job(function=work_realign, args=[self.USESINA, self.ALIGNOPTS, f], how_many_in_parallel=1, label=f)) 1534 + if f in LSU_set or f in SSU_set:
1535 + opts = self.RRNAALIGNOPTS
1536 + else:
1537 + opts = self.ALIGNOPTS
1538 + joblist.append(Job(function=work_realign, args=[self.USESINA, opts, f], how_many_in_parallel=1, label=f))
1638 1539
1639 # Execute the jobs 1540 # Execute the jobs
1640 try: 1541 try:
...@@ -1684,7 +1585,7 @@ class Pipeline: ...@@ -1684,7 +1585,7 @@ class Pipeline:
1684 1585
1685 # Start a process pool to dispatch the RNA families, 1586 # Start a process pool to dispatch the RNA families,
1686 # over multiple CPUs (one family by CPU) 1587 # over multiple CPUs (one family by CPU)
1687 - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) 1588 + p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=nworkers)
1688 1589
1689 try: 1590 try:
1690 fam_pbar = tqdm(total=len(self.fam_list), desc="RNA families", position=0, leave=True) 1591 fam_pbar = tqdm(total=len(self.fam_list), desc="RNA families", position=0, leave=True)
...@@ -1741,7 +1642,7 @@ class Pipeline: ...@@ -1741,7 +1642,7 @@ class Pipeline:
1741 os.makedirs(path_to_3D_data + "datapoints/") 1642 os.makedirs(path_to_3D_data + "datapoints/")
1742 1643
1743 # Save to by-chain CSV files 1644 # Save to by-chain CSV files
1744 - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3) 1645 + p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=3)
1745 try: 1646 try:
1746 pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True) 1647 pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True)
1747 for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains)): 1648 for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains)):
...@@ -1867,6 +1768,7 @@ class Pipeline: ...@@ -1867,6 +1768,7 @@ class Pipeline:
1867 1768
1868 conn.close() 1769 conn.close()
1869 1770
1771 +# ==================== General helper functions =====================
1870 1772
1871 def read_cpu_number(): 1773 def read_cpu_number():
1872 """This function reads the number of CPU cores available from /proc/cpuinfo. 1774 """This function reads the number of CPU cores available from /proc/cpuinfo.
...@@ -1876,13 +1778,29 @@ def read_cpu_number(): ...@@ -1876,13 +1778,29 @@ def read_cpu_number():
1876 p = subprocess.run(['grep', '-Ec', '(Intel|AMD)', '/proc/cpuinfo'], stdout=subprocess.PIPE) 1778 p = subprocess.run(['grep', '-Ec', '(Intel|AMD)', '/proc/cpuinfo'], stdout=subprocess.PIPE)
1877 return int(int(p.stdout.decode('utf-8')[:-1])/2) 1779 return int(int(p.stdout.decode('utf-8')[:-1])/2)
1878 1780
1879 -def init_worker(tqdm_lock=None): 1781 +def init_with_tqdm(tqdm_lock=None):
1782 + """
1783 + This initiation method kills the children when signal is received,
1784 + and the children progress is followed using TQDM progress bars.
1785 + """
1880 signal.signal(signal.SIGINT, signal.SIG_IGN) 1786 signal.signal(signal.SIGINT, signal.SIG_IGN)
1881 if tqdm_lock is not None: 1787 if tqdm_lock is not None:
1882 tqdm.set_lock(tqdm_lock) 1788 tqdm.set_lock(tqdm_lock)
1883 1789
1790 +def init_no_tqdm(arg1, arg2, arg3):
1791 + """
1792 + This initiation method does not kill the children when signal is received,
1793 + they will complete and die even after the main process stops.
1794 + The children progress is followed using stdout text logs (notify(), warn(), etc)
1795 + """
1796 + global n_launched, n_finished, n_skipped
1797 + n_launched = arg1
1798 + n_finished = arg2
1799 + n_skipped = arg3
1800 +
1884 def warn(message, error=False): 1801 def warn(message, error=False):
1885 - """Pretty-print warnings and error messages. 1802 + """
1803 + Pretty-print warnings and error messages.
1886 """ 1804 """
1887 # Cut if too long 1805 # Cut if too long
1888 if len(message) > 66: 1806 if len(message) > 66:
...@@ -1900,20 +1818,133 @@ def warn(message, error=False): ...@@ -1900,20 +1818,133 @@ def warn(message, error=False):
1900 print(f"\t> \033[33mWARN: {message:64s}\033[0m\t{warnsymb}", flush=True) 1818 print(f"\t> \033[33mWARN: {message:64s}\033[0m\t{warnsymb}", flush=True)
1901 1819
1902 def notify(message, post=''): 1820 def notify(message, post=''):
1821 + """
1822 + Pretty-print successful finished tasks.
1823 + """
1903 if len(post): 1824 if len(post):
1904 post = '(' + post + ')' 1825 post = '(' + post + ')'
1905 print(f"\t> {message:70s}\t{validsymb}\t{post}", flush=True) 1826 print(f"\t> {message:70s}\t{validsymb}\t{post}", flush=True)
1906 1827
1907 -def _mutable_SeqIO_to_alignment_iterator(handle): 1828 +# ========================= Biopython overloads =====================
1908 - records = list(MutableFastaIterator(handle))
1909 - if records:
1910 - yield MultipleSeqAlignment(records)
1911 1829
1912 -def parse(handle): 1830 +def save_mmcif(ioobj, out_file, select=Select(), preserve_atom_numbering=False):
1913 - with open(handle, 'r') as fp: 1831 + """
1914 - yield from _mutable_SeqIO_to_alignment_iterator(fp) 1832 + MMCIF writer which renumbers residues according to the RNANet index_chain (coming from DSSR).
1833 + """
1834 +
1835 + if isinstance(out_file, str):
1836 + fp = open(out_file, "w")
1837 + close_file = True
1838 + else:
1839 + fp = out_file
1840 + close_file = False
1841 + atom_dict = defaultdict(list)
1842 +
1843 + # Iterate on models
1844 + for model in ioobj.structure.get_list():
1845 + if not select.accept_model(model):
1846 + continue
1847 +
1848 + # mmCIF files with a single model have it specified as model 1
1849 + if model.serial_num == 0:
1850 + model_n = "1"
1851 + else:
1852 + model_n = str(model.serial_num)
1853 +
1854 + # This is used to write label_entity_id and label_asym_id and
1855 + # increments from 1, changing with each molecule
1856 + entity_id = 0
1857 + if not preserve_atom_numbering:
1858 + atom_number = 1
1859 +
1860 + # Iterate on chains
1861 + for chain in model.get_list():
1862 + if not select.accept_chain(chain):
1863 + continue
1864 + chain_id = chain.get_id()
1865 + if chain_id == " ":
1866 + chain_id = "."
1867 +
1868 + # This is used to write label_seq_id, remaining blank for hetero residues
1869 + prev_residue_type = ""
1870 + prev_resname = ""
1871 +
1872 + # Iterate on residues
1873 + for residue in chain.get_unpacked_list():
1874 + if not select.accept_residue(residue):
1875 + continue
1876 + hetfield, resseq, icode = residue.get_id()
1877 + if hetfield == " ":
1878 + residue_type = "ATOM"
1879 + label_seq_id = str(resseq)
1880 + else:
1881 + residue_type = "HETATM"
1882 + label_seq_id = "."
1883 + resseq = str(resseq)
1884 + if icode == " ":
1885 + icode = "?"
1886 + resname = residue.get_resname()
1887 +
1888 + # Check if the molecule changes within the chain.
1889 + # This will always increment for the first residue in a
1890 + # chain due to the starting values above
1891 + if residue_type != prev_residue_type or (residue_type == "HETATM" and resname != prev_resname):
1892 + entity_id += 1
1893 + prev_residue_type = residue_type
1894 + prev_resname = resname
1895 + label_asym_id = ioobj._get_label_asym_id(entity_id)
1896 +
1897 + # Iterate on atoms
1898 + for atom in residue.get_unpacked_list():
1899 + if select.accept_atom(atom):
1900 + atom_dict["_atom_site.group_PDB"].append(residue_type)
1901 + if preserve_atom_numbering:
1902 + atom_number = atom.get_serial_number()
1903 + atom_dict["_atom_site.id"].append(str(atom_number))
1904 + if not preserve_atom_numbering:
1905 + atom_number += 1
1906 + element = atom.element.strip()
1907 + if element == "":
1908 + element = "?"
1909 + atom_dict["_atom_site.type_symbol"].append(element)
1910 + atom_dict["_atom_site.label_atom_id"].append(atom.get_name().strip())
1911 + altloc = atom.get_altloc()
1912 + if altloc == " ":
1913 + altloc = "."
1914 + atom_dict["_atom_site.label_alt_id"].append(altloc)
1915 + atom_dict["_atom_site.label_comp_id"].append(resname.strip())
1916 + atom_dict["_atom_site.label_asym_id"].append(label_asym_id)
1917 + # The entity ID should be the same for similar chains
1918 + # However this is non-trivial to calculate so we write "?"
1919 + atom_dict["_atom_site.label_entity_id"].append("?")
1920 + atom_dict["_atom_site.label_seq_id"].append(label_seq_id)
1921 + atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode)
1922 + coord = atom.get_coord()
1923 + atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0])
1924 + atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1])
1925 + atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2])
1926 + atom_dict["_atom_site.occupancy"].append(str(atom.get_occupancy()))
1927 + atom_dict["_atom_site.B_iso_or_equiv"].append(str(atom.get_bfactor()) )
1928 + atom_dict["_atom_site.auth_seq_id"].append(resseq)
1929 + atom_dict["_atom_site.auth_asym_id"].append(chain_id)
1930 + atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n)
1931 +
1932 + # Data block name is the structure ID with special characters removed
1933 + structure_id = ioobj.structure.id
1934 + for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]:
1935 + structure_id = structure_id.replace(c, "")
1936 + atom_dict["data_"] = structure_id
1937 +
1938 + # Set the dictionary and write out using the generic dictionary method
1939 + ioobj.dic = atom_dict
1940 + ioobj._save_dict(fp)
1941 + if close_file:
1942 + fp.close()
1915 1943
1916 def read(handle): 1944 def read(handle):
1945 + """
1946 + A shortcut to parse alignment files with our custom class MutableFastaIterator.
1947 + """
1917 iterator = parse(handle) 1948 iterator = parse(handle)
1918 try: 1949 try:
1919 alignment = next(iterator) 1950 alignment = next(iterator)
...@@ -1926,6 +1957,25 @@ def read(handle): ...@@ -1926,6 +1957,25 @@ def read(handle):
1926 pass 1957 pass
1927 return alignment 1958 return alignment
1928 1959
1960 +def parse(handle):
1961 + """
1962 + A shortcut to parse alignment files with our custom class MutableFastaIterator.
1963 + Called by function read().
1964 + """
1965 + with open(handle, 'r') as fp:
1966 + yield from _mutable_SeqIO_to_alignment_iterator(fp)
1967 +
1968 +def _mutable_SeqIO_to_alignment_iterator(handle):
1969 + """
1970 + A shortcut to parse alignment files with our custom class MutableFastaIterator.
1971 + Used by the parse() function.
1972 + """
1973 + records = list(MutableFastaIterator(handle))
1974 + if records:
1975 + yield MultipleSeqAlignment(records)
1976 +
1977 +# ========================== SQL related ============================
1978 +
1929 def sql_define_tables(conn): 1979 def sql_define_tables(conn):
1930 conn.executescript( 1980 conn.executescript(
1931 """ PRAGMA foreign_keys = on; 1981 """ PRAGMA foreign_keys = on;
...@@ -2085,12 +2135,19 @@ def sql_execute(conn, sql, many=False, data=None, warn_every=10): ...@@ -2085,12 +2135,19 @@ def sql_execute(conn, sql, many=False, data=None, warn_every=10):
2085 time.sleep(0.2) 2135 time.sleep(0.2)
2086 warn("Tried to reach database 100 times and failed. Aborting.", error=True) 2136 warn("Tried to reach database 100 times and failed. Aborting.", error=True)
2087 2137
2138 +# ======================= RNANet Jobs and tasks ======================
2139 +
2088 @trace_unhandled_exceptions 2140 @trace_unhandled_exceptions
2089 def execute_job(j, jobcount): 2141 def execute_job(j, jobcount):
2090 - """Run a Job object.
2091 """ 2142 """
2143 + Run a Job object.
2144 + """
2145 +
2146 + global n_launched, n_skipped, n_finished
2147 +
2092 # increase the counter of running jobs 2148 # increase the counter of running jobs
2093 - running_stats[0] += 1 2149 + with n_launched.get_lock():
2150 + n_launched.value += 1
2094 2151
2095 # Monitor this process 2152 # Monitor this process
2096 m = -1 2153 m = -1
...@@ -2098,7 +2155,7 @@ def execute_job(j, jobcount): ...@@ -2098,7 +2155,7 @@ def execute_job(j, jobcount):
2098 2155
2099 if len(j.cmd_): # The job is a system command 2156 if len(j.cmd_): # The job is a system command
2100 2157
2101 - print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.label}") 2158 + print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.label}")
2102 2159
2103 # Add the command to logfile 2160 # Add the command to logfile
2104 os.makedirs(runDir+"/logs", exist_ok=True) 2161 os.makedirs(runDir+"/logs", exist_ok=True)
...@@ -2114,9 +2171,20 @@ def execute_job(j, jobcount): ...@@ -2114,9 +2171,20 @@ def execute_job(j, jobcount):
2114 2171
2115 # run the command. subprocess.run will be a child of this process, and stays monitored. 2172 # run the command. subprocess.run will be a child of this process, and stays monitored.
2116 start_time = time.time() 2173 start_time = time.time()
2117 - r = subprocess.run(j.cmd_, timeout=j.timeout_, 2174 + r = subprocess.run(j.cmd_, timeout=j.timeout_, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2118 - stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2119 end_time = time.time() 2175 end_time = time.time()
2176 + if r.returncode != 0:
2177 + if r.stderr is not None:
2178 + print(r.stderr, flush=True)
2179 + print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\tIssue faced with {j.label}, skipping it and adding it to known issues (if not known).")
2180 + with n_launched.get_lock():
2181 + n_launched.value -= 1
2182 + with n_skipped.get_lock():
2183 + n_skipped.value += 1
2184 + if j.label not in issues:
2185 + issues.add(j.label)
2186 + with open("known_issues.txt", "a") as iss:
2187 + iss.write(j.label+"\n")
2120 2188
2121 # Stop the Monitor, then get its result 2189 # Stop the Monitor, then get its result
2122 monitor.keep_watching = False 2190 monitor.keep_watching = False
...@@ -2124,7 +2192,7 @@ def execute_job(j, jobcount): ...@@ -2124,7 +2192,7 @@ def execute_job(j, jobcount):
2124 2192
2125 elif j.func_ is not None: 2193 elif j.func_ is not None:
2126 2194
2127 - print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if type(a) != list])})", flush=True) 2195 + print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if type(a) != list])})", flush=True)
2128 2196
2129 with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: 2197 with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
2130 # put the monitor in a different thread 2198 # put the monitor in a different thread
...@@ -2193,7 +2261,7 @@ def execute_joblist(fulljoblist): ...@@ -2193,7 +2261,7 @@ def execute_joblist(fulljoblist):
2193 2261
2194 print("using", n, "processes:") 2262 print("using", n, "processes:")
2195 # execute jobs of priority i that should be processed n by n: 2263 # execute jobs of priority i that should be processed n by n:
2196 - p = Pool(processes=n, maxtasksperchild=1, initializer=init_worker) 2264 + p = Pool(processes=n, maxtasksperchild=1, initializer=init_no_tqdm, initargs=(n_launched, n_finished, n_skipped))
2197 try: 2265 try:
2198 raw_results = p.map(partial(execute_job, jobcount=jobcount), bunch, chunksize=2) 2266 raw_results = p.map(partial(execute_job, jobcount=jobcount), bunch, chunksize=2)
2199 p.close() 2267 p.close()
...@@ -2207,7 +2275,11 @@ def execute_joblist(fulljoblist): ...@@ -2207,7 +2275,11 @@ def execute_joblist(fulljoblist):
2207 for j, r in zip(bunch, raw_results): 2275 for j, r in zip(bunch, raw_results):
2208 j.comp_time = round(r[0], 2) # seconds 2276 j.comp_time = round(r[0], 2) # seconds
2209 j.max_mem = int(r[1]/1000000) # MB 2277 j.max_mem = int(r[1]/1000000) # MB
2210 - results.append((j.label, r[2], round(r[0], 2), int(r[1]/1000000))) 2278 + results.append((j.label, r[2], j.comp_time, j.max_mem))
2279 +
2280 + # Job is finished
2281 + with n_finished.get_lock():
2282 + n_finished.value += 1
2211 2283
2212 # throw back the money 2284 # throw back the money
2213 return results 2285 return results
...@@ -2679,8 +2751,8 @@ def use_infernal(rfam_acc, alignopts): ...@@ -2679,8 +2751,8 @@ def use_infernal(rfam_acc, alignopts):
2679 2751
2680 # Convert Stockholm to aligned FASTA 2752 # Convert Stockholm to aligned FASTA
2681 subprocess.run(["esl-reformat", "-o", path_to_seq_data + f"realigned/{rfam_acc}++.afa", 2753 subprocess.run(["esl-reformat", "-o", path_to_seq_data + f"realigned/{rfam_acc}++.afa",
2682 - "--informat", "stockholm", 2754 + "--informat", "stockholm",
2683 - "afa", path_to_seq_data + f"realigned/{rfam_acc}++.stk"]) 2755 + "afa", path_to_seq_data + f"realigned/{rfam_acc}++.stk"])
2684 subprocess.run(["rm", "-f", "esltmp*"]) # We can use a joker here, because we are not running in parallel for this part. 2756 subprocess.run(["rm", "-f", "esltmp*"]) # We can use a joker here, because we are not running in parallel for this part.
2685 2757
2686 @trace_unhandled_exceptions 2758 @trace_unhandled_exceptions
...@@ -3037,6 +3109,8 @@ def work_save(c, homology=True): ...@@ -3037,6 +3109,8 @@ def work_save(c, homology=True):
3037 3109
3038 df.to_csv(filename, float_format="%.2f", index=False) 3110 df.to_csv(filename, float_format="%.2f", index=False)
3039 3111
3112 +# =========================== Main function =============================
3113 +
3040 if __name__ == "__main__": 3114 if __name__ == "__main__":
3041 3115
3042 fileDir = os.path.dirname(os.path.realpath(__file__)) 3116 fileDir = os.path.dirname(os.path.realpath(__file__))
......
...@@ -7,6 +7,15 @@ In `cmalign` alignments, - means a nucleotide is missing compared to the covaria ...@@ -7,6 +7,15 @@ In `cmalign` alignments, - means a nucleotide is missing compared to the covaria
7 7
8 In the final filtered alignment that we provide for download, the same rule applies, but on top of that, some '.' are replaced by '-' when a gap in the 3D structure (a missing, unresolved nucleotide) is mapped to an insertion gap. 8 In the final filtered alignment that we provide for download, the same rule applies, but on top of that, some '.' are replaced by '-' when a gap in the 3D structure (a missing, unresolved nucleotide) is mapped to an insertion gap.
9 9
10 +* **What are the cmalign options for ?**
11 +
12 +According to Infernal's user guide, Infernal uses an HMM banding technique to accelerate alignment by default. It also takes care of aligning 3'- or 5'-truncated sequences correctly (and we have some).
13 +First, one can choose an algorithm, between `--optacc` (maximizing posterior probabilities, the default) and `--cyk` (maximizing likelihood).
14 +
15 +Then, the use of bands allows faster and more memory-efficient computation, at the price of losing the guarantee of finding the optimal alignment. Bands can be disabled using the `--nonbanded` option. A better idea would be to control the threshold of probability mass to be considered negligible during HMM band calculation with the `--tau` parameter. Higher values of Tau yield greater speedups and lower memory usage, but a greater chance of missing the optimal alignment. In practice, the algorithm explores several Tau values (repeatedly increasing Tau by a factor of 2.0, starting from the original `--tau` value) until the DP matrix size falls below the threshold given by `--mxsize` (default 1028 Mb) or the value of `--maxtau` is reached (in this case, the program fails). One can disable this exploration with the `--fixedtau` option. The default value of `--tau` is 1e-7, and the default `--maxtau` is 0.05. Basically, you may decide on a value of `--mxsize` by dividing your available RAM by the number of cores used with cmalign. If necessary, you may use fewer cores than you have, using the `--cpu` option.
16 +
17 +Finally, if using `--cyk --nonbanded --notrunc --noprob`, one can use the `--small` option to align using the divide-and-conquer CYK algorithm from Eddy 2002, which requires very little memory but a lot of time. The major drawback is that it requires `--notrunc` and `--noprob`, so we give up on the correct alignment of truncated sequences and on the computation of posterior probabilities.
18 +
10 * **Why are there some gap-only columns in the alignment ?** 19 * **Why are there some gap-only columns in the alignment ?**
11 20
12 These columns are not completely gap-only, they contain at least one dash-gap '-'. This means an actual, physical nucleotide which should exist in the 3D structure should be located there. The previous and following nucleotides are **not** contiguous in space in 3D. 21 These columns are not completely gap-only, they contain at least one dash-gap '-'. This means an actual, physical nucleotide which should exist in the 3D structure should be located there. The previous and following nucleotides are **not** contiguous in space in 3D.
...@@ -31,5 +40,5 @@ We first remove the nucleotides whose number is outside the family mapping (if a ...@@ -31,5 +40,5 @@ We first remove the nucleotides whose number is outside the family mapping (if a
31 40
32 * **What are the versions of the dependencies you use ?** 41 * **What are the versions of the dependencies you use ?**
33 42
34 -`cmalign` is v1.1.4, `sina` is v1.6.0, `x3dna-dssr` is v1.9.9, Biopython is v1.78. 43 +`cmalign` is v1.1.4, `sina` is v1.6.0, `x3dna-dssr` is v2.3.2-2021jun29, Biopython is v1.78.
35 44
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -6,23 +6,16 @@ ...@@ -6,23 +6,16 @@
6 * Some chains are not correctly renamed A in the produced separate files (e.g. 1d4r-B) 6 * Some chains are not correctly renamed A in the produced separate files (e.g. 1d4r-B)
7 7
8 ## Alignment issues 8 ## Alignment issues
9 -* [SOLVED] Filtered alignments are shorter than the number of alignment columns saved to the SQL table `align_column`
10 * Chain names appear in triple in the FASTA header (e.g. 1d4r[1]-B 1d4r[1]-B 1d4r[1]-B) 9 * Chain names appear in triple in the FASTA header (e.g. 1d4r[1]-B 1d4r[1]-B 1d4r[1]-B)
11 10
12 -## Technical running issues
13 -* [SOLVED] Files produced by Docker containers are owned by root and require root permissions to be read
14 -* [SOLVED] SQLite WAL files are not deleted properly
15 -
16 # Known feature requests 11 # Known feature requests
17 -* [DONE] Get filtered versions of the sequence alignments containing the 3D chains, publicly available for download 12 +* Automated annotation of detected Recurrent Interaction Networks (RINs), see http://carnaval.lri.fr/ .
18 -* [DONE] Get a consensus residue for each alignement column 13 +* Possibly, automated detection of HLs and ILs from the 3D Motif Atlas (BGSU). Maybe. Their own website already does the job.
19 -* [DONE] Get an option to limit the number of cores 14 +* Weight sequences in alignment to give more importance to rarer sequences
20 -* [DONE] Move to SILVA LSU release 138.1 15 +* Give both gap_percent and insertion_gap_percent
21 -* [UPCOMING] Automated annotation of detected Recurrent Interaction Networks (RINs), see http://carnaval.lri.fr/ .
22 -* [UPCOMING] Possibly, automated detection of HLs and ILs from the 3D Motif Atlas (BGSU). Maybe. Their own website already does the job.
23 -* [UPCOMING] Weight sequences in alignment to give more importance to rarer sequences
24 -* [UPCOMING] Give both gap_percent and insertion_gap_percent
25 * A field estimating the quality of the sequence alignment in table family. 16 * A field estimating the quality of the sequence alignment in table family.
26 * Possibly, more metrics about the alignments coming from Infernal. 17 * Possibly, more metrics about the alignments coming from Infernal.
27 * Run cmscan ourselves from the NDB instead of using Rfam-PDB mappings ? (Iff this actually makes a real difference, untested yet) 18 * Run cmscan ourselves from the NDB instead of using Rfam-PDB mappings ? (Iff this actually makes a real difference, untested yet)
28 * Use and save Infernal alignment bounds and truncation information 19 * Use and save Infernal alignment bounds and truncation information
20 +* Save whether a chain is a representative in the BGSU list
21 +* Annotate unstructured regions (on a nucleotide basis)
......
This diff could not be displayed because it is too large.
1 6ydp_1_AA_1176-2737 1 6ydp_1_AA_1176-2737
2 6ydw_1_AA_1176-2737 2 6ydw_1_AA_1176-2737
3 2z9q_1_A_1-72 3 2z9q_1_A_1-72
4 -1ml5_1_b_5-121
5 -1ml5_1_a_1-2914
6 3ep2_1_Y_1-72 4 3ep2_1_Y_1-72
7 3eq3_1_Y_1-72 5 3eq3_1_Y_1-72
8 4v48_1_A6_1-73 6 4v48_1_A6_1-73
9 -1ml5_1_A_2-1520 7 +1ml5_1_b_5-121
8 +1ml5_1_a_1-2914
10 1qzb_1_B_1-73 9 1qzb_1_B_1-73
11 1qza_1_B_1-73 10 1qza_1_B_1-73
12 1ls2_1_B_1-73 11 1ls2_1_B_1-73
12 +1ml5_1_A_2-1520
13 1gsg_1_T_1-72 13 1gsg_1_T_1-72
14 7d1a_1_A_805-902 14 7d1a_1_A_805-902
15 7d0g_1_A_805-913 15 7d0g_1_A_805-913
...@@ -22,15 +22,12 @@ ...@@ -22,15 +22,12 @@
22 2rdo_1_A_3-118 22 2rdo_1_A_3-118
23 4v48_1_A9_3-118 23 4v48_1_A9_3-118
24 4v47_1_A9_3-118 24 4v47_1_A9_3-118
25 +4v42_1_BA_1-2914
26 +4v42_1_BB_5-121
25 2ob7_1_A_10-319 27 2ob7_1_A_10-319
26 1x1l_1_A_1-130 28 1x1l_1_A_1-130
27 1zc8_1_Z_1-91 29 1zc8_1_Z_1-91
28 2ob7_1_D_1-130 30 2ob7_1_D_1-130
29 -4v42_1_BA_1-2914
30 -4v42_1_BB_5-121
31 -1r2x_1_C_1-58
32 -1r2w_1_C_1-58
33 -1eg0_1_L_1-56
34 3dg2_1_A_1-1542 31 3dg2_1_A_1-1542
35 3dg0_1_A_1-1542 32 3dg0_1_A_1-1542
36 4v48_1_BA_1-1543 33 4v48_1_BA_1-1543
...@@ -46,11 +43,14 @@ ...@@ -46,11 +43,14 @@
46 3dg4_1_B_1-2904 43 3dg4_1_B_1-2904
47 3dg5_1_B_1-2904 44 3dg5_1_B_1-2904
48 1eg0_1_O_1-73 45 1eg0_1_O_1-73
49 -1zc8_1_A_1-59 46 +1r2x_1_C_1-58
47 +1r2w_1_C_1-58
48 +1eg0_1_L_1-56
50 1jgq_1_A_2-1520 49 1jgq_1_A_2-1520
51 4v42_1_AA_2-1520 50 4v42_1_AA_2-1520
52 1jgo_1_A_2-1520 51 1jgo_1_A_2-1520
53 1jgp_1_A_2-1520 52 1jgp_1_A_2-1520
53 +1zc8_1_A_1-59
54 1mvr_1_D_1-59 54 1mvr_1_D_1-59
55 4c9d_1_D_29-1 55 4c9d_1_D_29-1
56 4c9d_1_C_29-1 56 4c9d_1_C_29-1
...@@ -61,12 +61,6 @@ ...@@ -61,12 +61,6 @@
61 3ep2_1_B_1-50 61 3ep2_1_B_1-50
62 3eq3_1_B_1-50 62 3eq3_1_B_1-50
63 3eq4_1_B_1-50 63 3eq4_1_B_1-50
64 -3pgw_1_R_1-164
65 -3pgw_1_N_1-164
66 -3cw1_1_x_1-138
67 -3cw1_1_w_1-138
68 -3cw1_1_V_1-138
69 -3cw1_1_v_1-138
70 2iy3_1_B_9-105 64 2iy3_1_B_9-105
71 3jcr_1_N_1-106 65 3jcr_1_N_1-106
72 2vaz_1_A_64-177 66 2vaz_1_A_64-177
...@@ -78,6 +72,12 @@ ...@@ -78,6 +72,12 @@
78 4v5z_1_BY_2-113 72 4v5z_1_BY_2-113
79 4v5z_1_BZ_1-70 73 4v5z_1_BZ_1-70
80 4v5z_1_B1_2-123 74 4v5z_1_B1_2-123
75 +3pgw_1_R_1-164
76 +3pgw_1_N_1-164
77 +3cw1_1_x_1-138
78 +3cw1_1_w_1-138
79 +3cw1_1_V_1-138
80 +3cw1_1_v_1-138
81 1mvr_1_B_1-96 81 1mvr_1_B_1-96
82 4adx_1_0_1-2923 82 4adx_1_0_1-2923
83 3eq4_1_Y_1-69 83 3eq4_1_Y_1-69
...@@ -295,7 +295,12 @@ ...@@ -295,7 +295,12 @@
295 6ucq_1_2Y 295 6ucq_1_2Y
296 4w2e_1_X 296 4w2e_1_X
297 6ucq_1_2X 297 6ucq_1_2X
298 +7n1p_1_DT
299 +7n2u_1_DT
298 6yss_1_W 300 6yss_1_W
301 +7n30_1_DT
302 +7n31_1_DT
303 +7n2c_1_DT
299 5afi_1_Y 304 5afi_1_Y
300 5uq8_1_Z 305 5uq8_1_Z
301 5wdt_1_Y 306 5wdt_1_Y
...@@ -333,6 +338,22 @@ ...@@ -333,6 +338,22 @@
333 4v4j_1_X 338 4v4j_1_X
334 4v4i_1_X 339 4v4i_1_X
335 4v42_1_BB 340 4v42_1_BB
341 +4jrc_1_B
342 +4jrc_1_A
343 +6lkq_1_S
344 +5h5u_1_H
345 +7d6z_1_F
346 +5lze_1_Y
347 +5lze_1_V
348 +5lze_1_X
349 +3jcj_1_G
350 +6o7k_1_G
351 +3dg2_1_A
352 +3dg0_1_A
353 +4v48_1_BA
354 +4v47_1_BA
355 +3dg4_1_A
356 +3dg5_1_A
336 6d30_1_C 357 6d30_1_C
337 6j7z_1_C 358 6j7z_1_C
338 3er9_1_D 359 3er9_1_D
...@@ -437,25 +458,22 @@ ...@@ -437,25 +458,22 @@
437 6doc_1_B 458 6doc_1_B
438 6doe_1_B 459 6doe_1_B
439 6n6g_1_D 460 6n6g_1_D
440 -6lkq_1_S
441 -5h5u_1_H
442 -7d6z_1_F
443 -5lze_1_Y
444 -5lze_1_V
445 -5lze_1_X
446 -3jcj_1_G
447 -6o7k_1_G
448 -3dg2_1_A
449 -3dg0_1_A
450 -4v48_1_BA
451 -4v47_1_BA
452 -3dg4_1_A
453 -3dg5_1_A
454 4b3r_1_W 461 4b3r_1_W
455 4b3t_1_W 462 4b3t_1_W
456 4b3s_1_W 463 4b3s_1_W
464 +7b5k_1_X
457 5o2r_1_X 465 5o2r_1_X
458 5kcs_1_1X 466 5kcs_1_1X
467 +7n1p_1_PT
468 +7n2u_1_PT
469 +7n30_1_PT
470 +7n31_1_PT
471 +7n2c_1_PT
472 +6yl5_1_I
473 +6yl5_1_E
474 +6yl5_1_A
475 +6yl5_1_K
476 +6yl5_1_G
459 6zvk_1_E2 477 6zvk_1_E2
460 6zvk_1_H2 478 6zvk_1_H2
461 7a01_1_E2 479 7a01_1_E2
...@@ -526,6 +544,7 @@ ...@@ -526,6 +544,7 @@
526 6w6l_1_V 544 6w6l_1_V
527 6olf_1_V 545 6olf_1_V
528 3erc_1_G 546 3erc_1_G
547 +4qjd_1_D
529 6of1_1_1W 548 6of1_1_1W
530 6cae_1_1Y 549 6cae_1_1Y
531 6o97_1_1W 550 6o97_1_1W
...@@ -557,7 +576,9 @@ ...@@ -557,7 +576,9 @@
557 4v48_1_A6 576 4v48_1_A6
558 2z9q_1_A 577 2z9q_1_A
559 4hot_1_X 578 4hot_1_X
579 +5ns4_1_C
560 6d2z_1_C 580 6d2z_1_C
581 +7eh0_1_I
561 4tu0_1_F 582 4tu0_1_F
562 4tu0_1_G 583 4tu0_1_G
563 6r9o_1_B 584 6r9o_1_B
...@@ -578,20 +599,23 @@ ...@@ -578,20 +599,23 @@
578 6sv4_1_NC 599 6sv4_1_NC
579 6i7o_1_NB 600 6i7o_1_NB
580 1ml5_1_A 601 1ml5_1_A
602 +7nsq_1_V
581 6swa_1_Q 603 6swa_1_Q
582 6swa_1_R 604 6swa_1_R
583 -3j6x_1_IR
584 -3j6y_1_IR
585 6ole_1_T 605 6ole_1_T
586 6om0_1_T 606 6om0_1_T
587 6oli_1_T 607 6oli_1_T
588 6om7_1_T 608 6om7_1_T
589 6olf_1_T 609 6olf_1_T
590 6w6l_1_T 610 6w6l_1_T
611 +6tnu_1_M
612 +5mc6_1_M
613 +7nrc_1_SM
591 6tb3_1_N 614 6tb3_1_N
592 7b7d_1_SM 615 7b7d_1_SM
593 7b7d_1_SN 616 7b7d_1_SN
594 6tnu_1_N 617 6tnu_1_N
618 +7nrc_1_SN
595 7nrd_1_SN 619 7nrd_1_SN
596 6zot_1_C 620 6zot_1_C
597 2uxb_1_X 621 2uxb_1_X
...@@ -602,6 +626,9 @@ ...@@ -602,6 +626,9 @@
602 1eg0_1_M 626 1eg0_1_M
603 3eq4_1_D 627 3eq4_1_D
604 5o1y_1_B 628 5o1y_1_B
629 +4kzy_1_I
630 +4kzz_1_I
631 +4kzx_1_I
605 3jcr_1_H 632 3jcr_1_H
606 6dzi_1_H 633 6dzi_1_H
607 5zeu_1_A 634 5zeu_1_A
...@@ -705,7 +732,6 @@ ...@@ -705,7 +732,6 @@
705 6ip6_1_ZZ 732 6ip6_1_ZZ
706 6uu3_1_333 733 6uu3_1_333
707 6uu1_1_333 734 6uu1_1_333
708 -1pn8_1_D
709 3er8_1_H 735 3er8_1_H
710 3er8_1_G 736 3er8_1_G
711 3er8_1_F 737 3er8_1_F
...@@ -744,9 +770,8 @@ ...@@ -744,9 +770,8 @@
744 4wtl_1_T 770 4wtl_1_T
745 4wtl_1_P 771 4wtl_1_P
746 1xnq_1_W 772 1xnq_1_W
747 -1x18_1_C 773 +7n2v_1_DT
748 -1x18_1_B 774 +4peh_1_Z
749 -1x18_1_D
750 1vq6_1_4 775 1vq6_1_4
751 4am3_1_D 776 4am3_1_D
752 4am3_1_H 777 4am3_1_H
...@@ -758,12 +783,45 @@ ...@@ -758,12 +783,45 @@
758 4wtj_1_T 783 4wtj_1_T
759 4wtj_1_P 784 4wtj_1_P
760 4xbf_1_D 785 4xbf_1_D
786 +5w1h_1_B
761 6n6d_1_D 787 6n6d_1_D
762 6n6k_1_C 788 6n6k_1_C
763 6n6k_1_D 789 6n6k_1_D
764 3rtj_1_D 790 3rtj_1_D
765 6ty9_1_M 791 6ty9_1_M
766 6tz1_1_N 792 6tz1_1_N
793 +6q1h_1_D
794 +6q1h_1_H
795 +6p7p_1_F
796 +6p7p_1_E
797 +6p7p_1_D
798 +6vm6_1_J
799 +6vm6_1_G
800 +6wan_1_K
801 +6wan_1_H
802 +6wan_1_G
803 +6wan_1_L
804 +6wan_1_I
805 +6ywo_1_F
806 +6wan_1_J
807 +4oau_1_A
808 +6ywo_1_E
809 +6ywo_1_K
810 +6vm6_1_I
811 +6vm6_1_H
812 +6ywo_1_I
813 +2a1r_1_C
814 +6m6v_1_F
815 +6m6v_1_E
816 +2a1r_1_D
817 +3gpq_1_E
818 +3gpq_1_F
819 +6o79_1_C
820 +6vm6_1_K
821 +6m6v_1_G
822 +6hyu_1_D
823 +1laj_1_R
824 +6ybv_1_K
767 6sce_1_B 825 6sce_1_B
768 6xl1_1_C 826 6xl1_1_C
769 6scf_1_I 827 6scf_1_I
...@@ -809,11 +867,12 @@ ...@@ -809,11 +867,12 @@
809 1y1y_1_P 867 1y1y_1_P
810 5zuu_1_I 868 5zuu_1_I
811 5zuu_1_G 869 5zuu_1_G
870 +7am2_1_R1
812 4peh_1_W 871 4peh_1_W
813 4peh_1_V 872 4peh_1_V
814 4peh_1_X 873 4peh_1_X
815 4peh_1_Y 874 4peh_1_Y
816 -4peh_1_Z 875 +7d8c_1_C
817 6mkn_1_W 876 6mkn_1_W
818 7kl3_1_B 877 7kl3_1_B
819 4cxg_1_C 878 4cxg_1_C
...@@ -826,14 +885,7 @@ ...@@ -826,14 +885,7 @@
826 4eya_1_F 885 4eya_1_F
827 4eya_1_Q 886 4eya_1_Q
828 4eya_1_R 887 4eya_1_R
829 -1qzc_1_B
830 -1t1o_1_B
831 1mvr_1_C 888 1mvr_1_C
832 -1t1m_1_B
833 -1t1o_1_C
834 -1t1m_1_A
835 -1t1o_1_A
836 -2r1g_1_B
837 4ht9_1_E 889 4ht9_1_E
838 6z1p_1_AB 890 6z1p_1_AB
839 6z1p_1_AA 891 6z1p_1_AA
...@@ -844,11 +896,9 @@ ...@@ -844,11 +896,9 @@
844 5uk4_1_W 896 5uk4_1_W
845 5uk4_1_U 897 5uk4_1_U
846 5f6c_1_E 898 5f6c_1_E
899 +7nwh_1_HH
847 4rcj_1_B 900 4rcj_1_B
848 1xnr_1_W 901 1xnr_1_W
849 -2agn_1_A
850 -2agn_1_C
851 -2agn_1_B
852 6e0o_1_C 902 6e0o_1_C
853 6o75_1_D 903 6o75_1_D
854 6o75_1_C 904 6o75_1_C
...@@ -866,8 +916,7 @@ ...@@ -866,8 +916,7 @@
866 1ibm_1_Z 916 1ibm_1_Z
867 4dr5_1_V 917 4dr5_1_V
868 4d61_1_J 918 4d61_1_J
869 -1trj_1_B 919 +7nwg_1_Q3
870 -1trj_1_C
871 5tbw_1_SR 920 5tbw_1_SR
872 6hhq_1_SR 921 6hhq_1_SR
873 6zvi_1_H 922 6zvi_1_H
...@@ -883,6 +932,8 @@ ...@@ -883,6 +932,8 @@
883 5k8h_1_A 932 5k8h_1_A
884 5z4a_1_B 933 5z4a_1_B
885 3jbu_1_V 934 3jbu_1_V
935 +4ts2_1_Y
936 +4ts0_1_Y
886 1h2c_1_R 937 1h2c_1_R
887 1h2d_1_S 938 1h2d_1_S
888 1h2d_1_R 939 1h2d_1_R
...@@ -909,6 +960,7 @@ ...@@ -909,6 +960,7 @@
909 6ppn_1_I 960 6ppn_1_I
910 5flx_1_Z 961 5flx_1_Z
911 6eri_1_AX 962 6eri_1_AX
963 +7k5l_1_R
912 7d80_1_Y 964 7d80_1_Y
913 1zc8_1_A 965 1zc8_1_A
914 1zc8_1_C 966 1zc8_1_C
...@@ -916,6 +968,7 @@ ...@@ -916,6 +968,7 @@
916 1zc8_1_G 968 1zc8_1_G
917 1zc8_1_I 969 1zc8_1_I
918 1zc8_1_H 970 1zc8_1_H
971 +6bfb_1_Y
919 1zc8_1_J 972 1zc8_1_J
920 7du2_1_R 973 7du2_1_R
921 4v8z_1_CX 974 4v8z_1_CX
...@@ -951,6 +1004,8 @@ ...@@ -951,6 +1004,8 @@
951 4x9e_1_H 1004 4x9e_1_H
952 6z1p_1_BB 1005 6z1p_1_BB
953 6z1p_1_BA 1006 6z1p_1_BA
1007 +3p22_1_C
1008 +3p22_1_G
954 2uxd_1_X 1009 2uxd_1_X
955 6ywe_1_BB 1010 6ywe_1_BB
956 3ol9_1_D 1011 3ol9_1_D
...@@ -973,8 +1028,6 @@ ...@@ -973,8 +1028,6 @@
973 3ol7_1_H 1028 3ol7_1_H
974 3ol8_1_L 1029 3ol8_1_L
975 3ol8_1_P 1030 3ol8_1_P
976 -1qzc_1_C
977 -1qzc_1_A
978 6yrq_1_E 1031 6yrq_1_E
979 6yrq_1_H 1032 6yrq_1_H
980 6yrq_1_G 1033 6yrq_1_G
...@@ -1054,6 +1107,7 @@ ...@@ -1054,6 +1107,7 @@
1054 3iy9_1_A 1107 3iy9_1_A
1055 4wtk_1_T 1108 4wtk_1_T
1056 4wtk_1_P 1109 4wtk_1_P
1110 +6wlj_3_A
1057 1vqn_1_4 1111 1vqn_1_4
1058 4oav_1_C 1112 4oav_1_C
1059 4oav_1_A 1113 4oav_1_A
...@@ -1070,18 +1124,13 @@ ...@@ -1070,18 +1124,13 @@
1070 3eq3_1_B 1124 3eq3_1_B
1071 3eq4_1_B 1125 3eq4_1_B
1072 4i67_1_B 1126 4i67_1_B
1073 -3pgw_1_R 1127 +4jf2_1_A
1074 -3pgw_1_N
1075 -3cw1_1_X
1076 -3cw1_1_W
1077 -3cw1_1_V
1078 -7b0y_1_A
1079 6k32_1_T 1128 6k32_1_T
1080 6k32_1_P 1129 6k32_1_P
1081 5mmj_1_A 1130 5mmj_1_A
1082 5x8r_1_A 1131 5x8r_1_A
1083 -2agn_1_E 1132 +3fu2_1_B
1084 -2agn_1_D 1133 +3fu2_1_A
1085 4v5z_1_BD 1134 4v5z_1_BD
1086 6yw5_1_AA 1135 6yw5_1_AA
1087 6ywe_1_AA 1136 6ywe_1_AA
...@@ -1117,6 +1166,17 @@ ...@@ -1117,6 +1166,17 @@
1117 3p6y_1_Q 1166 3p6y_1_Q
1118 3p6y_1_W 1167 3p6y_1_W
1119 5dto_1_B 1168 5dto_1_B
1169 +6yml_1_A
1170 +6ymm_1_A
1171 +6ymi_1_M
1172 +6ymi_1_F
1173 +6ymi_1_A
1174 +6ylb_1_F
1175 +6ymi_1_C
1176 +6ymj_1_C
1177 +6ylb_1_C
1178 +6ymj_1_I
1179 +6ymj_1_O
1120 4cxh_1_X 1180 4cxh_1_X
1121 1uvj_1_F 1181 1uvj_1_F
1122 1uvj_1_D 1182 1uvj_1_D
...@@ -1153,6 +1213,12 @@ ...@@ -1153,6 +1213,12 @@
1153 4v4f_1_B4 1213 4v4f_1_B4
1154 4v4f_1_A6 1214 4v4f_1_A6
1155 4v4f_1_B2 1215 4v4f_1_B2
1216 +7m4y_1_V
1217 +7m4x_1_V
1218 +6v3a_1_V
1219 +6v39_1_V
1220 +6ck5_1_A
1221 +6ck5_1_B
1156 5it9_1_I 1222 5it9_1_I
1157 7jqc_1_I 1223 7jqc_1_I
1158 5zsb_1_C 1224 5zsb_1_C
...@@ -1162,6 +1228,8 @@ ...@@ -1162,6 +1228,8 @@
1162 1cwp_1_D 1228 1cwp_1_D
1163 3jcr_1_N 1229 3jcr_1_N
1164 6gfw_1_R 1230 6gfw_1_R
1231 +3j6x_1_IR
1232 +3j6y_1_IR
1165 2vaz_1_A 1233 2vaz_1_A
1166 6zm6_1_X 1234 6zm6_1_X
1167 6zm5_1_X 1235 6zm5_1_X
...@@ -1177,11 +1245,11 @@ ...@@ -1177,11 +1245,11 @@
1177 5uh6_1_I 1245 5uh6_1_I
1178 6l74_1_I 1246 6l74_1_I
1179 5uh9_1_I 1247 5uh9_1_I
1248 +4v5z_1_BS
1180 2ftc_1_R 1249 2ftc_1_R
1181 7a5j_1_X 1250 7a5j_1_X
1182 6sag_1_R 1251 6sag_1_R
1183 4udv_1_R 1252 4udv_1_R
1184 -2r1g_1_E
1185 5zsc_1_D 1253 5zsc_1_D
1186 5zsc_1_C 1254 5zsc_1_C
1187 6woy_1_I 1255 6woy_1_I
...@@ -1209,7 +1277,7 @@ ...@@ -1209,7 +1277,7 @@
1209 3m85_1_X 1277 3m85_1_X
1210 3m85_1_Z 1278 3m85_1_Z
1211 3m85_1_Y 1279 3m85_1_Y
1212 -1e8s_1_C 1280 +5u34_1_B
1213 5wnp_1_B 1281 5wnp_1_B
1214 5wnv_1_B 1282 5wnv_1_B
1215 5yts_1_B 1283 5yts_1_B
...@@ -1232,8 +1300,11 @@ ...@@ -1232,8 +1300,11 @@
1232 6ij2_1_E 1300 6ij2_1_E
1233 3u2e_1_D 1301 3u2e_1_D
1234 3u2e_1_C 1302 3u2e_1_C
1303 +7eh1_1_I
1235 5uef_1_C 1304 5uef_1_C
1236 5uef_1_D 1305 5uef_1_D
1306 +7eh2_1_R
1307 +7eh2_1_I
1237 4x4u_1_H 1308 4x4u_1_H
1238 4afy_1_D 1309 4afy_1_D
1239 6oy5_1_I 1310 6oy5_1_I
...@@ -1244,13 +1315,15 @@ ...@@ -1244,13 +1315,15 @@
1244 6s0m_1_C 1315 6s0m_1_C
1245 6ymw_1_C 1316 6ymw_1_C
1246 7a5g_1_J 1317 7a5g_1_J
1318 +1m5k_1_B
1319 +1m5o_1_E
1320 +1m5v_1_B
1247 6gx6_1_B 1321 6gx6_1_B
1248 4k4s_1_D 1322 4k4s_1_D
1249 4k4s_1_H 1323 4k4s_1_H
1250 4k4t_1_H 1324 4k4t_1_H
1251 4k4t_1_D 1325 4k4t_1_D
1252 1zn1_1_C 1326 1zn1_1_C
1253 -1zn0_1_C
1254 1xpu_1_G 1327 1xpu_1_G
1255 1xpu_1_L 1328 1xpu_1_L
1256 1xpr_1_L 1329 1xpr_1_L
...@@ -1274,7 +1347,9 @@ ...@@ -1274,7 +1347,9 @@
1274 6gc5_1_F 1347 6gc5_1_F
1275 6gc5_1_H 1348 6gc5_1_H
1276 6gc5_1_G 1349 6gc5_1_G
1350 +4rne_1_C
1277 1n1h_1_B 1351 1n1h_1_B
1352 +7n2v_1_PT
1278 4ohz_1_B 1353 4ohz_1_B
1279 6t83_1_6B 1354 6t83_1_6B
1280 4gv6_1_C 1355 4gv6_1_C
...@@ -1290,6 +1365,9 @@ ...@@ -1290,6 +1365,9 @@
1290 4v5z_1_BC 1365 4v5z_1_BC
1291 5y88_1_X 1366 5y88_1_X
1292 4v5z_1_BB 1367 4v5z_1_BB
1368 +5y85_1_D
1369 +5y85_1_B
1370 +5y87_1_D
1293 3j0o_1_H 1371 3j0o_1_H
1294 3j0l_1_H 1372 3j0l_1_H
1295 3j0p_1_H 1373 3j0p_1_H
...@@ -1351,11 +1429,11 @@ ...@@ -1351,11 +1429,11 @@
1351 4e6b_1_A 1429 4e6b_1_A
1352 4e6b_1_B 1430 4e6b_1_B
1353 6a6l_1_D 1431 6a6l_1_D
1354 -4v5z_1_BS
1355 4v8t_1_1 1432 4v8t_1_1
1356 1uvi_1_D 1433 1uvi_1_D
1357 1uvi_1_F 1434 1uvi_1_F
1358 1uvi_1_E 1435 1uvi_1_E
1436 +3gs5_1_A
1359 4m7d_1_P 1437 4m7d_1_P
1360 4k4u_1_D 1438 4k4u_1_D
1361 4k4u_1_H 1439 4k4u_1_H
...@@ -1376,8 +1454,8 @@ ...@@ -1376,8 +1454,8 @@
1376 6ip5_1_2M 1454 6ip5_1_2M
1377 6ip6_1_2M 1455 6ip6_1_2M
1378 6qcs_1_M 1456 6qcs_1_M
1457 +7b5k_1_Z
1379 486d_1_G 1458 486d_1_G
1380 -2r1g_1_C
1381 486d_1_F 1459 486d_1_F
1382 4v5z_1_B0 1460 4v5z_1_B0
1383 4nia_1_O 1461 4nia_1_O
...@@ -1391,11 +1469,11 @@ ...@@ -1391,11 +1469,11 @@
1391 4oq9_1_F 1469 4oq9_1_F
1392 4oq9_1_L 1470 4oq9_1_L
1393 6r9q_1_B 1471 6r9q_1_B
1472 +7m4u_1_A
1394 6v3a_1_SN1 1473 6v3a_1_SN1
1395 6v3b_1_SN1 1474 6v3b_1_SN1
1396 6v39_1_SN1 1475 6v39_1_SN1
1397 6v3e_1_SN1 1476 6v3e_1_SN1
1398 -1pn7_1_C
1399 1mj1_1_Q 1477 1mj1_1_Q
1400 1mj1_1_R 1478 1mj1_1_R
1401 4dr6_1_V 1479 4dr6_1_V
...@@ -1437,14 +1515,25 @@ ...@@ -1437,14 +1515,25 @@
1437 6ow3_1_I 1515 6ow3_1_I
1438 6ovy_1_I 1516 6ovy_1_I
1439 6oy6_1_I 1517 6oy6_1_I
1440 -4bbl_1_Y
1441 -4bbl_1_Z
1442 4qvd_1_H 1518 4qvd_1_H
1443 5gxi_1_B 1519 5gxi_1_B
1444 3iy8_1_A 1520 3iy8_1_A
1445 -6tnu_1_M 1521 +7n06_1_G
1446 -5mc6_1_M 1522 +7n06_1_H
1523 +7n06_1_I
1524 +7n06_1_J
1525 +7n06_1_K
1526 +7n06_1_L
1527 +7n33_1_G
1528 +7n33_1_H
1529 +7n33_1_I
1530 +7n33_1_J
1531 +7n33_1_K
1532 +7n33_1_L
1447 5mc6_1_N 1533 5mc6_1_N
1534 +2qwy_1_C
1535 +2qwy_1_A
1536 +2qwy_1_B
1448 4eya_1_O 1537 4eya_1_O
1449 4eya_1_P 1538 4eya_1_P
1450 4eya_1_C 1539 4eya_1_C
...@@ -1453,8 +1542,6 @@ ...@@ -1453,8 +1542,6 @@
1453 6htq_1_W 1542 6htq_1_W
1454 6htq_1_U 1543 6htq_1_U
1455 6uu6_1_333 1544 6uu6_1_333
1456 -6v3a_1_V
1457 -6v39_1_V
1458 5a0v_1_F 1545 5a0v_1_F
1459 3avt_1_T 1546 3avt_1_T
1460 6d1v_1_C 1547 6d1v_1_C
...@@ -1497,6 +1584,7 @@ ...@@ -1497,6 +1584,7 @@
1497 6o78_1_E 1584 6o78_1_E
1498 6xa1_1_BV 1585 6xa1_1_BV
1499 6ha8_1_X 1586 6ha8_1_X
1587 +3bnp_1_B
1500 1m8w_1_E 1588 1m8w_1_E
1501 1m8w_1_F 1589 1m8w_1_F
1502 5udi_1_B 1590 5udi_1_B
...@@ -1520,16 +1608,29 @@ ...@@ -1520,16 +1608,29 @@
1520 6een_1_H 1608 6een_1_H
1521 4wti_1_T 1609 4wti_1_T
1522 4wti_1_P 1610 4wti_1_P
1611 +6dlr_1_A
1612 +6dlt_1_A
1613 +6dls_1_A
1614 +6dlq_1_A
1615 +6dnr_1_A
1523 5l3p_1_Y 1616 5l3p_1_Y
1524 4hor_1_X 1617 4hor_1_X
1525 3rzo_1_R 1618 3rzo_1_R
1619 +5wlh_1_B
1526 2f4v_1_Z 1620 2f4v_1_Z
1621 +5ml7_1_B
1527 1qln_1_R 1622 1qln_1_R
1623 +3pgw_1_R
1624 +3pgw_1_N
1625 +3cw1_1_X
1626 +3cw1_1_W
1627 +3cw1_1_V
1628 +7b0y_1_A
1528 6ogy_1_M 1629 6ogy_1_M
1529 6ogy_1_N 1630 6ogy_1_N
1530 6uej_1_B 1631 6uej_1_B
1632 +7kga_1_A
1531 6ywy_1_BB 1633 6ywy_1_BB
1532 -1x18_1_A
1533 5ytx_1_B 1634 5ytx_1_B
1534 4g0a_1_H 1635 4g0a_1_H
1535 6r9p_1_B 1636 6r9p_1_B
...@@ -1572,12 +1673,8 @@ ...@@ -1572,12 +1673,8 @@
1572 5mre_1_AA 1673 5mre_1_AA
1573 5mrf_1_AA 1674 5mrf_1_AA
1574 7jhy_1_Z 1675 7jhy_1_Z
1575 -2r1g_1_A
1576 -2r1g_1_D
1577 -2r1g_1_F
1578 3eq4_1_Y 1676 3eq4_1_Y
1579 4wkr_1_C 1677 4wkr_1_C
1580 -2r1g_1_X
1581 4v99_1_EC 1678 4v99_1_EC
1582 4v99_1_AC 1679 4v99_1_AC
1583 4v99_1_BH 1680 4v99_1_BH
...@@ -1641,44 +1738,21 @@ ...@@ -1641,44 +1738,21 @@
1641 6rcl_1_C 1738 6rcl_1_C
1642 5jju_1_C 1739 5jju_1_C
1643 4ejt_1_G 1740 4ejt_1_G
1741 +1et4_1_A
1742 +1et4_1_C
1743 +1et4_1_B
1744 +1et4_1_D
1745 +1et4_1_E
1746 +1ddy_1_C
1747 +1ddy_1_A
1748 +1ddy_1_E
1644 6lkq_1_W 1749 6lkq_1_W
1750 +6r47_1_A
1645 3qsu_1_P 1751 3qsu_1_P
1646 3qsu_1_R 1752 3qsu_1_R
1647 2xs7_1_B 1753 2xs7_1_B
1648 1n38_1_B 1754 1n38_1_B
1649 4qvc_1_G 1755 4qvc_1_G
1650 -6q1h_1_D
1651 -6q1h_1_H
1652 -6p7p_1_F
1653 -6p7p_1_E
1654 -6p7p_1_D
1655 -6vm6_1_J
1656 -6vm6_1_G
1657 -6wan_1_K
1658 -6wan_1_H
1659 -6wan_1_G
1660 -6wan_1_L
1661 -6wan_1_I
1662 -6ywo_1_F
1663 -6wan_1_J
1664 -4oau_1_A
1665 -6ywo_1_E
1666 -6ywo_1_K
1667 -6vm6_1_I
1668 -6vm6_1_H
1669 -6ywo_1_I
1670 -2a1r_1_C
1671 -6m6v_1_F
1672 -6m6v_1_E
1673 -2a1r_1_D
1674 -3gpq_1_E
1675 -3gpq_1_F
1676 -6o79_1_C
1677 -6vm6_1_K
1678 -6m6v_1_G
1679 -6hyu_1_D
1680 -1laj_1_R
1681 -6ybv_1_K
1682 6mpf_1_W 1756 6mpf_1_W
1683 6spc_1_A 1757 6spc_1_A
1684 6spe_1_A 1758 6spe_1_A
...@@ -1687,14 +1761,12 @@ ...@@ -1687,14 +1761,12 @@
1687 6fti_1_V 1761 6fti_1_V
1688 6ftj_1_V 1762 6ftj_1_V
1689 6ftg_1_V 1763 6ftg_1_V
1764 +3npn_1_A
1690 4g0a_1_G 1765 4g0a_1_G
1691 4g0a_1_F 1766 4g0a_1_F
1692 4g0a_1_E 1767 4g0a_1_E
1693 2b2d_1_S 1768 2b2d_1_S
1694 5hkc_1_C 1769 5hkc_1_C
1695 -4kzy_1_I
1696 -4kzz_1_I
1697 -4kzx_1_I
1698 1rmv_1_B 1770 1rmv_1_B
1699 4qu7_1_X 1771 4qu7_1_X
1700 4qu7_1_V 1772 4qu7_1_V
...@@ -1710,25 +1782,3 @@ ...@@ -1710,25 +1782,3 @@
1710 6pmi_1_3 1782 6pmi_1_3
1711 6pmj_1_3 1783 6pmj_1_3
1712 5hjz_1_C 1784 5hjz_1_C
1713 -7nrc_1_SM
1714 -7nrc_1_SN
1715 -7am2_1_R1
1716 -7k5l_1_R
1717 -7b5k_1_X
1718 -7d8c_1_C
1719 -7m4y_1_V
1720 -7m4x_1_V
1721 -7b5k_1_Z
1722 -7m4u_1_A
1723 -7n06_1_G
1724 -7n06_1_H
1725 -7n06_1_I
1726 -7n06_1_J
1727 -7n06_1_K
1728 -7n06_1_L
1729 -7n33_1_G
1730 -7n33_1_H
1731 -7n33_1_I
1732 -7n33_1_J
1733 -7n33_1_K
1734 -7n33_1_L
......
...@@ -7,12 +7,6 @@ Could not find nucleotides of chain AA in annotation 6ydw.json. Either there is ...@@ -7,12 +7,6 @@ Could not find nucleotides of chain AA in annotation 6ydw.json. Either there is
7 2z9q_1_A_1-72 7 2z9q_1_A_1-72
8 DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A_1-72. 8 DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A_1-72.
9 9
10 -1ml5_1_b_5-121
11 -Could not find nucleotides of chain b in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
12 -
13 -1ml5_1_a_1-2914
14 -Could not find nucleotides of chain a in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
15 -
16 3ep2_1_Y_1-72 10 3ep2_1_Y_1-72
17 DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_Y_1-72. 11 DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_Y_1-72.
18 12
...@@ -22,8 +16,11 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_Y_1-72. ...@@ -22,8 +16,11 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_Y_1-72.
22 4v48_1_A6_1-73 16 4v48_1_A6_1-73
23 DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A6_1-73. 17 DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A6_1-73.
24 18
25 -1ml5_1_A_2-1520 19 +1ml5_1_b_5-121
26 -Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 20 +Could not find nucleotides of chain b in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
21 +
22 +1ml5_1_a_1-2914
23 +Could not find nucleotides of chain a in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
27 24
28 1qzb_1_B_1-73 25 1qzb_1_B_1-73
29 DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B_1-73. 26 DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B_1-73.
...@@ -34,6 +31,9 @@ DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B_1-73. ...@@ -34,6 +31,9 @@ DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B_1-73.
34 1ls2_1_B_1-73 31 1ls2_1_B_1-73
35 DSSR warning 1ls2.json: no nucleotides found. Ignoring 1ls2_1_B_1-73. 32 DSSR warning 1ls2.json: no nucleotides found. Ignoring 1ls2_1_B_1-73.
36 33
34 +1ml5_1_A_2-1520
35 +Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
36 +
37 1gsg_1_T_1-72 37 1gsg_1_T_1-72
38 DSSR warning 1gsg.json: no nucleotides found. Ignoring 1gsg_1_T_1-72. 38 DSSR warning 1gsg.json: no nucleotides found. Ignoring 1gsg_1_T_1-72.
39 39
...@@ -70,6 +70,12 @@ DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A9_3-118. ...@@ -70,6 +70,12 @@ DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A9_3-118.
70 4v47_1_A9_3-118 70 4v47_1_A9_3-118
71 DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A9_3-118. 71 DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A9_3-118.
72 72
73 +4v42_1_BA_1-2914
74 +Could not find nucleotides of chain BA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
75 +
76 +4v42_1_BB_5-121
77 +Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
78 +
73 2ob7_1_A_10-319 79 2ob7_1_A_10-319
74 DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_A_10-319. 80 DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_A_10-319.
75 81
...@@ -82,21 +88,6 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_Z_1-91. ...@@ -82,21 +88,6 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_Z_1-91.
82 2ob7_1_D_1-130 88 2ob7_1_D_1-130
83 DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_D_1-130. 89 DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_D_1-130.
84 90
85 -4v42_1_BA_1-2914
86 -Could not find nucleotides of chain BA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
87 -
88 -4v42_1_BB_5-121
89 -Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
90 -
91 -1r2x_1_C_1-58
92 -DSSR warning 1r2x.json: no nucleotides found. Ignoring 1r2x_1_C_1-58.
93 -
94 -1r2w_1_C_1-58
95 -DSSR warning 1r2w.json: no nucleotides found. Ignoring 1r2w_1_C_1-58.
96 -
97 -1eg0_1_L_1-56
98 -DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_L_1-56.
99 -
100 3dg2_1_A_1-1542 91 3dg2_1_A_1-1542
101 DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A_1-1542. 92 DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A_1-1542.
102 93
...@@ -142,8 +133,14 @@ DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B_1-2904. ...@@ -142,8 +133,14 @@ DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B_1-2904.
142 1eg0_1_O_1-73 133 1eg0_1_O_1-73
143 DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_O_1-73. 134 DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_O_1-73.
144 135
145 -1zc8_1_A_1-59 136 +1r2x_1_C_1-58
146 -DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_A_1-59. 137 +DSSR warning 1r2x.json: no nucleotides found. Ignoring 1r2x_1_C_1-58.
138 +
139 +1r2w_1_C_1-58
140 +DSSR warning 1r2w.json: no nucleotides found. Ignoring 1r2w_1_C_1-58.
141 +
142 +1eg0_1_L_1-56
143 +DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_L_1-56.
147 144
148 1jgq_1_A_2-1520 145 1jgq_1_A_2-1520
149 Could not find nucleotides of chain A in annotation 1jgq.json. Either there is a problem with 1jgq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 146 Could not find nucleotides of chain A in annotation 1jgq.json. Either there is a problem with 1jgq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
...@@ -157,6 +154,9 @@ Could not find nucleotides of chain A in annotation 1jgo.json. Either there is a ...@@ -157,6 +154,9 @@ Could not find nucleotides of chain A in annotation 1jgo.json. Either there is a
157 1jgp_1_A_2-1520 154 1jgp_1_A_2-1520
158 Could not find nucleotides of chain A in annotation 1jgp.json. Either there is a problem with 1jgp mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 155 Could not find nucleotides of chain A in annotation 1jgp.json. Either there is a problem with 1jgp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
159 156
157 +1zc8_1_A_1-59
158 +DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_A_1-59.
159 +
160 1mvr_1_D_1-59 160 1mvr_1_D_1-59
161 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_D_1-59. 161 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_D_1-59.
162 162
...@@ -187,24 +187,6 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_B_1-50. ...@@ -187,24 +187,6 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_B_1-50.
187 3eq4_1_B_1-50 187 3eq4_1_B_1-50
188 DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B_1-50. 188 DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B_1-50.
189 189
190 -3pgw_1_R_1-164
191 -DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R_1-164.
192 -
193 -3pgw_1_N_1-164
194 -DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N_1-164.
195 -
196 -3cw1_1_x_1-138
197 -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_x_1-138.
198 -
199 -3cw1_1_w_1-138
200 -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_w_1-138.
201 -
202 -3cw1_1_V_1-138
203 -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V_1-138.
204 -
205 -3cw1_1_v_1-138
206 -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_v_1-138.
207 -
208 2iy3_1_B_9-105 190 2iy3_1_B_9-105
209 DSSR warning 2iy3.json: no nucleotides found. Ignoring 2iy3_1_B_9-105. 191 DSSR warning 2iy3.json: no nucleotides found. Ignoring 2iy3_1_B_9-105.
210 192
...@@ -238,6 +220,24 @@ DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BZ_1-70. ...@@ -238,6 +220,24 @@ DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BZ_1-70.
238 4v5z_1_B1_2-123 220 4v5z_1_B1_2-123
239 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B1_2-123. 221 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B1_2-123.
240 222
223 +3pgw_1_R_1-164
224 +DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R_1-164.
225 +
226 +3pgw_1_N_1-164
227 +DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N_1-164.
228 +
229 +3cw1_1_x_1-138
230 +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_x_1-138.
231 +
232 +3cw1_1_w_1-138
233 +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_w_1-138.
234 +
235 +3cw1_1_V_1-138
236 +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V_1-138.
237 +
238 +3cw1_1_v_1-138
239 +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_v_1-138.
240 +
241 1mvr_1_B_1-96 241 1mvr_1_B_1-96
242 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_B_1-96. 242 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_B_1-96.
243 243
...@@ -889,9 +889,24 @@ Could not find nucleotides of chain X in annotation 4w2e.json. Either there is a ...@@ -889,9 +889,24 @@ Could not find nucleotides of chain X in annotation 4w2e.json. Either there is a
889 6ucq_1_2X 889 6ucq_1_2X
890 Could not find nucleotides of chain 2X in annotation 6ucq.json. Either there is a problem with 6ucq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 890 Could not find nucleotides of chain 2X in annotation 6ucq.json. Either there is a problem with 6ucq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
891 891
892 +7n1p_1_DT
893 +Could not find nucleotides of chain DT in annotation 7n1p.json. Either there is a problem with 7n1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
894 +
895 +7n2u_1_DT
896 +Could not find nucleotides of chain DT in annotation 7n2u.json. Either there is a problem with 7n2u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
897 +
892 6yss_1_W 898 6yss_1_W
893 Could not find nucleotides of chain W in annotation 6yss.json. Either there is a problem with 6yss mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 899 Could not find nucleotides of chain W in annotation 6yss.json. Either there is a problem with 6yss mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
894 900
901 +7n30_1_DT
902 +Could not find nucleotides of chain DT in annotation 7n30.json. Either there is a problem with 7n30 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
903 +
904 +7n31_1_DT
905 +Could not find nucleotides of chain DT in annotation 7n31.json. Either there is a problem with 7n31 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
906 +
907 +7n2c_1_DT
908 +Could not find nucleotides of chain DT in annotation 7n2c.json. Either there is a problem with 7n2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
909 +
895 5afi_1_Y 910 5afi_1_Y
896 Could not find nucleotides of chain Y in annotation 5afi.json. Either there is a problem with 5afi mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 911 Could not find nucleotides of chain Y in annotation 5afi.json. Either there is a problem with 5afi mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
897 912
...@@ -1003,6 +1018,54 @@ Could not find nucleotides of chain X in annotation 4v4i.json. Either there is a ...@@ -1003,6 +1018,54 @@ Could not find nucleotides of chain X in annotation 4v4i.json. Either there is a
1003 4v42_1_BB 1018 4v42_1_BB
1004 Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1019 Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1005 1020
1021 +4jrc_1_B
1022 +Nucleotides not inserted !
1023 +
1024 +4jrc_1_A
1025 +Nucleotides not inserted !
1026 +
1027 +6lkq_1_S
1028 +Could not find nucleotides of chain S in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1029 +
1030 +5h5u_1_H
1031 +Could not find nucleotides of chain H in annotation 5h5u.json. Either there is a problem with 5h5u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1032 +
1033 +7d6z_1_F
1034 +Could not find nucleotides of chain F in annotation 7d6z.json. Either there is a problem with 7d6z mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1035 +
1036 +5lze_1_Y
1037 +Could not find nucleotides of chain Y in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1038 +
1039 +5lze_1_V
1040 +Could not find nucleotides of chain V in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1041 +
1042 +5lze_1_X
1043 +Could not find nucleotides of chain X in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1044 +
1045 +3jcj_1_G
1046 +Could not find nucleotides of chain G in annotation 3jcj.json. Either there is a problem with 3jcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1047 +
1048 +6o7k_1_G
1049 +Could not find nucleotides of chain G in annotation 6o7k.json. Either there is a problem with 6o7k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1050 +
1051 +3dg2_1_A
1052 +DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A.
1053 +
1054 +3dg0_1_A
1055 +DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A.
1056 +
1057 +4v48_1_BA
1058 +DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_BA.
1059 +
1060 +4v47_1_BA
1061 +DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_BA.
1062 +
1063 +3dg4_1_A
1064 +DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A.
1065 +
1066 +3dg5_1_A
1067 +DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A.
1068 +
1006 6d30_1_C 1069 6d30_1_C
1007 Sequence is too short. (< 5 resolved nts) 1070 Sequence is too short. (< 5 resolved nts)
1008 1071
...@@ -1315,62 +1378,53 @@ Sequence is too short. (< 5 resolved nts) ...@@ -1315,62 +1378,53 @@ Sequence is too short. (< 5 resolved nts)
1315 6n6g_1_D 1378 6n6g_1_D
1316 Sequence is too short. (< 5 resolved nts) 1379 Sequence is too short. (< 5 resolved nts)
1317 1380
1318 -6lkq_1_S 1381 +4b3r_1_W
1319 -Could not find nucleotides of chain S in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1382 +Sequence is too short. (< 5 resolved nts)
1320 -
1321 -5h5u_1_H
1322 -Could not find nucleotides of chain H in annotation 5h5u.json. Either there is a problem with 5h5u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1323 -
1324 -7d6z_1_F
1325 -Could not find nucleotides of chain F in annotation 7d6z.json. Either there is a problem with 7d6z mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1326 -
1327 -5lze_1_Y
1328 -Could not find nucleotides of chain Y in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1329 1383
1330 -5lze_1_V 1384 +4b3t_1_W
1331 -Could not find nucleotides of chain V in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1385 +Sequence is too short. (< 5 resolved nts)
1332 1386
1333 -5lze_1_X 1387 +4b3s_1_W
1334 -Could not find nucleotides of chain X in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1388 +Sequence is too short. (< 5 resolved nts)
1335 1389
1336 -3jcj_1_G 1390 +7b5k_1_X
1337 -Could not find nucleotides of chain G in annotation 3jcj.json. Either there is a problem with 3jcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1391 +Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1338 1392
1339 -6o7k_1_G 1393 +5o2r_1_X
1340 -Could not find nucleotides of chain G in annotation 6o7k.json. Either there is a problem with 6o7k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1394 +Could not find nucleotides of chain X in annotation 5o2r.json. Either there is a problem with 5o2r mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1341 1395
1342 -3dg2_1_A 1396 +5kcs_1_1X
1343 -DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A. 1397 +Could not find nucleotides of chain 1X in annotation 5kcs.json. Either there is a problem with 5kcs mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1344 1398
1345 -3dg0_1_A 1399 +7n1p_1_PT
1346 -DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A. 1400 +Could not find nucleotides of chain PT in annotation 7n1p.json. Either there is a problem with 7n1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1347 1401
1348 -4v48_1_BA 1402 +7n2u_1_PT
1349 -DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_BA. 1403 +Could not find nucleotides of chain PT in annotation 7n2u.json. Either there is a problem with 7n2u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1350 1404
1351 -4v47_1_BA 1405 +7n30_1_PT
1352 -DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_BA. 1406 +Could not find nucleotides of chain PT in annotation 7n30.json. Either there is a problem with 7n30 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1353 1407
1354 -3dg4_1_A 1408 +7n31_1_PT
1355 -DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A. 1409 +Could not find nucleotides of chain PT in annotation 7n31.json. Either there is a problem with 7n31 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1356 1410
1357 -3dg5_1_A 1411 +7n2c_1_PT
1358 -DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A. 1412 +Could not find nucleotides of chain PT in annotation 7n2c.json. Either there is a problem with 7n2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1359 1413
1360 -4b3r_1_W 1414 +6yl5_1_I
1361 -Sequence is too short. (< 5 resolved nts) 1415 +Nucleotides not inserted !
1362 1416
1363 -4b3t_1_W 1417 +6yl5_1_E
1364 -Sequence is too short. (< 5 resolved nts) 1418 +Nucleotides not inserted !
1365 1419
1366 -4b3s_1_W 1420 +6yl5_1_A
1367 -Sequence is too short. (< 5 resolved nts) 1421 +Nucleotides not inserted !
1368 1422
1369 -5o2r_1_X 1423 +6yl5_1_K
1370 -Could not find nucleotides of chain X in annotation 5o2r.json. Either there is a problem with 5o2r mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1424 +Nucleotides not inserted !
1371 1425
1372 -5kcs_1_1X 1426 +6yl5_1_G
1373 -Could not find nucleotides of chain 1X in annotation 5kcs.json. Either there is a problem with 5kcs mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1427 +Nucleotides not inserted !
1374 1428
1375 6zvk_1_E2 1429 6zvk_1_E2
1376 Could not find nucleotides of chain E2 in annotation 6zvk.json. Either there is a problem with 6zvk mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1430 Could not find nucleotides of chain E2 in annotation 6zvk.json. Either there is a problem with 6zvk mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
...@@ -1582,6 +1636,9 @@ Could not find nucleotides of chain V in annotation 6olf.json. Either there is a ...@@ -1582,6 +1636,9 @@ Could not find nucleotides of chain V in annotation 6olf.json. Either there is a
1582 3erc_1_G 1636 3erc_1_G
1583 Sequence is too short. (< 5 resolved nts) 1637 Sequence is too short. (< 5 resolved nts)
1584 1638
1639 +4qjd_1_D
1640 +Nucleotides not inserted !
1641 +
1585 6of1_1_1W 1642 6of1_1_1W
1586 Could not find nucleotides of chain 1W in annotation 6of1.json. Either there is a problem with 6of1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1643 Could not find nucleotides of chain 1W in annotation 6of1.json. Either there is a problem with 6of1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1587 1644
...@@ -1675,9 +1732,15 @@ DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A. ...@@ -1675,9 +1732,15 @@ DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A.
1675 4hot_1_X 1732 4hot_1_X
1676 Sequence is too short. (< 5 resolved nts) 1733 Sequence is too short. (< 5 resolved nts)
1677 1734
1735 +5ns4_1_C
1736 +Nucleotides not inserted !
1737 +
1678 6d2z_1_C 1738 6d2z_1_C
1679 Sequence is too short. (< 5 resolved nts) 1739 Sequence is too short. (< 5 resolved nts)
1680 1740
1741 +7eh0_1_I
1742 +Sequence is too short. (< 5 resolved nts)
1743 +
1681 4tu0_1_F 1744 4tu0_1_F
1682 Sequence is too short. (< 5 resolved nts) 1745 Sequence is too short. (< 5 resolved nts)
1683 1746
...@@ -1738,18 +1801,15 @@ Could not find nucleotides of chain NB in annotation 6i7o.json. Either there is ...@@ -1738,18 +1801,15 @@ Could not find nucleotides of chain NB in annotation 6i7o.json. Either there is
1738 1ml5_1_A 1801 1ml5_1_A
1739 Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1802 Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1740 1803
1804 +7nsq_1_V
1805 +Could not find nucleotides of chain V in annotation 7nsq.json. Either there is a problem with 7nsq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1806 +
1741 6swa_1_Q 1807 6swa_1_Q
1742 Could not find nucleotides of chain Q in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1808 Could not find nucleotides of chain Q in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1743 1809
1744 6swa_1_R 1810 6swa_1_R
1745 Could not find nucleotides of chain R in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1811 Could not find nucleotides of chain R in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1746 1812
1747 -3j6x_1_IR
1748 -Could not find nucleotides of chain IR in annotation 3j6x.json. Either there is a problem with 3j6x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1749 -
1750 -3j6y_1_IR
1751 -Could not find nucleotides of chain IR in annotation 3j6y.json. Either there is a problem with 3j6y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1752 -
1753 6ole_1_T 1813 6ole_1_T
1754 Could not find nucleotides of chain T in annotation 6ole.json. Either there is a problem with 6ole mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1814 Could not find nucleotides of chain T in annotation 6ole.json. Either there is a problem with 6ole mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1755 1815
...@@ -1768,6 +1828,15 @@ Could not find nucleotides of chain T in annotation 6olf.json. Either there is a ...@@ -1768,6 +1828,15 @@ Could not find nucleotides of chain T in annotation 6olf.json. Either there is a
1768 6w6l_1_T 1828 6w6l_1_T
1769 Could not find nucleotides of chain T in annotation 6w6l.json. Either there is a problem with 6w6l mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1829 Could not find nucleotides of chain T in annotation 6w6l.json. Either there is a problem with 6w6l mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1770 1830
1831 +6tnu_1_M
1832 +Could not find nucleotides of chain M in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1833 +
1834 +5mc6_1_M
1835 +Could not find nucleotides of chain M in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1836 +
1837 +7nrc_1_SM
1838 +Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1839 +
1771 6tb3_1_N 1840 6tb3_1_N
1772 Could not find nucleotides of chain N in annotation 6tb3.json. Either there is a problem with 6tb3 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1841 Could not find nucleotides of chain N in annotation 6tb3.json. Either there is a problem with 6tb3 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1773 1842
...@@ -1780,6 +1849,9 @@ Could not find nucleotides of chain SN in annotation 7b7d.json. Either there is ...@@ -1780,6 +1849,9 @@ Could not find nucleotides of chain SN in annotation 7b7d.json. Either there is
1780 6tnu_1_N 1849 6tnu_1_N
1781 Could not find nucleotides of chain N in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1850 Could not find nucleotides of chain N in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1782 1851
1852 +7nrc_1_SN
1853 +Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1854 +
1783 7nrd_1_SN 1855 7nrd_1_SN
1784 Could not find nucleotides of chain SN in annotation 7nrd.json. Either there is a problem with 7nrd mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 1856 Could not find nucleotides of chain SN in annotation 7nrd.json. Either there is a problem with 7nrd mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1785 1857
...@@ -1810,6 +1882,15 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_D. ...@@ -1810,6 +1882,15 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_D.
1810 5o1y_1_B 1882 5o1y_1_B
1811 Sequence is too short. (< 5 resolved nts) 1883 Sequence is too short. (< 5 resolved nts)
1812 1884
1885 +4kzy_1_I
1886 +Could not find nucleotides of chain I in annotation 4kzy.json. Either there is a problem with 4kzy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1887 +
1888 +4kzz_1_I
1889 +Could not find nucleotides of chain I in annotation 4kzz.json. Either there is a problem with 4kzz mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1890 +
1891 +4kzx_1_I
1892 +Could not find nucleotides of chain I in annotation 4kzx.json. Either there is a problem with 4kzx mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
1893 +
1813 3jcr_1_H 1894 3jcr_1_H
1814 DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_H. 1895 DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_H.
1815 1896
...@@ -2119,9 +2200,6 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2119,9 +2200,6 @@ Sequence is too short. (< 5 resolved nts)
2119 6uu1_1_333 2200 6uu1_1_333
2120 Sequence is too short. (< 5 resolved nts) 2201 Sequence is too short. (< 5 resolved nts)
2121 2202
2122 -1pn8_1_D
2123 -DSSR warning 1pn8.json: no nucleotides found. Ignoring 1pn8_1_D.
2124 -
2125 3er8_1_H 2203 3er8_1_H
2126 Sequence is too short. (< 5 resolved nts) 2204 Sequence is too short. (< 5 resolved nts)
2127 2205
...@@ -2236,14 +2314,11 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2236,14 +2314,11 @@ Sequence is too short. (< 5 resolved nts)
2236 1xnq_1_W 2314 1xnq_1_W
2237 Sequence is too short. (< 5 resolved nts) 2315 Sequence is too short. (< 5 resolved nts)
2238 2316
2239 -1x18_1_C 2317 +7n2v_1_DT
2240 -DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_C. 2318 +Could not find nucleotides of chain DT in annotation 7n2v.json. Either there is a problem with 7n2v mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2241 -
2242 -1x18_1_B
2243 -DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_B.
2244 2319
2245 -1x18_1_D 2320 +4peh_1_Z
2246 -DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_D. 2321 +Sequence is too short. (< 5 resolved nts)
2247 2322
2248 1vq6_1_4 2323 1vq6_1_4
2249 Sequence is too short. (< 5 resolved nts) 2324 Sequence is too short. (< 5 resolved nts)
...@@ -2278,6 +2353,9 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2278,6 +2353,9 @@ Sequence is too short. (< 5 resolved nts)
2278 4xbf_1_D 2353 4xbf_1_D
2279 Sequence is too short. (< 5 resolved nts) 2354 Sequence is too short. (< 5 resolved nts)
2280 2355
2356 +5w1h_1_B
2357 +Nucleotides not inserted !
2358 +
2281 6n6d_1_D 2359 6n6d_1_D
2282 Sequence is too short. (< 5 resolved nts) 2360 Sequence is too short. (< 5 resolved nts)
2283 2361
...@@ -2296,52 +2374,148 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2296,52 +2374,148 @@ Sequence is too short. (< 5 resolved nts)
2296 6tz1_1_N 2374 6tz1_1_N
2297 Sequence is too short. (< 5 resolved nts) 2375 Sequence is too short. (< 5 resolved nts)
2298 2376
2299 -6sce_1_B 2377 +6q1h_1_D
2300 Sequence is too short. (< 5 resolved nts) 2378 Sequence is too short. (< 5 resolved nts)
2301 2379
2302 -6xl1_1_C 2380 +6q1h_1_H
2303 Sequence is too short. (< 5 resolved nts) 2381 Sequence is too short. (< 5 resolved nts)
2304 2382
2305 -6scf_1_I 2383 +6p7p_1_F
2306 Sequence is too short. (< 5 resolved nts) 2384 Sequence is too short. (< 5 resolved nts)
2307 2385
2308 -6scf_1_K 2386 +6p7p_1_E
2309 Sequence is too short. (< 5 resolved nts) 2387 Sequence is too short. (< 5 resolved nts)
2310 2388
2311 -6yud_1_K 2389 +6p7p_1_D
2312 Sequence is too short. (< 5 resolved nts) 2390 Sequence is too short. (< 5 resolved nts)
2313 2391
2314 -6yud_1_O 2392 +6vm6_1_J
2315 Sequence is too short. (< 5 resolved nts) 2393 Sequence is too short. (< 5 resolved nts)
2316 2394
2317 -6scf_1_M 2395 +6vm6_1_G
2318 Sequence is too short. (< 5 resolved nts) 2396 Sequence is too short. (< 5 resolved nts)
2319 2397
2320 -6yud_1_P 2398 +6wan_1_K
2321 Sequence is too short. (< 5 resolved nts) 2399 Sequence is too short. (< 5 resolved nts)
2322 2400
2323 -6scf_1_L 2401 +6wan_1_H
2324 Sequence is too short. (< 5 resolved nts) 2402 Sequence is too short. (< 5 resolved nts)
2325 2403
2326 -6yud_1_M 2404 +6wan_1_G
2327 Sequence is too short. (< 5 resolved nts) 2405 Sequence is too short. (< 5 resolved nts)
2328 2406
2329 -6yud_1_Q 2407 +6wan_1_L
2330 Sequence is too short. (< 5 resolved nts) 2408 Sequence is too short. (< 5 resolved nts)
2331 2409
2332 -6w11_1_C 2410 +6wan_1_I
2333 Sequence is too short. (< 5 resolved nts) 2411 Sequence is too short. (< 5 resolved nts)
2334 2412
2335 -6o6x_1_D 2413 +6ywo_1_F
2336 Sequence is too short. (< 5 resolved nts) 2414 Sequence is too short. (< 5 resolved nts)
2337 2415
2338 -4ba2_1_R 2416 +6wan_1_J
2339 Sequence is too short. (< 5 resolved nts) 2417 Sequence is too short. (< 5 resolved nts)
2340 2418
2341 -7bdv_1_F 2419 +4oau_1_A
2342 Sequence is too short. (< 5 resolved nts) 2420 Sequence is too short. (< 5 resolved nts)
2343 2421
2344 -7bdv_1_H 2422 +6ywo_1_E
2423 +Sequence is too short. (< 5 resolved nts)
2424 +
2425 +6ywo_1_K
2426 +Sequence is too short. (< 5 resolved nts)
2427 +
2428 +6vm6_1_I
2429 +Sequence is too short. (< 5 resolved nts)
2430 +
2431 +6vm6_1_H
2432 +Sequence is too short. (< 5 resolved nts)
2433 +
2434 +6ywo_1_I
2435 +Sequence is too short. (< 5 resolved nts)
2436 +
2437 +2a1r_1_C
2438 +Sequence is too short. (< 5 resolved nts)
2439 +
2440 +6m6v_1_F
2441 +Sequence is too short. (< 5 resolved nts)
2442 +
2443 +6m6v_1_E
2444 +Sequence is too short. (< 5 resolved nts)
2445 +
2446 +2a1r_1_D
2447 +Sequence is too short. (< 5 resolved nts)
2448 +
2449 +3gpq_1_E
2450 +Sequence is too short. (< 5 resolved nts)
2451 +
2452 +3gpq_1_F
2453 +Sequence is too short. (< 5 resolved nts)
2454 +
2455 +6o79_1_C
2456 +Sequence is too short. (< 5 resolved nts)
2457 +
2458 +6vm6_1_K
2459 +Sequence is too short. (< 5 resolved nts)
2460 +
2461 +6m6v_1_G
2462 +Sequence is too short. (< 5 resolved nts)
2463 +
2464 +6hyu_1_D
2465 +Sequence is too short. (< 5 resolved nts)
2466 +
2467 +1laj_1_R
2468 +Sequence is too short. (< 5 resolved nts)
2469 +
2470 +6ybv_1_K
2471 +Could not find nucleotides of chain K in annotation 6ybv.json. Either there is a problem with 6ybv mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2472 +
2473 +6sce_1_B
2474 +Sequence is too short. (< 5 resolved nts)
2475 +
2476 +6xl1_1_C
2477 +Sequence is too short. (< 5 resolved nts)
2478 +
2479 +6scf_1_I
2480 +Sequence is too short. (< 5 resolved nts)
2481 +
2482 +6scf_1_K
2483 +Sequence is too short. (< 5 resolved nts)
2484 +
2485 +6yud_1_K
2486 +Sequence is too short. (< 5 resolved nts)
2487 +
2488 +6yud_1_O
2489 +Sequence is too short. (< 5 resolved nts)
2490 +
2491 +6scf_1_M
2492 +Sequence is too short. (< 5 resolved nts)
2493 +
2494 +6yud_1_P
2495 +Sequence is too short. (< 5 resolved nts)
2496 +
2497 +6scf_1_L
2498 +Sequence is too short. (< 5 resolved nts)
2499 +
2500 +6yud_1_M
2501 +Sequence is too short. (< 5 resolved nts)
2502 +
2503 +6yud_1_Q
2504 +Sequence is too short. (< 5 resolved nts)
2505 +
2506 +6w11_1_C
2507 +Sequence is too short. (< 5 resolved nts)
2508 +
2509 +6o6x_1_D
2510 +Sequence is too short. (< 5 resolved nts)
2511 +
2512 +4ba2_1_R
2513 +Sequence is too short. (< 5 resolved nts)
2514 +
2515 +7bdv_1_F
2516 +Sequence is too short. (< 5 resolved nts)
2517 +
2518 +7bdv_1_H
2345 Sequence is too short. (< 5 resolved nts) 2519 Sequence is too short. (< 5 resolved nts)
2346 2520
2347 6o6x_1_C 2521 6o6x_1_C
...@@ -2423,7 +2597,7 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2423,7 +2597,7 @@ Sequence is too short. (< 5 resolved nts)
2423 Sequence is too short. (< 5 resolved nts) 2597 Sequence is too short. (< 5 resolved nts)
2424 2598
2425 1y1y_1_P 2599 1y1y_1_P
2426 -DSSR warning 1y1y.json: no nucleotides found. Ignoring 1y1y_1_P. 2600 +Sequence is too short. (< 5 resolved nts)
2427 2601
2428 5zuu_1_I 2602 5zuu_1_I
2429 Sequence is too short. (< 5 resolved nts) 2603 Sequence is too short. (< 5 resolved nts)
...@@ -2431,6 +2605,9 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2431,6 +2605,9 @@ Sequence is too short. (< 5 resolved nts)
2431 5zuu_1_G 2605 5zuu_1_G
2432 Sequence is too short. (< 5 resolved nts) 2606 Sequence is too short. (< 5 resolved nts)
2433 2607
2608 +7am2_1_R1
2609 +Sequence is too short. (< 5 resolved nts)
2610 +
2434 4peh_1_W 2611 4peh_1_W
2435 Sequence is too short. (< 5 resolved nts) 2612 Sequence is too short. (< 5 resolved nts)
2436 2613
...@@ -2443,7 +2620,7 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2443,7 +2620,7 @@ Sequence is too short. (< 5 resolved nts)
2443 4peh_1_Y 2620 4peh_1_Y
2444 Sequence is too short. (< 5 resolved nts) 2621 Sequence is too short. (< 5 resolved nts)
2445 2622
2446 -4peh_1_Z 2623 +7d8c_1_C
2447 Sequence is too short. (< 5 resolved nts) 2624 Sequence is too short. (< 5 resolved nts)
2448 2625
2449 6mkn_1_W 2626 6mkn_1_W
...@@ -2482,30 +2659,9 @@ Could not find nucleotides of chain Q in annotation 4eya.json. Either there is a ...@@ -2482,30 +2659,9 @@ Could not find nucleotides of chain Q in annotation 4eya.json. Either there is a
2482 4eya_1_R 2659 4eya_1_R
2483 Could not find nucleotides of chain R in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 2660 Could not find nucleotides of chain R in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2484 2661
2485 -1qzc_1_B
2486 -DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_B.
2487 -
2488 -1t1o_1_B
2489 -DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_B.
2490 -
2491 1mvr_1_C 2662 1mvr_1_C
2492 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_C. 2663 DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_C.
2493 2664
2494 -1t1m_1_B
2495 -DSSR warning 1t1m.json: no nucleotides found. Ignoring 1t1m_1_B.
2496 -
2497 -1t1o_1_C
2498 -DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_C.
2499 -
2500 -1t1m_1_A
2501 -DSSR warning 1t1m.json: no nucleotides found. Ignoring 1t1m_1_A.
2502 -
2503 -1t1o_1_A
2504 -DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_A.
2505 -
2506 -2r1g_1_B
2507 -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_B.
2508 -
2509 4ht9_1_E 2665 4ht9_1_E
2510 Sequence is too short. (< 5 resolved nts) 2666 Sequence is too short. (< 5 resolved nts)
2511 2667
...@@ -2536,21 +2692,15 @@ Could not find nucleotides of chain U in annotation 5uk4.json. Either there is a ...@@ -2536,21 +2692,15 @@ Could not find nucleotides of chain U in annotation 5uk4.json. Either there is a
2536 5f6c_1_E 2692 5f6c_1_E
2537 Sequence is too short. (< 5 resolved nts) 2693 Sequence is too short. (< 5 resolved nts)
2538 2694
2695 +7nwh_1_HH
2696 +Could not find nucleotides of chain HH in annotation 7nwh.json. Either there is a problem with 7nwh mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2697 +
2539 4rcj_1_B 2698 4rcj_1_B
2540 Sequence is too short. (< 5 resolved nts) 2699 Sequence is too short. (< 5 resolved nts)
2541 2700
2542 1xnr_1_W 2701 1xnr_1_W
2543 Sequence is too short. (< 5 resolved nts) 2702 Sequence is too short. (< 5 resolved nts)
2544 2703
2545 -2agn_1_A
2546 -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_A.
2547 -
2548 -2agn_1_C
2549 -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_C.
2550 -
2551 -2agn_1_B
2552 -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_B.
2553 -
2554 6e0o_1_C 2704 6e0o_1_C
2555 Sequence is too short. (< 5 resolved nts) 2705 Sequence is too short. (< 5 resolved nts)
2556 2706
...@@ -2602,11 +2752,8 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2602,11 +2752,8 @@ Sequence is too short. (< 5 resolved nts)
2602 4d61_1_J 2752 4d61_1_J
2603 Could not find nucleotides of chain J in annotation 4d61.json. Either there is a problem with 4d61 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 2753 Could not find nucleotides of chain J in annotation 4d61.json. Either there is a problem with 4d61 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2604 2754
2605 -1trj_1_B 2755 +7nwg_1_Q3
2606 -DSSR warning 1trj.json: no nucleotides found. Ignoring 1trj_1_B. 2756 +Could not find nucleotides of chain Q3 in annotation 7nwg.json. Either there is a problem with 7nwg mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2607 -
2608 -1trj_1_C
2609 -DSSR warning 1trj.json: no nucleotides found. Ignoring 1trj_1_C.
2610 2757
2611 5tbw_1_SR 2758 5tbw_1_SR
2612 Could not find nucleotides of chain SR in annotation 5tbw.json. Either there is a problem with 5tbw mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 2759 Could not find nucleotides of chain SR in annotation 5tbw.json. Either there is a problem with 5tbw mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
...@@ -2653,6 +2800,12 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2653,6 +2800,12 @@ Sequence is too short. (< 5 resolved nts)
2653 3jbu_1_V 2800 3jbu_1_V
2654 Could not find nucleotides of chain V in annotation 3jbu.json. Either there is a problem with 3jbu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 2801 Could not find nucleotides of chain V in annotation 3jbu.json. Either there is a problem with 3jbu mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2655 2802
2803 +4ts2_1_Y
2804 +Nucleotides not inserted !
2805 +
2806 +4ts0_1_Y
2807 +Nucleotides not inserted !
2808 +
2656 1h2c_1_R 2809 1h2c_1_R
2657 Sequence is too short. (< 5 resolved nts) 2810 Sequence is too short. (< 5 resolved nts)
2658 2811
...@@ -2731,6 +2884,9 @@ Could not find nucleotides of chain Z in annotation 5flx.json. Either there is a ...@@ -2731,6 +2884,9 @@ Could not find nucleotides of chain Z in annotation 5flx.json. Either there is a
2731 6eri_1_AX 2884 6eri_1_AX
2732 Could not find nucleotides of chain AX in annotation 6eri.json. Either there is a problem with 6eri mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 2885 Could not find nucleotides of chain AX in annotation 6eri.json. Either there is a problem with 6eri mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2733 2886
2887 +7k5l_1_R
2888 +Sequence is too short. (< 5 resolved nts)
2889 +
2734 7d80_1_Y 2890 7d80_1_Y
2735 Could not find nucleotides of chain Y in annotation 7d80.json. Either there is a problem with 7d80 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 2891 Could not find nucleotides of chain Y in annotation 7d80.json. Either there is a problem with 7d80 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2736 2892
...@@ -2752,6 +2908,9 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_I. ...@@ -2752,6 +2908,9 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_I.
2752 1zc8_1_H 2908 1zc8_1_H
2753 DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_H. 2909 DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_H.
2754 2910
2911 +6bfb_1_Y
2912 +Nucleotides not inserted !
2913 +
2755 1zc8_1_J 2914 1zc8_1_J
2756 DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_J. 2915 DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_J.
2757 2916
...@@ -2857,6 +3016,12 @@ Could not find nucleotides of chain BB in annotation 6z1p.json. Either there is ...@@ -2857,6 +3016,12 @@ Could not find nucleotides of chain BB in annotation 6z1p.json. Either there is
2857 6z1p_1_BA 3016 6z1p_1_BA
2858 Could not find nucleotides of chain BA in annotation 6z1p.json. Either there is a problem with 6z1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 3017 Could not find nucleotides of chain BA in annotation 6z1p.json. Either there is a problem with 6z1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
2859 3018
3019 +3p22_1_C
3020 +Nucleotides not inserted !
3021 +
3022 +3p22_1_G
3023 +Nucleotides not inserted !
3024 +
2860 2uxd_1_X 3025 2uxd_1_X
2861 Sequence is too short. (< 5 resolved nts) 3026 Sequence is too short. (< 5 resolved nts)
2862 3027
...@@ -2923,12 +3088,6 @@ Sequence is too short. (< 5 resolved nts) ...@@ -2923,12 +3088,6 @@ Sequence is too short. (< 5 resolved nts)
2923 3ol8_1_P 3088 3ol8_1_P
2924 Sequence is too short. (< 5 resolved nts) 3089 Sequence is too short. (< 5 resolved nts)
2925 3090
2926 -1qzc_1_C
2927 -DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_C.
2928 -
2929 -1qzc_1_A
2930 -DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_A.
2931 -
2932 6yrq_1_E 3091 6yrq_1_E
2933 Sequence is too short. (< 5 resolved nts) 3092 Sequence is too short. (< 5 resolved nts)
2934 3093
...@@ -3166,6 +3325,9 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3166,6 +3325,9 @@ Sequence is too short. (< 5 resolved nts)
3166 4wtk_1_P 3325 4wtk_1_P
3167 Sequence is too short. (< 5 resolved nts) 3326 Sequence is too short. (< 5 resolved nts)
3168 3327
3328 +6wlj_3_A
3329 +Nucleotides not inserted !
3330 +
3169 1vqn_1_4 3331 1vqn_1_4
3170 Sequence is too short. (< 5 resolved nts) 3332 Sequence is too short. (< 5 resolved nts)
3171 3333
...@@ -3214,23 +3376,8 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B. ...@@ -3214,23 +3376,8 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B.
3214 4i67_1_B 3376 4i67_1_B
3215 Sequence is too short. (< 5 resolved nts) 3377 Sequence is too short. (< 5 resolved nts)
3216 3378
3217 -3pgw_1_R 3379 +4jf2_1_A
3218 -DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R. 3380 +Nucleotides not inserted !
3219 -
3220 -3pgw_1_N
3221 -DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N.
3222 -
3223 -3cw1_1_X
3224 -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_X.
3225 -
3226 -3cw1_1_W
3227 -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_W.
3228 -
3229 -3cw1_1_V
3230 -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V.
3231 -
3232 -7b0y_1_A
3233 -Could not find nucleotides of chain A in annotation 7b0y.json. Either there is a problem with 7b0y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3234 3381
3235 6k32_1_T 3382 6k32_1_T
3236 Could not find nucleotides of chain T in annotation 6k32.json. Either there is a problem with 6k32 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 3383 Could not find nucleotides of chain T in annotation 6k32.json. Either there is a problem with 6k32 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
...@@ -3244,11 +3391,11 @@ Could not find nucleotides of chain A in annotation 5mmj.json. Either there is a ...@@ -3244,11 +3391,11 @@ Could not find nucleotides of chain A in annotation 5mmj.json. Either there is a
3244 5x8r_1_A 3391 5x8r_1_A
3245 Could not find nucleotides of chain A in annotation 5x8r.json. Either there is a problem with 5x8r mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 3392 Could not find nucleotides of chain A in annotation 5x8r.json. Either there is a problem with 5x8r mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3246 3393
3247 -2agn_1_E 3394 +3fu2_1_B
3248 -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_E. 3395 +Nucleotides not inserted !
3249 3396
3250 -2agn_1_D 3397 +3fu2_1_A
3251 -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_D. 3398 +Nucleotides not inserted !
3252 3399
3253 4v5z_1_BD 3400 4v5z_1_BD
3254 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BD. 3401 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BD.
...@@ -3355,6 +3502,39 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3355,6 +3502,39 @@ Sequence is too short. (< 5 resolved nts)
3355 5dto_1_B 3502 5dto_1_B
3356 Sequence is too short. (< 5 resolved nts) 3503 Sequence is too short. (< 5 resolved nts)
3357 3504
3505 +6yml_1_A
3506 +Nucleotides not inserted !
3507 +
3508 +6ymm_1_A
3509 +Nucleotides not inserted !
3510 +
3511 +6ymi_1_M
3512 +Nucleotides not inserted !
3513 +
3514 +6ymi_1_F
3515 +Nucleotides not inserted !
3516 +
3517 +6ymi_1_A
3518 +Nucleotides not inserted !
3519 +
3520 +6ylb_1_F
3521 +Nucleotides not inserted !
3522 +
3523 +6ymi_1_C
3524 +Nucleotides not inserted !
3525 +
3526 +6ymj_1_C
3527 +Nucleotides not inserted !
3528 +
3529 +6ylb_1_C
3530 +Nucleotides not inserted !
3531 +
3532 +6ymj_1_I
3533 +Nucleotides not inserted !
3534 +
3535 +6ymj_1_O
3536 +Nucleotides not inserted !
3537 +
3358 4cxh_1_X 3538 4cxh_1_X
3359 Could not find nucleotides of chain X in annotation 4cxh.json. Either there is a problem with 4cxh mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 3539 Could not find nucleotides of chain X in annotation 4cxh.json. Either there is a problem with 4cxh mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3360 3540
...@@ -3463,6 +3643,24 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3463,6 +3643,24 @@ Sequence is too short. (< 5 resolved nts)
3463 4v4f_1_B2 3643 4v4f_1_B2
3464 Sequence is too short. (< 5 resolved nts) 3644 Sequence is too short. (< 5 resolved nts)
3465 3645
3646 +7m4y_1_V
3647 +Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3648 +
3649 +7m4x_1_V
3650 +Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3651 +
3652 +6v3a_1_V
3653 +Could not find nucleotides of chain V in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3654 +
3655 +6v39_1_V
3656 +Could not find nucleotides of chain V in annotation 6v39.json. Either there is a problem with 6v39 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3657 +
3658 +6ck5_1_A
3659 +Nucleotides not inserted !
3660 +
3661 +6ck5_1_B
3662 +Nucleotides not inserted !
3663 +
3466 5it9_1_I 3664 5it9_1_I
3467 Could not find nucleotides of chain I in annotation 5it9.json. Either there is a problem with 5it9 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 3665 Could not find nucleotides of chain I in annotation 5it9.json. Either there is a problem with 5it9 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3468 3666
...@@ -3490,6 +3688,12 @@ DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_N. ...@@ -3490,6 +3688,12 @@ DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_N.
3490 6gfw_1_R 3688 6gfw_1_R
3491 Sequence is too short. (< 5 resolved nts) 3689 Sequence is too short. (< 5 resolved nts)
3492 3690
3691 +3j6x_1_IR
3692 +Could not find nucleotides of chain IR in annotation 3j6x.json. Either there is a problem with 3j6x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3693 +
3694 +3j6y_1_IR
3695 +Could not find nucleotides of chain IR in annotation 3j6y.json. Either there is a problem with 3j6y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3696 +
3493 2vaz_1_A 3697 2vaz_1_A
3494 DSSR warning 2vaz.json: no nucleotides found. Ignoring 2vaz_1_A. 3698 DSSR warning 2vaz.json: no nucleotides found. Ignoring 2vaz_1_A.
3495 3699
...@@ -3535,6 +3739,9 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3535,6 +3739,9 @@ Sequence is too short. (< 5 resolved nts)
3535 5uh9_1_I 3739 5uh9_1_I
3536 Sequence is too short. (< 5 resolved nts) 3740 Sequence is too short. (< 5 resolved nts)
3537 3741
3742 +4v5z_1_BS
3743 +DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BS.
3744 +
3538 2ftc_1_R 3745 2ftc_1_R
3539 DSSR warning 2ftc.json: no nucleotides found. Ignoring 2ftc_1_R. 3746 DSSR warning 2ftc.json: no nucleotides found. Ignoring 2ftc_1_R.
3540 3747
...@@ -3547,9 +3754,6 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3547,9 +3754,6 @@ Sequence is too short. (< 5 resolved nts)
3547 4udv_1_R 3754 4udv_1_R
3548 Sequence is too short. (< 5 resolved nts) 3755 Sequence is too short. (< 5 resolved nts)
3549 3756
3550 -2r1g_1_E
3551 -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_E.
3552 -
3553 5zsc_1_D 3757 5zsc_1_D
3554 Sequence is too short. (< 5 resolved nts) 3758 Sequence is too short. (< 5 resolved nts)
3555 3759
...@@ -3631,8 +3835,8 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3631,8 +3835,8 @@ Sequence is too short. (< 5 resolved nts)
3631 3m85_1_Y 3835 3m85_1_Y
3632 Sequence is too short. (< 5 resolved nts) 3836 Sequence is too short. (< 5 resolved nts)
3633 3837
3634 -1e8s_1_C 3838 +5u34_1_B
3635 -DSSR warning 1e8s.json: no nucleotides found. Ignoring 1e8s_1_C. 3839 +Nucleotides not inserted !
3636 3840
3637 5wnp_1_B 3841 5wnp_1_B
3638 Could not find nucleotides of chain B in annotation 5wnp.json. Either there is a problem with 5wnp mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 3842 Could not find nucleotides of chain B in annotation 5wnp.json. Either there is a problem with 5wnp mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
...@@ -3700,12 +3904,21 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3700,12 +3904,21 @@ Sequence is too short. (< 5 resolved nts)
3700 3u2e_1_C 3904 3u2e_1_C
3701 Sequence is too short. (< 5 resolved nts) 3905 Sequence is too short. (< 5 resolved nts)
3702 3906
3907 +7eh1_1_I
3908 +Sequence is too short. (< 5 resolved nts)
3909 +
3703 5uef_1_C 3910 5uef_1_C
3704 Sequence is too short. (< 5 resolved nts) 3911 Sequence is too short. (< 5 resolved nts)
3705 3912
3706 5uef_1_D 3913 5uef_1_D
3707 Sequence is too short. (< 5 resolved nts) 3914 Sequence is too short. (< 5 resolved nts)
3708 3915
3916 +7eh2_1_R
3917 +Sequence is too short. (< 5 resolved nts)
3918 +
3919 +7eh2_1_I
3920 +Sequence is too short. (< 5 resolved nts)
3921 +
3709 4x4u_1_H 3922 4x4u_1_H
3710 Sequence is too short. (< 5 resolved nts) 3923 Sequence is too short. (< 5 resolved nts)
3711 3924
...@@ -3736,6 +3949,15 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3736,6 +3949,15 @@ Sequence is too short. (< 5 resolved nts)
3736 7a5g_1_J 3949 7a5g_1_J
3737 Could not find nucleotides of chain J in annotation 7a5g.json. Either there is a problem with 7a5g mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 3950 Could not find nucleotides of chain J in annotation 7a5g.json. Either there is a problem with 7a5g mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3738 3951
3952 +1m5k_1_B
3953 +Nucleotides not inserted !
3954 +
3955 +1m5o_1_E
3956 +Nucleotides not inserted !
3957 +
3958 +1m5v_1_B
3959 +Nucleotides not inserted !
3960 +
3739 6gx6_1_B 3961 6gx6_1_B
3740 Sequence is too short. (< 5 resolved nts) 3962 Sequence is too short. (< 5 resolved nts)
3741 3963
...@@ -3754,9 +3976,6 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3754,9 +3976,6 @@ Sequence is too short. (< 5 resolved nts)
3754 1zn1_1_C 3976 1zn1_1_C
3755 DSSR warning 1zn1.json: no nucleotides found. Ignoring 1zn1_1_C. 3977 DSSR warning 1zn1.json: no nucleotides found. Ignoring 1zn1_1_C.
3756 3978
3757 -1zn0_1_C
3758 -DSSR warning 1zn0.json: no nucleotides found. Ignoring 1zn0_1_C.
3759 -
3760 1xpu_1_G 3979 1xpu_1_G
3761 Sequence is too short. (< 5 resolved nts) 3980 Sequence is too short. (< 5 resolved nts)
3762 3981
...@@ -3826,9 +4045,15 @@ Sequence is too short. (< 5 resolved nts) ...@@ -3826,9 +4045,15 @@ Sequence is too short. (< 5 resolved nts)
3826 6gc5_1_G 4045 6gc5_1_G
3827 Sequence is too short. (< 5 resolved nts) 4046 Sequence is too short. (< 5 resolved nts)
3828 4047
4048 +4rne_1_C
4049 +Nucleotides not inserted !
4050 +
3829 1n1h_1_B 4051 1n1h_1_B
3830 Sequence is too short. (< 5 resolved nts) 4052 Sequence is too short. (< 5 resolved nts)
3831 4053
4054 +7n2v_1_PT
4055 +Could not find nucleotides of chain PT in annotation 7n2v.json. Either there is a problem with 7n2v mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4056 +
3832 4ohz_1_B 4057 4ohz_1_B
3833 Sequence is too short. (< 5 resolved nts) 4058 Sequence is too short. (< 5 resolved nts)
3834 4059
...@@ -3874,6 +4099,15 @@ Could not find nucleotides of chain X in annotation 5y88.json. Either there is a ...@@ -3874,6 +4099,15 @@ Could not find nucleotides of chain X in annotation 5y88.json. Either there is a
3874 4v5z_1_BB 4099 4v5z_1_BB
3875 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BB. 4100 DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BB.
3876 4101
4102 +5y85_1_D
4103 +Nucleotides not inserted !
4104 +
4105 +5y85_1_B
4106 +Nucleotides not inserted !
4107 +
4108 +5y87_1_D
4109 +Nucleotides not inserted !
4110 +
3877 3j0o_1_H 4111 3j0o_1_H
3878 Could not find nucleotides of chain H in annotation 3j0o.json. Either there is a problem with 3j0o mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4112 Could not find nucleotides of chain H in annotation 3j0o.json. Either there is a problem with 3j0o mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
3879 4113
...@@ -4057,9 +4291,6 @@ Sequence is too short. (< 5 resolved nts) ...@@ -4057,9 +4291,6 @@ Sequence is too short. (< 5 resolved nts)
4057 6a6l_1_D 4291 6a6l_1_D
4058 Sequence is too short. (< 5 resolved nts) 4292 Sequence is too short. (< 5 resolved nts)
4059 4293
4060 -4v5z_1_BS
4061 -DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BS.
4062 -
4063 4v8t_1_1 4294 4v8t_1_1
4064 Could not find nucleotides of chain 1 in annotation 4v8t.json. Either there is a problem with 4v8t mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4295 Could not find nucleotides of chain 1 in annotation 4v8t.json. Either there is a problem with 4v8t mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4065 4296
...@@ -4072,6 +4303,9 @@ Sequence is too short. (< 5 resolved nts) ...@@ -4072,6 +4303,9 @@ Sequence is too short. (< 5 resolved nts)
4072 1uvi_1_E 4303 1uvi_1_E
4073 Sequence is too short. (< 5 resolved nts) 4304 Sequence is too short. (< 5 resolved nts)
4074 4305
4306 +3gs5_1_A
4307 +Nucleotides not inserted !
4308 +
4075 4m7d_1_P 4309 4m7d_1_P
4076 Sequence is too short. (< 5 resolved nts) 4310 Sequence is too short. (< 5 resolved nts)
4077 4311
...@@ -4132,12 +4366,12 @@ Could not find nucleotides of chain 2M in annotation 6ip6.json. Either there is ...@@ -4132,12 +4366,12 @@ Could not find nucleotides of chain 2M in annotation 6ip6.json. Either there is
4132 6qcs_1_M 4366 6qcs_1_M
4133 Sequence is too short. (< 5 resolved nts) 4367 Sequence is too short. (< 5 resolved nts)
4134 4368
4369 +7b5k_1_Z
4370 +Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4371 +
4135 486d_1_G 4372 486d_1_G
4136 Could not find nucleotides of chain G in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4373 Could not find nucleotides of chain G in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4137 4374
4138 -2r1g_1_C
4139 -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_C.
4140 -
4141 486d_1_F 4375 486d_1_F
4142 Could not find nucleotides of chain F in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4376 Could not find nucleotides of chain F in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4143 4377
...@@ -4177,6 +4411,9 @@ Could not find nucleotides of chain L in annotation 4oq9.json. Either there is a ...@@ -4177,6 +4411,9 @@ Could not find nucleotides of chain L in annotation 4oq9.json. Either there is a
4177 6r9q_1_B 4411 6r9q_1_B
4178 Sequence is too short. (< 5 resolved nts) 4412 Sequence is too short. (< 5 resolved nts)
4179 4413
4414 +7m4u_1_A
4415 +Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4416 +
4180 6v3a_1_SN1 4417 6v3a_1_SN1
4181 Could not find nucleotides of chain SN1 in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4418 Could not find nucleotides of chain SN1 in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4182 4419
...@@ -4189,9 +4426,6 @@ Could not find nucleotides of chain SN1 in annotation 6v39.json. Either there is ...@@ -4189,9 +4426,6 @@ Could not find nucleotides of chain SN1 in annotation 6v39.json. Either there is
4189 6v3e_1_SN1 4426 6v3e_1_SN1
4190 Could not find nucleotides of chain SN1 in annotation 6v3e.json. Either there is a problem with 6v3e mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4427 Could not find nucleotides of chain SN1 in annotation 6v3e.json. Either there is a problem with 6v3e mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4191 4428
4192 -1pn7_1_C
4193 -DSSR warning 1pn7.json: no nucleotides found. Ignoring 1pn7_1_C.
4194 -
4195 1mj1_1_Q 4429 1mj1_1_Q
4196 Could not find nucleotides of chain Q in annotation 1mj1.json. Either there is a problem with 1mj1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4430 Could not find nucleotides of chain Q in annotation 1mj1.json. Either there is a problem with 1mj1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4197 4431
...@@ -4315,12 +4549,6 @@ Sequence is too short. (< 5 resolved nts) ...@@ -4315,12 +4549,6 @@ Sequence is too short. (< 5 resolved nts)
4315 6oy6_1_I 4549 6oy6_1_I
4316 Sequence is too short. (< 5 resolved nts) 4550 Sequence is too short. (< 5 resolved nts)
4317 4551
4318 -4bbl_1_Y
4319 -DSSR warning 4bbl.json: no nucleotides found. Ignoring 4bbl_1_Y.
4320 -
4321 -4bbl_1_Z
4322 -DSSR warning 4bbl.json: no nucleotides found. Ignoring 4bbl_1_Z.
4323 -
4324 4qvd_1_H 4552 4qvd_1_H
4325 Sequence is too short. (< 5 resolved nts) 4553 Sequence is too short. (< 5 resolved nts)
4326 4554
...@@ -4330,15 +4558,54 @@ Sequence is too short. (< 5 resolved nts) ...@@ -4330,15 +4558,54 @@ Sequence is too short. (< 5 resolved nts)
4330 3iy8_1_A 4558 3iy8_1_A
4331 DSSR warning 3iy8.json: no nucleotides found. Ignoring 3iy8_1_A. 4559 DSSR warning 3iy8.json: no nucleotides found. Ignoring 3iy8_1_A.
4332 4560
4333 -6tnu_1_M 4561 +7n06_1_G
4334 -Could not find nucleotides of chain M in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4562 +Sequence is too short. (< 5 resolved nts)
4335 4563
4336 -5mc6_1_M 4564 +7n06_1_H
4337 -Could not find nucleotides of chain M in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4565 +Sequence is too short. (< 5 resolved nts)
4566 +
4567 +7n06_1_I
4568 +Sequence is too short. (< 5 resolved nts)
4569 +
4570 +7n06_1_J
4571 +Sequence is too short. (< 5 resolved nts)
4572 +
4573 +7n06_1_K
4574 +Sequence is too short. (< 5 resolved nts)
4575 +
4576 +7n06_1_L
4577 +Sequence is too short. (< 5 resolved nts)
4578 +
4579 +7n33_1_G
4580 +Sequence is too short. (< 5 resolved nts)
4581 +
4582 +7n33_1_H
4583 +Sequence is too short. (< 5 resolved nts)
4584 +
4585 +7n33_1_I
4586 +Sequence is too short. (< 5 resolved nts)
4587 +
4588 +7n33_1_J
4589 +Sequence is too short. (< 5 resolved nts)
4590 +
4591 +7n33_1_K
4592 +Sequence is too short. (< 5 resolved nts)
4593 +
4594 +7n33_1_L
4595 +Sequence is too short. (< 5 resolved nts)
4338 4596
4339 5mc6_1_N 4597 5mc6_1_N
4340 Could not find nucleotides of chain N in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4598 Could not find nucleotides of chain N in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4341 4599
4600 +2qwy_1_C
4601 +Nucleotides not inserted !
4602 +
4603 +2qwy_1_A
4604 +Nucleotides not inserted !
4605 +
4606 +2qwy_1_B
4607 +Nucleotides not inserted !
4608 +
4342 4eya_1_O 4609 4eya_1_O
4343 Could not find nucleotides of chain O in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4610 Could not find nucleotides of chain O in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4344 4611
...@@ -4363,12 +4630,6 @@ Could not find nucleotides of chain U in annotation 6htq.json. Either there is a ...@@ -4363,12 +4630,6 @@ Could not find nucleotides of chain U in annotation 6htq.json. Either there is a
4363 6uu6_1_333 4630 6uu6_1_333
4364 Sequence is too short. (< 5 resolved nts) 4631 Sequence is too short. (< 5 resolved nts)
4365 4632
4366 -6v3a_1_V
4367 -Could not find nucleotides of chain V in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4368 -
4369 -6v39_1_V
4370 -Could not find nucleotides of chain V in annotation 6v39.json. Either there is a problem with 6v39 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4371 -
4372 5a0v_1_F 4633 5a0v_1_F
4373 Sequence is too short. (< 5 resolved nts) 4634 Sequence is too short. (< 5 resolved nts)
4374 4635
...@@ -4495,6 +4756,9 @@ Could not find nucleotides of chain BV in annotation 6xa1.json. Either there is ...@@ -4495,6 +4756,9 @@ Could not find nucleotides of chain BV in annotation 6xa1.json. Either there is
4495 6ha8_1_X 4756 6ha8_1_X
4496 Could not find nucleotides of chain X in annotation 6ha8.json. Either there is a problem with 6ha8 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4757 Could not find nucleotides of chain X in annotation 6ha8.json. Either there is a problem with 6ha8 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4497 4758
4759 +3bnp_1_B
4760 +Nucleotides not inserted !
4761 +
4498 1m8w_1_E 4762 1m8w_1_E
4499 Could not find nucleotides of chain E in annotation 1m8w.json. Either there is a problem with 1m8w mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4763 Could not find nucleotides of chain E in annotation 1m8w.json. Either there is a problem with 1m8w mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4500 4764
...@@ -4564,6 +4828,21 @@ Sequence is too short. (< 5 resolved nts) ...@@ -4564,6 +4828,21 @@ Sequence is too short. (< 5 resolved nts)
4564 4wti_1_P 4828 4wti_1_P
4565 Sequence is too short. (< 5 resolved nts) 4829 Sequence is too short. (< 5 resolved nts)
4566 4830
4831 +6dlr_1_A
4832 +Nucleotides not inserted !
4833 +
4834 +6dlt_1_A
4835 +Nucleotides not inserted !
4836 +
4837 +6dls_1_A
4838 +Nucleotides not inserted !
4839 +
4840 +6dlq_1_A
4841 +Nucleotides not inserted !
4842 +
4843 +6dnr_1_A
4844 +Nucleotides not inserted !
4845 +
4567 5l3p_1_Y 4846 5l3p_1_Y
4568 Could not find nucleotides of chain Y in annotation 5l3p.json. Either there is a problem with 5l3p mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4847 Could not find nucleotides of chain Y in annotation 5l3p.json. Either there is a problem with 5l3p mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4569 4848
...@@ -4573,12 +4852,36 @@ Sequence is too short. (< 5 resolved nts) ...@@ -4573,12 +4852,36 @@ Sequence is too short. (< 5 resolved nts)
4573 3rzo_1_R 4852 3rzo_1_R
4574 Sequence is too short. (< 5 resolved nts) 4853 Sequence is too short. (< 5 resolved nts)
4575 4854
4855 +5wlh_1_B
4856 +Nucleotides not inserted !
4857 +
4576 2f4v_1_Z 4858 2f4v_1_Z
4577 Sequence is too short. (< 5 resolved nts) 4859 Sequence is too short. (< 5 resolved nts)
4578 4860
4861 +5ml7_1_B
4862 +Nucleotides not inserted !
4863 +
4579 1qln_1_R 4864 1qln_1_R
4580 Sequence is too short. (< 5 resolved nts) 4865 Sequence is too short. (< 5 resolved nts)
4581 4866
4867 +3pgw_1_R
4868 +DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R.
4869 +
4870 +3pgw_1_N
4871 +DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N.
4872 +
4873 +3cw1_1_X
4874 +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_X.
4875 +
4876 +3cw1_1_W
4877 +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_W.
4878 +
4879 +3cw1_1_V
4880 +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V.
4881 +
4882 +7b0y_1_A
4883 +Could not find nucleotides of chain A in annotation 7b0y.json. Either there is a problem with 7b0y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4884 +
4582 6ogy_1_M 4885 6ogy_1_M
4583 Sequence is too short. (< 5 resolved nts) 4886 Sequence is too short. (< 5 resolved nts)
4584 4887
...@@ -4588,12 +4891,12 @@ Sequence is too short. (< 5 resolved nts) ...@@ -4588,12 +4891,12 @@ Sequence is too short. (< 5 resolved nts)
4588 6uej_1_B 4891 6uej_1_B
4589 Sequence is too short. (< 5 resolved nts) 4892 Sequence is too short. (< 5 resolved nts)
4590 4893
4894 +7kga_1_A
4895 +Nucleotides not inserted !
4896 +
4591 6ywy_1_BB 4897 6ywy_1_BB
4592 Could not find nucleotides of chain BB in annotation 6ywy.json. Either there is a problem with 6ywy mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 4898 Could not find nucleotides of chain BB in annotation 6ywy.json. Either there is a problem with 6ywy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4593 4899
4594 -1x18_1_A
4595 -DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_A.
4596 -
4597 5ytx_1_B 4900 5ytx_1_B
4598 Sequence is too short. (< 5 resolved nts) 4901 Sequence is too short. (< 5 resolved nts)
4599 4902
...@@ -4720,24 +5023,12 @@ Could not find nucleotides of chain AA in annotation 5mrf.json. Either there is ...@@ -4720,24 +5023,12 @@ Could not find nucleotides of chain AA in annotation 5mrf.json. Either there is
4720 7jhy_1_Z 5023 7jhy_1_Z
4721 Could not find nucleotides of chain Z in annotation 7jhy.json. Either there is a problem with 7jhy mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 5024 Could not find nucleotides of chain Z in annotation 7jhy.json. Either there is a problem with 7jhy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4722 5025
4723 -2r1g_1_A
4724 -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_A.
4725 -
4726 -2r1g_1_D
4727 -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_D.
4728 -
4729 -2r1g_1_F
4730 -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_F.
4731 -
4732 3eq4_1_Y 5026 3eq4_1_Y
4733 DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_Y. 5027 DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_Y.
4734 5028
4735 4wkr_1_C 5029 4wkr_1_C
4736 Sequence is too short. (< 5 resolved nts) 5030 Sequence is too short. (< 5 resolved nts)
4737 5031
4738 -2r1g_1_X
4739 -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_X.
4740 -
4741 4v99_1_EC 5032 4v99_1_EC
4742 Could not find nucleotides of chain EC in annotation 4v99.json. Either there is a problem with 4v99 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 5033 Could not find nucleotides of chain EC in annotation 4v99.json. Either there is a problem with 4v99 mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4743 5034
...@@ -4927,120 +5218,51 @@ Sequence is too short. (< 5 resolved nts) ...@@ -4927,120 +5218,51 @@ Sequence is too short. (< 5 resolved nts)
4927 4ejt_1_G 5218 4ejt_1_G
4928 Sequence is too short. (< 5 resolved nts) 5219 Sequence is too short. (< 5 resolved nts)
4929 5220
4930 -6lkq_1_W 5221 +1et4_1_A
4931 -Could not find nucleotides of chain W in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 5222 +Nucleotides not inserted !
4932 -
4933 -3qsu_1_P
4934 -Sequence is too short. (< 5 resolved nts)
4935 -
4936 -3qsu_1_R
4937 -Sequence is too short. (< 5 resolved nts)
4938 -
4939 -2xs7_1_B
4940 -Sequence is too short. (< 5 resolved nts)
4941 -
4942 -1n38_1_B
4943 -Sequence is too short. (< 5 resolved nts)
4944 -
4945 -4qvc_1_G
4946 -Sequence is too short. (< 5 resolved nts)
4947 -
4948 -6q1h_1_D
4949 -Sequence is too short. (< 5 resolved nts)
4950 -
4951 -6q1h_1_H
4952 -Sequence is too short. (< 5 resolved nts)
4953 -
4954 -6p7p_1_F
4955 -Sequence is too short. (< 5 resolved nts)
4956 -
4957 -6p7p_1_E
4958 -Sequence is too short. (< 5 resolved nts)
4959 -
4960 -6p7p_1_D
4961 -Sequence is too short. (< 5 resolved nts)
4962 -
4963 -6vm6_1_J
4964 -Sequence is too short. (< 5 resolved nts)
4965 5223
4966 -6vm6_1_G 5224 +1et4_1_C
4967 -Sequence is too short. (< 5 resolved nts) 5225 +Nucleotides not inserted !
4968 5226
4969 -6wan_1_K 5227 +1et4_1_B
4970 -Sequence is too short. (< 5 resolved nts) 5228 +Nucleotides not inserted !
4971 5229
4972 -6wan_1_H 5230 +1et4_1_D
4973 -Sequence is too short. (< 5 resolved nts) 5231 +Nucleotides not inserted !
4974 5232
4975 -6wan_1_G 5233 +1et4_1_E
4976 -Sequence is too short. (< 5 resolved nts) 5234 +Nucleotides not inserted !
4977 5235
4978 -6wan_1_L 5236 +1ddy_1_C
4979 -Sequence is too short. (< 5 resolved nts) 5237 +Nucleotides not inserted !
4980 5238
4981 -6wan_1_I 5239 +1ddy_1_A
4982 -Sequence is too short. (< 5 resolved nts) 5240 +Nucleotides not inserted !
4983 5241
4984 -6ywo_1_F 5242 +1ddy_1_E
4985 -Sequence is too short. (< 5 resolved nts) 5243 +Nucleotides not inserted !
4986 -
4987 -6wan_1_J
4988 -Sequence is too short. (< 5 resolved nts)
4989 -
4990 -4oau_1_A
4991 -Sequence is too short. (< 5 resolved nts)
4992 5244
4993 -6ywo_1_E 5245 +6lkq_1_W
4994 -Sequence is too short. (< 5 resolved nts) 5246 +Could not find nucleotides of chain W in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
4995 -
4996 -6ywo_1_K
4997 -Sequence is too short. (< 5 resolved nts)
4998 -
4999 -6vm6_1_I
5000 -Sequence is too short. (< 5 resolved nts)
5001 -
5002 -6vm6_1_H
5003 -Sequence is too short. (< 5 resolved nts)
5004 -
5005 -6ywo_1_I
5006 -Sequence is too short. (< 5 resolved nts)
5007 -
5008 -2a1r_1_C
5009 -Sequence is too short. (< 5 resolved nts)
5010 -
5011 -6m6v_1_F
5012 -Sequence is too short. (< 5 resolved nts)
5013 -
5014 -6m6v_1_E
5015 -Sequence is too short. (< 5 resolved nts)
5016 -
5017 -2a1r_1_D
5018 -Sequence is too short. (< 5 resolved nts)
5019 -
5020 -3gpq_1_E
5021 -Sequence is too short. (< 5 resolved nts)
5022 5247
5023 -3gpq_1_F 5248 +6r47_1_A
5024 -Sequence is too short. (< 5 resolved nts) 5249 +Nucleotides not inserted !
5025 5250
5026 -6o79_1_C 5251 +3qsu_1_P
5027 Sequence is too short. (< 5 resolved nts) 5252 Sequence is too short. (< 5 resolved nts)
5028 5253
5029 -6vm6_1_K 5254 +3qsu_1_R
5030 Sequence is too short. (< 5 resolved nts) 5255 Sequence is too short. (< 5 resolved nts)
5031 5256
5032 -6m6v_1_G 5257 +2xs7_1_B
5033 Sequence is too short. (< 5 resolved nts) 5258 Sequence is too short. (< 5 resolved nts)
5034 5259
5035 -6hyu_1_D 5260 +1n38_1_B
5036 Sequence is too short. (< 5 resolved nts) 5261 Sequence is too short. (< 5 resolved nts)
5037 5262
5038 -1laj_1_R 5263 +4qvc_1_G
5039 Sequence is too short. (< 5 resolved nts) 5264 Sequence is too short. (< 5 resolved nts)
5040 5265
5041 -6ybv_1_K
5042 -Could not find nucleotides of chain K in annotation 6ybv.json. Either there is a problem with 6ybv mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5043 -
5044 6mpf_1_W 5266 6mpf_1_W
5045 Sequence is too short. (< 5 resolved nts) 5267 Sequence is too short. (< 5 resolved nts)
5046 5268
...@@ -5065,6 +5287,9 @@ Could not find nucleotides of chain V in annotation 6ftj.json. Either there is a ...@@ -5065,6 +5287,9 @@ Could not find nucleotides of chain V in annotation 6ftj.json. Either there is a
5065 6ftg_1_V 5287 6ftg_1_V
5066 Could not find nucleotides of chain V in annotation 6ftg.json. Either there is a problem with 6ftg mmCIF download, or the bases are not resolved in the structure. Delete it and retry. 5288 Could not find nucleotides of chain V in annotation 6ftg.json. Either there is a problem with 6ftg mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5067 5289
5290 +3npn_1_A
5291 +Nucleotides not inserted !
5292 +
5068 4g0a_1_G 5293 4g0a_1_G
5069 Sequence is too short. (< 5 resolved nts) 5294 Sequence is too short. (< 5 resolved nts)
5070 5295
...@@ -5080,15 +5305,6 @@ Sequence is too short. (< 5 resolved nts) ...@@ -5080,15 +5305,6 @@ Sequence is too short. (< 5 resolved nts)
5080 5hkc_1_C 5305 5hkc_1_C
5081 Sequence is too short. (< 5 resolved nts) 5306 Sequence is too short. (< 5 resolved nts)
5082 5307
5083 -4kzy_1_I
5084 -Could not find nucleotides of chain I in annotation 4kzy.json. Either there is a problem with 4kzy mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5085 -
5086 -4kzz_1_I
5087 -Could not find nucleotides of chain I in annotation 4kzz.json. Either there is a problem with 4kzz mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5088 -
5089 -4kzx_1_I
5090 -Could not find nucleotides of chain I in annotation 4kzx.json. Either there is a problem with 4kzx mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5091 -
5092 1rmv_1_B 5308 1rmv_1_B
5093 Sequence is too short. (< 5 resolved nts) 5309 Sequence is too short. (< 5 resolved nts)
5094 5310
...@@ -5134,69 +5350,3 @@ Sequence is too short. (< 5 resolved nts) ...@@ -5134,69 +5350,3 @@ Sequence is too short. (< 5 resolved nts)
5134 5hjz_1_C 5350 5hjz_1_C
5135 Sequence is too short. (< 5 resolved nts) 5351 Sequence is too short. (< 5 resolved nts)
5136 5352
5137 -7nrc_1_SM
5138 -Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5139 -
5140 -7nrc_1_SN
5141 -Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5142 -
5143 -7am2_1_R1
5144 -Sequence is too short. (< 5 resolved nts)
5145 -
5146 -7k5l_1_R
5147 -Sequence is too short. (< 5 resolved nts)
5148 -
5149 -7b5k_1_X
5150 -Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5151 -
5152 -7d8c_1_C
5153 -Sequence is too short. (< 5 resolved nts)
5154 -
5155 -7m4y_1_V
5156 -Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5157 -
5158 -7m4x_1_V
5159 -Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5160 -
5161 -7b5k_1_Z
5162 -Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5163 -
5164 -7m4u_1_A
5165 -Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry.
5166 -
5167 -7n06_1_G
5168 -Sequence is too short. (< 5 resolved nts)
5169 -
5170 -7n06_1_H
5171 -Sequence is too short. (< 5 resolved nts)
5172 -
5173 -7n06_1_I
5174 -Sequence is too short. (< 5 resolved nts)
5175 -
5176 -7n06_1_J
5177 -Sequence is too short. (< 5 resolved nts)
5178 -
5179 -7n06_1_K
5180 -Sequence is too short. (< 5 resolved nts)
5181 -
5182 -7n06_1_L
5183 -Sequence is too short. (< 5 resolved nts)
5184 -
5185 -7n33_1_G
5186 -Sequence is too short. (< 5 resolved nts)
5187 -
5188 -7n33_1_H
5189 -Sequence is too short. (< 5 resolved nts)
5190 -
5191 -7n33_1_I
5192 -Sequence is too short. (< 5 resolved nts)
5193 -
5194 -7n33_1_J
5195 -Sequence is too short. (< 5 resolved nts)
5196 -
5197 -7n33_1_K
5198 -Sequence is too short. (< 5 resolved nts)
5199 -
5200 -7n33_1_L
5201 -Sequence is too short. (< 5 resolved nts)
5202 -
......
...@@ -7,38 +7,27 @@ ...@@ -7,38 +7,27 @@
7 # Run this file if you want the base counts, pair-type counts, identity percents, etc 7 # Run this file if you want the base counts, pair-type counts, identity percents, etc
8 # in the database. 8 # in the database.
9 9
10 -import getopt, os, pickle, sqlite3, shlex, subprocess, sys, warnings 10 +import getopt, glob, json, os, sqlite3, shlex, subprocess, sys, warnings
11 import numpy as np 11 import numpy as np
12 import pandas as pd 12 import pandas as pd
13 -import threading as th
14 import scipy.stats as st 13 import scipy.stats as st
15 import matplotlib 14 import matplotlib
16 import matplotlib.pyplot as plt 15 import matplotlib.pyplot as plt
17 import matplotlib.cm as cm 16 import matplotlib.cm as cm
18 import matplotlib.patches as mpatches 17 import matplotlib.patches as mpatches
19 import scipy.cluster.hierarchy as sch 18 import scipy.cluster.hierarchy as sch
20 -import sklearn
21 -import json
22 -import glob
23 -import pickle
24 -import Bio
25 from scipy.spatial.distance import squareform 19 from scipy.spatial.distance import squareform
26 from mpl_toolkits.mplot3d import axes3d 20 from mpl_toolkits.mplot3d import axes3d
27 from Bio import AlignIO, SeqIO 21 from Bio import AlignIO, SeqIO
28 from Bio.PDB.MMCIFParser import MMCIFParser 22 from Bio.PDB.MMCIFParser import MMCIFParser
29 -from Bio.PDB.vectors import Vector, calc_angle, calc_dihedral
30 from functools import partial 23 from functools import partial
31 -from multiprocessing import Pool, Manager 24 +from multiprocessing import Pool, Manager, Value
32 from os import path 25 from os import path
33 from tqdm import tqdm 26 from tqdm import tqdm
34 from collections import Counter 27 from collections import Counter
35 from setproctitle import setproctitle 28 from setproctitle import setproctitle
36 from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker, trace_unhandled_exceptions 29 from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker, trace_unhandled_exceptions
37 -from sklearn.mixture import GaussianMixture 30 +from geometric_stats import *
38 -import warnings
39 -from pandas.core.common import SettingWithCopyWarning
40 -from joblib import Parallel, delayed
41 -
42 31
43 np.set_printoptions(threshold=sys.maxsize, linewidth=np.inf, precision=8) 32 np.set_printoptions(threshold=sys.maxsize, linewidth=np.inf, precision=8)
44 path_to_3D_data = "tobedefinedbyoptions" 33 path_to_3D_data = "tobedefinedbyoptions"
...@@ -928,6 +917,7 @@ def general_stats(): ...@@ -928,6 +917,7 @@ def general_stats():
928 fig.savefig(runDir + "/results/figures/Nfamilies.png") 917 fig.savefig(runDir + "/results/figures/Nfamilies.png")
929 plt.close() 918 plt.close()
930 919
920 +@trace_unhandled_exceptions
931 def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s): 921 def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
932 922
933 # Identify the right 3D file 923 # Identify the right 3D file
...@@ -1135,11 +1125,6 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): ...@@ -1135,11 +1125,6 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False):
1135 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") 1125 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
1136 return 0 1126 return 0
1137 1127
1138 -def log_to_pbar(pbar):
1139 - def update(r):
1140 - pbar.update(1)
1141 - return update
1142 -
1143 def family_order(f): 1128 def family_order(f):
1144 # sort the RNA families so that the plots are readable 1129 # sort the RNA families so that the plots are readable
1145 1130
...@@ -1154,70 +1139,6 @@ def family_order(f): ...@@ -1154,70 +1139,6 @@ def family_order(f):
1154 else: 1139 else:
1155 return 2 1140 return 2
1156 1141
1157 -def conversion_angles(bdd):
1158 - """
1159 - Convert database torsion angles to degrees
1160 - and put them in a list to reuse for statistics
1161 - """
1162 - BASE_DIR = os.path.dirname(os.path.abspath(__file__))
1163 - db_path = os.path.join(BASE_DIR, bdd)
1164 - baseDeDonnees = sqlite3.connect(db_path)
1165 - curseur = baseDeDonnees.cursor()
1166 - curseur.execute("SELECT chain_id, nt_name, alpha, beta, gamma, delta, epsilon, zeta, chi FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U' ;")
1167 - liste=[]
1168 - for nt in curseur.fetchall(): # retrieve the angle measurements and put them in a list
1169 - liste.append(nt)
1170 - angles_torsion=[]
1171 - for nt in liste :
1172 - angles_deg=[]
1173 - angles_deg.append(nt[0]) #chain_id
1174 - angles_deg.append(nt[1]) #nt_name
1175 - for i in range (2,9): # on all angles
1176 - angle=0
1177 - if nt[i] == None :
1178 - angle=None
1179 - elif nt[i]<=np.pi: #if angle value <pi, positive
1180 - angle=(180/np.pi)*nt[i]
1181 - elif np.pi < nt[i] <= 2*np.pi : #if value of the angle between pi and 2pi, negative
1182 - angle=((180/np.pi)*nt[i])-360
1183 - else :
1184 - angle=nt[i] # in case some angles still in degrees
1185 - angles_deg.append(angle)
1186 - angles_torsion.append(angles_deg)
1187 - return angles_torsion
1188 -
1189 -def conversion_eta_theta(bdd):
1190 - """
1191 - Convert database pseudotorsion angles to degrees
1192 - and put them in a list to reuse for statistics
1193 - """
1194 - BASE_DIR = os.path.dirname(os.path.abspath(__file__))
1195 - db_path = os.path.join(BASE_DIR, bdd)
1196 - baseDeDonnees = sqlite3.connect(db_path)
1197 - curseur = baseDeDonnees.cursor()
1198 - curseur.execute("SELECT chain_id, nt_name, eta, theta, eta_prime, theta_prime, eta_base, theta_base FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U';")
1199 - liste=[]
1200 - for nt in curseur.fetchall():
1201 - liste.append(nt)
1202 - angles_virtuels=[]
1203 - for nt in liste :
1204 - angles_deg=[]
1205 - angles_deg.append(nt[0]) #chain_id
1206 - angles_deg.append(nt[1]) #nt_name
1207 - for i in range (2,8):
1208 - angle=0
1209 - if nt[i] == None :
1210 - angle=None
1211 - elif nt[i]<=np.pi:
1212 - angle=(180/np.pi)*nt[i]
1213 - elif np.pi < nt[i] <= 2*np.pi :
1214 - angle=((180/np.pi)*nt[i])-360
1215 - else :
1216 - angle=nt[i]
1217 - angles_deg.append(angle)
1218 - angles_virtuels.append(angles_deg)
1219 - return angles_virtuels
1220 -
1221 def nt_3d_centers(cif_file, consider_all_atoms): 1142 def nt_3d_centers(cif_file, consider_all_atoms):
1222 """Return the nucleotides' coordinates, summarizing a nucleotide by only one point. 1143 """Return the nucleotides' coordinates, summarizing a nucleotide by only one point.
1223 If consider_all_atoms : barycentre is used 1144 If consider_all_atoms : barycentre is used
...@@ -1252,1674 +1173,30 @@ def nt_3d_centers(cif_file, consider_all_atoms): ...@@ -1252,1674 +1173,30 @@ def nt_3d_centers(cif_file, consider_all_atoms):
1252 result.append(res) 1173 result.append(res)
1253 return(result) 1174 return(result)
1254 1175
1255 -def liste_repres(fpath): 1176 +def representatives_from_nrlist(res):
1256 - repres=[] 1177 + nr_code = min([i for i in [1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 20.0] if i >= res])
1257 - df=pd.read_csv(os.path.abspath(fpath)) 1178 + fpath = f"/home/data/RNA/3D/latest_nr_list_{nr_code}A.csv"
1179 + repres = []
1180 + df = pd.read_csv(os.path.abspath(fpath))
1258 for i in range(df.shape[0]): 1181 for i in range(df.shape[0]):
1259 - up_name=df["representative"][i] 1182 + up_name = df["representative"][i]
1260 if '+' in up_name: 1183 if '+' in up_name:
1261 - up_name=up_name.split('+') 1184 + up_name = up_name.split('+')
1262 for i in range(len(up_name)): 1185 for i in range(len(up_name)):
1263 - chain=up_name[i].split('|') 1186 + chain = up_name[i].split('|')
1264 - chain=chain[0].lower()+'_'+chain[1]+'_'+chain[2] 1187 + chain = chain[0].lower() + '_' + chain[1] + '_' + chain[2]
1265 - repres.append(chain+'.cif') 1188 + repres.append(chain + '.cif')
1266 else : 1189 else :
1267 - up_name=up_name.split('|') 1190 + up_name = up_name.split('|')
1268 - low_name=up_name[0].lower()+'_'+up_name[1]+'_'+up_name[2] 1191 + low_name = up_name[0].lower() + '_' + up_name[1] + '_' + up_name[2]
1269 - repres.append(low_name+'.cif') 1192 + repres.append(low_name + '.cif')
1270 1193
1271 return repres 1194 return repres
1272 1195
1273 - 1196 +def log_to_pbar(pbar):
1274 -def get_euclidian_distance(L1, L2): 1197 + def update(r):
1275 - """
1276 - Returns the distance between two points (coordinates in lists)
1277 - """
1278 -
1279 - if len(L1)*len(L2) == 0:
1280 - return np.nan
1281 -
1282 - if len(L1) == 1:
1283 - L1 = L1[0]
1284 - if len(L2) == 1:
1285 - L2 = L2[0]
1286 -
1287 - e = 0
1288 - for i in range(len(L1)):
1289 - try:
1290 - e += float(L1[i] - L2[i])**2
1291 - except TypeError:
1292 - print("Terms: ", L1, L2)
1293 - except IndexError:
1294 - print("Terms: ", L1, L2)
1295 -
1296 - return np.sqrt(e)
1297 -
1298 -def get_flat_angle(L1, L2, L3):
1299 - if len(L1)*len(L2)*len(L3) == 0:
1300 - return np.nan
1301 -
1302 - return calc_angle(Vector(L1[0]), Vector(L2[0]), Vector(L3[0]))*(180/np.pi)
1303 -
1304 -def get_torsion_angle(L1, L2, L3, L4):
1305 - if len(L1)*len(L2)*len(L3)*len(L4) == 0:
1306 - return np.nan
1307 -
1308 - return calc_dihedral(Vector(L1[0]), Vector(L2[0]), Vector(L3[0]), Vector(L4[0]))*(180/np.pi)
1309 -
1310 -def pos_b1(res):
1311 - """
1312 - Returns the coordinates of virtual atom B1 (center of the first aromatic cycle)
1313 - """
1314 - coordb1=[]
1315 - somme_x_b1=0
1316 - somme_y_b1=0
1317 - somme_z_b1=0
1318 - moy_x_b1=0
1319 - moy_y_b1=0
1320 - moy_z_b1=0
1321 - #different cases
1322 - #some residues have 2 aromatic cycles
1323 - if res.get_resname() in ['A', 'G', '2MG', '7MG', 'MA6', '6IA', 'OMG' , '2MA', 'B9B', 'A2M', '1MA', 'E7G', 'P7G', 'B8W', 'B8K', 'BGH', '6MZ', 'E6G', 'MHG', 'M7A', 'M2G', 'P5P', 'G7M', '1MG', 'T6A', 'MIA', 'YG', 'YYG', 'I', 'DG', 'N79', '574', 'DJF', 'AET', '12A', 'ANZ', 'UY4'] :
1324 - c=0
1325 - names=[]
1326 - for atom in res :
1327 - if (atom.get_fullname() in ['N9', 'C8', 'N7', 'C4', 'C5']) :
1328 - c=c+1
1329 - names.append(atom.get_name())
1330 - coord=atom.get_vector()
1331 - somme_x_b1=somme_x_b1+coord[0]
1332 - somme_y_b1=somme_y_b1+coord[1]
1333 - somme_z_b1=somme_z_b1+coord[2]
1334 - else :
1335 - c=c
1336 - #calcul coord B1
1337 - if c != 0 :
1338 - moy_x_b1=somme_x_b1/c
1339 - moy_y_b1=somme_y_b1/c
1340 - moy_z_b1=somme_z_b1/c
1341 - coordb1.append(moy_x_b1)
1342 - coordb1.append(moy_y_b1)
1343 - coordb1.append(moy_z_b1)
1344 - #others have only one cycle
1345 - if res.get_resname() in ['C', 'U', 'AG9', '70U', '1RN', 'RSP', '3AU', 'CM0', 'U8U', 'IU', 'E3C', '4SU', '5HM', 'LV2', 'LHH', '4AC', 'CH', 'Y5P', '2MU', '4OC', 'B8T', 'JMH', 'JMC', 'DC', 'B9H', 'UR3', 'I4U', 'B8Q', 'P4U', 'OMU', 'OMC', '5MU', 'H2U', 'CBV', 'M1Y', 'B8N', '3TD', 'B8H'] :
1346 - c=0
1347 - for atom in res :
1348 - if (atom.get_fullname() in ['C6', 'N3', 'N1', 'C2', 'C4', 'C5']):
1349 - c=c+1
1350 - coord=atom.get_vector()
1351 - somme_x_b1=somme_x_b1+coord[0]
1352 - somme_y_b1=somme_y_b1+coord[1]
1353 - somme_z_b1=somme_z_b1+coord[2]
1354 - #calcul coord B1
1355 - if c != 0 :
1356 - moy_x_b1=somme_x_b1/c
1357 - moy_y_b1=somme_y_b1/c
1358 - moy_z_b1=somme_z_b1/c
1359 - coordb1.append(moy_x_b1)
1360 - coordb1.append(moy_y_b1)
1361 - coordb1.append(moy_z_b1)
1362 -
1363 - if len(coordb1):
1364 - return [coordb1]
1365 - else:
1366 - return []
1367 -
1368 -def pos_b2(res):
1369 - """
1370 - Returns the coordinates of virtual atom B2 (center of the second aromatic cycle, if exists)
1371 - """
1372 - coordb2=[]
1373 - somme_x_b2=0
1374 - somme_y_b2=0
1375 - somme_z_b2=0
1376 - moy_x_b2=0
1377 - moy_y_b2=0
1378 - moy_z_b2=0
1379 -
1380 - if res.get_resname() in ['A', 'G', '2MG', '7MG', 'MA6', '6IA', 'OMG' , '2MA', 'B9B', 'A2M', '1MA', 'E7G', 'P7G', 'B8W', 'B8K', 'BGH', '6MZ', 'E6G', 'MHG', 'M7A', 'M2G', 'P5P', 'G7M', '1MG', 'T6A', 'MIA', 'YG', 'YYG', 'I', 'DG', 'N79', '574', 'DJF', 'AET', '12A', 'ANZ', 'UY4'] : #2 cycles aromatiques
1381 - c=0
1382 - for atom in res :
1383 - if atom.get_fullname() in ['C6', 'N3', 'N1', 'C2', 'C4', 'C5'] :
1384 - c=c+1
1385 - coord=atom.get_vector()
1386 - somme_x_b2=somme_x_b2+coord[0]
1387 - somme_y_b2=somme_y_b2+coord[1]
1388 - somme_z_b2=somme_z_b2+coord[2]
1389 - #calcul coord B2
1390 - if c!=0 :
1391 - moy_x_b2=somme_x_b2/c
1392 - moy_y_b2=somme_y_b2/c
1393 - moy_z_b2=somme_z_b2/c
1394 - coordb2.append(moy_x_b2)
1395 - coordb2.append(moy_y_b2)
1396 - coordb2.append(moy_z_b2)
1397 - if len(coordb2):
1398 - return [coordb2]
1399 - else:
1400 - return []
1401 -
1402 -@trace_unhandled_exceptions
1403 -def basepair_measures(res, pair):
1404 - """
1405 - measurement of the flat angles describing a basepair in the HiRE-RNA model
1406 - """
1407 - if res.get_resname()=='C' or res.get_resname()=='U' :
1408 - atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ]
1409 - atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
1410 - atom_b1_res = pos_b1(res)
1411 - if not len(atom_c4_res) or not len(atom_c1p_res) or not len(atom_b1_res):
1412 - return
1413 - a3_res = Vector(atom_c4_res[0])
1414 - a2_res = Vector(atom_c1p_res[0])
1415 - a1_res = Vector(atom_b1_res[0])
1416 - if res.get_resname()=='A' or res.get_resname()=='G' :
1417 - atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ]
1418 - atom_b1_res = pos_b1(res)
1419 - atom_b2_res = pos_b2(res)
1420 - if not len(atom_c1p_res) or not len(atom_b1_res) or not len(atom_b2_res):
1421 - return
1422 - a3_res = Vector(atom_c1p_res[0])
1423 - a2_res = Vector(atom_b1_res[0])
1424 - a1_res = Vector(atom_b2_res[0])
1425 -
1426 - if pair.get_resname()=='C' or pair.get_resname()=='U' :
1427 - atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ]
1428 - atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
1429 - atom_b1_pair = pos_b1(pair)
1430 - if not len(atom_c4_pair) or not len(atom_c1p_pair) or not len(atom_b1_pair):
1431 - return
1432 - a3_pair = Vector(atom_c4_pair[0])
1433 - a2_pair = Vector(atom_c1p_pair[0])
1434 - a1_pair = Vector(atom_b1_pair[0])
1435 - if pair.get_resname()=='A' or pair.get_resname()=='G' :
1436 - atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ]
1437 - atom_b1_pair = pos_b1(pair)
1438 - atom_b2_pair = pos_b2(pair)
1439 - if not len(atom_c1p_pair) or not len(atom_b1_pair) or not len(atom_b2_pair): # No C1' atom in the paired nucleotide, skip measures.
1440 - return
1441 - a3_pair = Vector(atom_c1p_pair[0])
1442 - a2_pair = Vector(atom_b1_pair[0])
1443 - a1_pair = Vector(atom_b2_pair[0])
1444 -
1445 - # Bond vectors
1446 - res_32 = a3_res - a2_res
1447 - res_12 = a1_res - a2_res
1448 - pair_32 = a3_pair - a2_pair
1449 - pair_12 = a1_pair - a2_pair
1450 - rho = a1_res - a1_pair # from pair to res
1451 -
1452 - # dist
1453 - dist = rho.norm()
1454 -
1455 - # we calculate the 2 plane angles
1456 - with warnings.catch_warnings():
1457 - warnings.simplefilter('ignore', RuntimeWarning)
1458 - b = res_12.angle(rho)*(180/np.pi) # equal to the previous implementation
1459 - c = pair_12.angle(-rho)*(180/np.pi) #
1460 - # a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) # not required
1461 - # b = calc_angle(a2_res, a1_res, a1_pair)*(180/np.pi)
1462 - # c = calc_angle(a1_res, a1_pair, a2_pair)*(180/np.pi)
1463 - # d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) # not required
1464 -
1465 - # Compute plane vectors
1466 - n1 = (res_32**res_12).normalized() # ** between vectors, is the cross product
1467 - n2 = (pair_32**pair_12).normalized()
1468 -
1469 - # Distances between base tip and the other base's plane (orthogonal projection)
1470 - # if angle(rho, n) > pi/2 the distance is negative (signed following n)
1471 - d1 = rho*n1 # projection of rho on axis n1
1472 - d2 = rho*n2
1473 -
1474 - # Now the projection of rho in the planes. It's just a sum of the triangles' two other edges.
1475 - p1 = (-rho+n1**d1).normalized() # between vector and scalar, ** is the multiplication by a scalar
1476 - p2 = (rho-n2**d2).normalized()
1477 -
1478 - # Measure tau, the dihedral
1479 - u = (res_12**rho).normalized()
1480 - v = (rho**pair_12).normalized()
1481 - cosTau1 = n1*u
1482 - cosTau2 = v*n2
1483 -
1484 - # cosTau is enough to compute alpha, but we can't distinguish
1485 - # yet betwwen tau and -tau. If the full computation if required, then:
1486 - tau1 = np.arccos(cosTau1)*(180/np.pi)
1487 - tau2 = np.arccos(cosTau2)*(180/np.pi)
1488 - w1 = u**n1
1489 - w2 = v**n2
1490 - if res_12*w1 < 0:
1491 - tau1 = -tau1
1492 - if pair_12*w2 < 0:
1493 - tau2 = -tau2
1494 -
1495 - # And finally, the a1 and a2 angles between res_12 and p1 / pair_12 and p2
1496 - with warnings.catch_warnings():
1497 - warnings.simplefilter('ignore', RuntimeWarning)
1498 - a1 = (-res_12).angle(p1)*(180/np.pi)
1499 - a2 = (-pair_12).angle(p2)*(180/np.pi)
1500 - if cosTau1 > 0:
1501 - # CosTau > 0 (Tau < 90 or Tau > 270) implies that alpha > 180.
1502 - a1 = -a1
1503 - if cosTau2 > 0:
1504 - a2 = -a2
1505 -
1506 - return [dist, b, c, d1, d2, a1, a2, tau1, tau2]
1507 -
1508 -@trace_unhandled_exceptions
1509 -def measure_from_structure(f):
1510 - """
1511 - Do geometric measures required on a given filename
1512 - """
1513 -
1514 - name = f.split('.')[0]
1515 -
1516 - global idxQueue
1517 - thr_idx = idxQueue.get()
1518 - setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measure_from_structure({f})")
1519 -
1520 - # Open the structure
1521 - with warnings.catch_warnings():
1522 - # Ignore the PDB problems. This mostly warns that some chain is discontinuous.
1523 - warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.PDBConstructionWarning)
1524 - warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.BiopythonWarning)
1525 - parser=MMCIFParser()
1526 - s = parser.get_structure(f, os.path.abspath(path_to_3D_data+ "rna_only/" + f))
1527 -
1528 - #pyle_measures(name, s, thr_idx)
1529 - #measures_aa(name, s, thr_idx)
1530 - if DO_HIRE_RNA_MEASURES:
1531 - measures_hrna(name, s, thr_idx)
1532 - measures_hrna_basepairs(name, s, thr_idx)
1533 - if DO_WADLEY_ANALYSIS:
1534 - #measures_wadley(name, s, thr_idx)
1535 - pyle_measures(name, s, thr_idx)
1536 -
1537 - idxQueue.put(thr_idx) # replace the thread index in the queue
1538 - setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
1539 -
@trace_unhandled_exceptions
def measures_wadley(name, s, thr_idx):
    """
    Measure the pseudo-bond distances and flat angles built on the P, C1' and C4'
    atoms of consecutive residues, and save them in two CSV files.

    name: identifier of the structure, used to build the output file names
    s: parsed structure; only the first chain of its first model is measured
    thr_idx: index of the worker, used in the process title and progress bar position
    """

    angles_csv = runDir + '/results/geometry/Pyle/angles/flat_angles_pyle_' + name + '.csv'
    distances_csv = runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv"

    # Nothing to do when both output files already exist
    if path.isfile(angles_csv) and path.isfile(distances_csv):
        return

    ignored_resnames = ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"]
    distance_rows = []
    angle_rows = []
    # Atoms of the previously measured residue (empty before the first one)
    prev_p, prev_c1p, prev_c4p = [], [], []

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_wadley({name})")

    first_chain = next(s[0].get_chains())
    for res in tqdm(first_chain, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_wadley", unit="res", leave=False):
        resname = res.get_resname()
        if resname in ignored_resnames:
            continue

        cur_p = [atom.get_coord() for atom in res if atom.get_name() == "P"]
        cur_c1p = [atom.get_coord() for atom in res if "C1'" in atom.get_fullname()]
        cur_c4p = [atom.get_coord() for atom in res if "C4'" in atom.get_fullname()]

        # Report residues where several atoms match C1'
        if len(cur_c1p) > 1:
            for atom in res:
                if "C1'" in atom.get_fullname():
                    print("\n", atom.get_fullname(), "-", res.get_resname(), "\n")

        # Angles and distances linking the previous residue to this one
        angle_rows.append([resname,
                           get_flat_angle(prev_p, prev_c1p, cur_p),
                           get_flat_angle(prev_c1p, cur_p, cur_c1p)])
        distance_rows.append([resname,
                              get_euclidian_distance(prev_c1p, cur_p),
                              get_euclidian_distance(cur_p, cur_c1p),
                              get_euclidian_distance(prev_c4p, cur_p),
                              get_euclidian_distance(cur_p, cur_c4p)])

        prev_p, prev_c1p, prev_c4p = cur_p, cur_c1p, cur_c4p

    pd.DataFrame(distance_rows, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"]).to_csv(distances_csv)
    pd.DataFrame(angle_rows, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"]).to_csv(angles_csv)
1591 -
@trace_unhandled_exceptions
def measures_aa(name, s, thr_idx):
    """
    Measure the distances between covalently bonded atoms of the A, C, G and U
    residues of the first chain, and save them in one CSV file.

    name: identifier of the structure, used to build the output file name
    s: parsed structure; only the first chain of its first model is measured
    thr_idx: index of the worker, used in the process title and progress bar position
    """

    out_csv = runDir + "/results/geometry/all-atoms/distances/dist_atoms_" + name + ".csv"

    # do not recompute something already computed
    if path.isfile(out_csv):
        return

    # Atoms selected on their exact name...
    exact_labels = ("P", "C5", "C2", "O2", "C4", "O4")
    # ... and atoms selected when the label appears in the full atom name
    contained_labels = ("OP3", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'",
                        "N9", "C8", "N7", "C6", "O6", "N6", "N1", "N2", "N3", "N4")

    # Bonds, in the order of the output columns (the O3'(i-1)-P link is handled apart)
    backbone_bonds = [("OP3", "P"), ("OP1", "P"), ("OP2", "P"), ("O5'", "P"), ("O5'", "C5'"), ("C5'", "C4'"),
                      ("C4'", "O4'"), ("C4'", "C3'"), ("O4'", "C1'"), ("C1'", "C2'"), ("C2'", "O2'"),
                      ("C2'", "C3'"), ("C3'", "O3'")]
    purine_bonds = [("C1'", "N9"), ("N9", "C8"), ("C8", "N7"), ("N7", "C5"), ("C5", "C6"), ("C6", "O6"),
                    ("C6", "N6"), ("C6", "N1"), ("N1", "C2"), ("C2", "N2"), ("C2", "N3"), ("N3", "C4"),
                    ("C4", "N9"), ("C4", "C5")]
    pyrimidine_bonds = [("C1'", "N1"), ("N1", "C6"), ("C6", "C5"), ("C5", "C4"), ("C4", "N3"), ("N3", "C2"),
                        ("C2", "O2"), ("C2", "N1"), ("C4", "N4"), ("C4", "O4")]

    def bond_lengths(xyz, bonds):
        # One distance per (atom, atom) pair, in the given order
        return [get_euclidian_distance(xyz[a], xyz[b]) for a, b in bonds]

    prev_o3p = []   # O3' of the previous nucleotide, bonded to the P of the current one
    common_rows = []
    purine_rows = []
    pyrimidine_rows = []
    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measure_aa_dists({name})")

    chain = next(s[0].get_chains())     # 1 chain per file
    nb_residues = len(list(chain.get_residues()))
    pbar = tqdm(total=nb_residues, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measure_aa_dists", unit="res", leave=False)
    pbar.update(0)
    for res in chain:
        resname = res.get_resname()
        if resname not in ('A', 'G', 'C', 'U'):
            continue

        # Coordinates of every atom of interest in this residue
        xyz = {label: [atom.get_coord() for atom in res if atom.get_name() == label] for label in exact_labels}
        for label in contained_labels:
            xyz[label] = [atom.get_coord() for atom in res if label in atom.get_fullname()]

        # Sugar-phosphate backbone, including the link with the previous nucleotide
        common_rows.append([resname, get_euclidian_distance(prev_o3p, xyz["P"])] + bond_lengths(xyz, backbone_bonds))
        prev_o3p = xyz["O3'"]   # becomes the previous O3' of the next residue

        # Base: only the columns matching the kind of base are filled
        if resname in ('A', 'G'):
            purine_rows.append(bond_lengths(xyz, purine_bonds))
            pyrimidine_rows.append([None] * len(pyrimidine_bonds))
        else:
            purine_rows.append([None] * len(purine_bonds))
            pyrimidine_rows.append(bond_lengths(xyz, pyrimidine_bonds))
        pbar.update(1)

    df = pd.concat([
        pd.DataFrame(common_rows, columns=["Residu", "O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'"]),
        pd.DataFrame(purine_rows, columns=["C1'-N9", "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5" ]),
        pd.DataFrame(pyrimidine_rows, columns=["C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"]),
    ], axis=1)
    pbar.close()

    df.to_csv(out_csv)
1752 -
@trace_unhandled_exceptions
def measures_hrna(name, s, thr_idx):
    """
    Measure the distances, flat angles and torsions between the pseudo-atoms of the
    HiRE-RNA model (P, O5', C5', C4', C1', B1, B2), and save them in three CSV files.

    name: identifier of the structure, used to build the output file names
    s: parsed structure; only the first chain of its first model is measured
    thr_idx: index of the worker, used in the process title and progress bar position
    """

    distances_csv = runDir + '/results/geometry/HiRE-RNA/distances/dist_atoms_hire_RNA '+name+'.csv'
    angles_csv = runDir + '/results/geometry/HiRE-RNA/angles/angles_hire_RNA '+name+'.csv'
    torsions_csv = runDir + '/results/geometry/HiRE-RNA/torsions/angles_torsion_hire_RNA '+name+'.csv'

    # do not recompute something already computed
    if path.isfile(distances_csv) and path.isfile(angles_csv) and path.isfile(torsions_csv):
        return

    # Residues carrying several phosphate groups are ignored
    ignored_resnames = ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"]

    distance_rows = []
    angle_rows = []
    torsion_rows = []
    # Atoms of the previously measured residue (empty before the first one)
    prev_c4p, prev_c5p, prev_c1p, prev_o5p = [], [], [], []

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_hrna({name})")

    chain = next(s[0].get_chains())
    for res in tqdm(chain, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna", unit="res", leave=False):
        resname = res.get_resname()
        if resname in ignored_resnames:
            continue

        p = [atom.get_coord() for atom in res if atom.get_name() == "P"]
        o5p = [atom.get_coord() for atom in res if "O5'" in atom.get_fullname()]
        c5p = [atom.get_coord() for atom in res if "C5'" in atom.get_fullname()]
        c4p = [atom.get_coord() for atom in res if "C4'" in atom.get_fullname()]
        c1p = [atom.get_coord() for atom in res if "C1'" in atom.get_fullname()]
        b1 = pos_b1(res)    # position b1 to be calculated, depending on the case
        b2 = pos_b2(res)    # position b2 to be calculated only for those with 2 cycles

        # Distances. If one of the atoms is empty, the euclidian distance returns NaN.
        distance_rows.append([resname,
                              get_euclidian_distance(prev_c4p, p),
                              get_euclidian_distance(p, o5p),
                              get_euclidian_distance(o5p, c5p),
                              get_euclidian_distance(c5p, c4p),
                              get_euclidian_distance(c4p, c1p),
                              get_euclidian_distance(c1p, b1),
                              get_euclidian_distance(b1, b2)])

        # Flat angles, the first three involve the previous residue
        angle_rows.append([resname,
                           get_flat_angle(prev_c4p, p, o5p),
                           get_flat_angle(prev_c1p, prev_c4p, p),
                           get_flat_angle(prev_c5p, prev_c4p, p),
                           get_flat_angle(p, o5p, c5p),
                           get_flat_angle(o5p, c5p, c4p),
                           get_flat_angle(c5p, c4p, c1p),
                           get_flat_angle(c4p, c1p, b1),
                           get_flat_angle(c1p, b1, b2)])

        # Torsions, the last four involve the previous residue
        torsion_rows.append([resname,
                             get_torsion_angle(p, o5p, c5p, c4p),
                             get_torsion_angle(o5p, c5p, c4p, c1p),
                             get_torsion_angle(c5p, c4p, c1p, b1),
                             get_torsion_angle(c4p, c1p, b1, b2),
                             get_torsion_angle(prev_o5p, prev_c5p, prev_c4p, p),
                             get_torsion_angle(prev_c5p, prev_c4p, p, o5p),
                             get_torsion_angle(prev_c4p, p, o5p, c5p),
                             get_torsion_angle(prev_c1p, prev_c4p, p, o5p)])

        prev_c4p, prev_c5p, prev_c1p, prev_o5p = c4p, c5p, c1p, o5p

    pd.DataFrame(distance_rows, columns=["Residu", "C4'-P", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2"]).to_csv(distances_csv)
    pd.DataFrame(angle_rows, columns=["Residu", "C4'-P-O5'", "C1'-C4'-P", "C5'-C4'-P", "P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2"]).to_csv(angles_csv)
    pd.DataFrame(torsion_rows, columns=["Residu", "P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "O5'-C5'-C4'-P°", "C5'-C4'-P°-O5'°", "C4'-P°-O5'°-C5'°", "C1'-C4'-P°-O5'°"]).to_csv(torsions_csv)
1858 -
@trace_unhandled_exceptions
def measures_hrna_basepairs(name, s, thr_idx):
    """
    Run measures_hrna_basepairs_chain() on the first chain of structure s, using the
    datapoints file of the same name, and save the basepair measures in a CSV file.

    name: identifier of the structure, also the name of its file in datapoints/
    s: parsed structure; only the first chain of its first model is measured
    thr_idx: index of the worker, used in the process title and progress bar position
    """

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_hrna_basepairs({name})")

    basepair_rows = []
    first_chain = next(s[0].get_chains())
    out_csv = runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs_"+name+".csv"

    # do not recompute something already computed
    if path.isfile(out_csv):
        return

    datapoints = pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + name))

    # ignore files with numbering errors : TODO : remove when we get DSSR Pro, there should not be numbering errors anymore
    if datapoints['index_chain'][0] == 1:
        basepair_rows = measures_hrna_basepairs_chain(name, first_chain, datapoints, thr_idx)

    measures = pd.DataFrame(basepair_rows, columns=["type_LW", "nt1_idx", "nt1_res", "nt2_idx", "nt2_res", "Distance",
                                                    "211_angle", "112_angle", "dB1", "dB2", "alpha1", "alpha2", "3211_torsion", "1123_torsion"])
    measures.to_csv(out_csv, float_format="%.3f")
1881 -
@trace_unhandled_exceptions
def measures_hrna_basepairs_chain(name, chain, df, thr_idx):
    """
    Measure the geometry of every basepair annotated inside a chain.

    Rows of df without pairing, and pairings with index 0 (partner in another
    RNA chain), are discarded. Basepairs involving a residue other than A, C, G
    or U are skipped.

    name: identifier of the structure, only used in the progress bar description
    chain: chain whose residues are looked up with the (' ', index, ' ') identifier
    df: dataframe with the columns index_chain, old_nt_resnum, paired and pair_type_LW.
        'paired' holds one partner index, or several separated by commas;
        'pair_type_LW' holds the matching pairing type(s), comma-separated as well.
    thr_idx: index of the worker, used for the progress bar position

    Returns a list of rows [type_LW, nt1_idx, nt1_res, nt2_idx, nt2_res] + the values
    returned by basepair_measures().
    """

    results = []
    # Left in place so that the warning filter of the worker is the same as before
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

    # Keep the useful columns of the paired nucleotides only (NaN in 'paired' = no pairing).
    # Boolean indexing builds a new dataframe, the one of the caller is left untouched.
    pairs = df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']]
    pairs = pairs[pairs['paired'].notnull()]

    # Decode each row into (index of the nucleotide, partner(s), pairing type(s)).
    # Several pairings are recognized by the comma separator, not by the length of the
    # text: a single partner like "100" must remain a single pairing.
    to_measure = []
    for i in pairs.index:
        partners = pairs.at[i, 'paired']
        if isinstance(partners, str):
            partners = [int(p) for p in partners.split(',')]
            if len(partners) == 1:
                partners = partners[0]
        else:
            partners = int(partners)    # numpy integer or float
        if not isinstance(partners, list) and partners == 0:
            continue                    # paired to another RNA chain

        type_LW = pairs.at[i, 'pair_type_LW']
        if ',' in type_LW:
            type_LW = type_LW.split(',')
        to_measure.append((pairs.at[i, 'index_chain'], partners, type_LW))

    for index, num_paired, type_LW in tqdm(to_measure, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna_basepairs_chain", unit="res", leave=False):
        nt1 = chain[(' ', index, ' ')]
        res1 = nt1.get_resname()
        if res1 not in ['A','C','G','U']:
            continue

        # Normalize to a list of (partner index, pairing type) couples
        if isinstance(num_paired, list):
            couples = [(partner, type_LW[j] if isinstance(type_LW, list) else type_LW)
                       for j, partner in enumerate(num_paired) if partner != 0]
        else:
            couples = [(num_paired, type_LW)]

        for partner, lw in couples:
            nt2 = chain[(' ', partner, ' ')]
            res2 = nt2.get_resname()
            if res2 not in ["A","C","G","U"]:
                continue
            measures = basepair_measures(nt1, nt2)
            if measures is not None:
                results.append([lw, index, res1, partner, res2] + measures)

    return results
1954 -
@trace_unhandled_exceptions
def pyle_measures(name, s, thr_idx):
    """
    Measure the P-P, P-C4', P-C1', C4'-C4' and C1'-C1' distances between every
    A/C/G/U residue of the first chain and the residues numbered at least 4
    positions after it, and save them in a CSV file.

    When the second residue is not A, C, G or U, a row of NaN distances is stored.

    name: identifier of the structure, used to build the output file name
    s: parsed structure; only the first chain of its first model is measured
    thr_idx: index of the worker, used in the process title and progress bar position
    """

    out_csv = runDir + '/results/geometry/Pyle/distances/distances_pyle_'+name+'.csv'

    # do not recompute something already computed
    if path.isfile(out_csv):
        return

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} pyle_measures({name})")

    standard = ("A", "C", "G", "U")
    chain = next(s[0].get_chains())

    # Extract the atoms once per residue, instead of once per pair of residues
    nucleotides = []
    for res in chain:
        resname = res.get_resname()
        p = c1p = c4p = None
        if resname in standard:
            p = [atom.get_coord() for atom in res if atom.get_name() == "P"]
            c1p = [atom.get_coord() for atom in res if "C1'" in atom.get_fullname()]
            c4p = [atom.get_coord() for atom in res if "C4'" in atom.get_fullname()]
        nucleotides.append((resname, res.get_id()[1], p, c1p, c4p))

    liste_dist = []
    for resname1, resnum1, p_1, c1p_1, c4p_1 in tqdm(nucleotides, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} pyle_measures", unit="res", leave=False):
        if resname1 not in standard:
            continue
        for resname2, resnum2, p_2, c1p_2, c4p_2 in nucleotides:
            # only residues numbered at least 4 positions further
            if resnum2-resnum1 < 4:
                continue
            if resname2 in standard:
                distances = [get_euclidian_distance(p_1, p_2),
                             get_euclidian_distance(p_1, c4p_2),
                             get_euclidian_distance(p_1, c1p_2),
                             get_euclidian_distance(c4p_1, c4p_2),
                             get_euclidian_distance(c1p_1, c1p_2)]
            else:
                distances = [np.nan] * 5
            liste_dist.append([resname1, int(resnum1), resname2, int(resnum2)] + distances)

    df = pd.DataFrame(liste_dist, columns=["res1", "resnum1", "res2", "resnum2", "P-P", "P-C4'", "P-C1'", "C4'-C4'", "C1'-C1'"])
    df.to_csv(out_csv)
2041 -
@trace_unhandled_exceptions
def count_occur_pyle_dist(fpath):
    """
    Sum, row by row, the "P-P", "P-C1'", "P-C4'", "C1'-C1'" and "C4'-C4'" columns of
    every CSV file found in fpath, and save the totals in
    results/geometry/Pyle/classes_dist/occurences_dist.csv (one row per measure, one
    column per value of the "classe" column).

    fpath: directory containing the CSV files. File names are appended to it as is,
           so it has to end with a path separator.

    Raises FileNotFoundError if fpath contains no file.
    """

    global idxQueue
    thr_idx = idxQueue.get()
    setproctitle(f"Worker {thr_idx+1} : Extract occurences of {fpath}")

    measures = ["P-P", "P-C1'", "P-C4'", "C1'-C1'", "C4'-C4'"]
    pbar = None
    try:
        liste = os.listdir(fpath)
        if not liste:
            raise FileNotFoundError(f"No distance file found in {fpath}")
        pbar = tqdm(total=len(liste), position=thr_idx, desc="Preparing ", leave=False)

        totals = None
        df = None
        # Files are read from the last listed to the first one
        for fname in reversed(liste):
            df = pd.read_csv(os.path.abspath(fpath + fname))
            if totals is None:
                # the first file read fixes the number of rows
                totals = {m: list(df[m]) for m in measures}
            else:
                for m in measures:
                    total = totals[m]
                    for k, value in enumerate(df[m].tolist()):
                        total[k] = total[k] + value
            pbar.update(1)

        # Column names come from the "classe" column of the last file read
        nb_occurs = [totals[m] for m in measures]
        df = pd.DataFrame(nb_occurs, columns=list(df["classe"]))
        df.to_csv(runDir + "/results/geometry/Pyle/classes_dist/occurences_dist.csv")
    finally:
        if pbar is not None:
            pbar.close()
        # Always give the index back, even on error: the decorator swallows exceptions,
        # and the index would otherwise be missing from the queue for the next jobs.
        idxQueue.put(thr_idx)
    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
2075 -
2076 -
@trace_unhandled_exceptions
def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) :
    """
    Fit a one-dimensional Gaussian-Mixture-Model on the data, plot it (with or without
    the histogram of the data), and save its parameters in a JSON file.

    data_ori: the values to model (angles in degrees if toric, distances otherwise)
    name_data: name of the measure, used in titles, labels and file names
    toric: if True, the data is treated as periodic with period 360: it is replicated
           at -360 and +360 before the fit, only the components centered in ]-180, 180]
           are kept, and the curves are folded back on [-180, 180[
    hist: if True, plot the histogram and one curve per component; if False, plot only
          the sum of the components on the current figure, with a legend
    col: color of the summed curve (only used when hist is False)
    save: if True (and hist is True), save the figure in the current directory as
          Histogram_{name_data}_{nb_components}_comps.png and close it
    """
    data_ori = np.array(data_ori)

    if toric:
        # Extend the data on the right and on the left (for angles)
        data = np.concatenate([data_ori, data_ori-360.0, data_ori+360.0])
    else:
        data = data_ori
    obs = np.array(data).reshape(-1,1)

    # Choose the number of components (1 to 8) on the lower bound of the log-likelihood.
    nb_components = 1
    nb_log_max = 1      # number of components tried at the previous iteration
    log_max = 0         # best lower bound seen before the current iteration
    best_bound = -np.inf
    for n_comp in range(1, 9):
        gmm = GaussianMixture(n_components=n_comp).fit(obs)
        if gmm.lower_bound_ >= best_bound:     # new maximum
            best_bound = gmm.lower_bound_
            nb_components = n_comp
            # if there is convergence (gain under the 0.02 threshold), stop here
            if abs(gmm.lower_bound_-log_max) < 0.02:
                nb_components = nb_log_max
                break
        log_max = best_bound
        nb_log_max = n_comp

    # Now compute the final GMM, still on extended data
    g = GaussianMixture(n_components=nb_components)
    g.fit(obs)

    if toric:
        # Keep only the components centered on the original interval, and rescale their weights
        kept = [i for i, m in enumerate(g.means_) if -180 < m[0] <= 180]
        sum_weights = sum(g.weights_[i] for i in kept)
        means = [g.means_[i] for i in kept]
        covariances = [g.covariances_[i] for i in kept]
        weights = [g.weights_[i]/sum_weights for i in kept]
        nb_components = len(kept)
    else:
        weights = g.weights_
        means = g.means_
        covariances = g.covariances_

    # plot histograms if asked, with the appropriate number of components
    if hist:
        plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True)
        if toric:
            plt.xlabel("Angle (Degrees)")
        else:
            plt.xlabel("Distance (Angströms)")
        plt.ylabel("Density")

    # Prepare the abscissas of the GMM curves
    if toric:
        x = np.linspace(-360.0,360.0,721)   # x[i] = i - 360, one point per degree
        newx = x[180:540]                   # -180, -179, ..., 179
    else:
        D = obs.ravel()
        x = np.linspace(D.min(), D.max(), 1000)
        newx = x

    def fold(curve):
        # Sum the values of a curve sampled on x at the abscissas equal modulo 360.
        # Angle a (index a+180 of the result) gets x = a, plus x = a+360 if a <= 0,
        # plus x = a-360 if a >= 0.
        folded = curve[180:540].copy()
        folded[:181] += curve[540:]
        folded[180:] += curve[:180]
        return folded

    colors=['red', 'blue', 'gold', 'cyan', 'magenta', 'white', 'black', 'green']

    # prepare the dictionary to save the parameters
    summary_data = {}
    summary_data["measure"] = name_data
    summary_data["weights"] = []
    summary_data["means"] = []
    summary_data["std"] = []

    # plot
    curves = []
    for i in range(nb_components):

        # store the parameters (means and covariances are 1-element arrays)
        mean = float(np.ravel(means[i])[0])
        sigma = float(np.sqrt(np.ravel(covariances[i])[0]))
        weight = float(weights[i])
        summary_data["means"].append("{:.2f}".format(mean))
        summary_data["std"].append("{:.2f}".format(sigma))
        summary_data["weights"].append("{:.2f}".format(weight))

        # compute the right y data to plot
        y = weight*st.norm.pdf(x, mean, sigma)
        newy = fold(y) if toric else y

        if hist:
            # plot on top of the histograms
            plt.plot(newx, newy, c=colors[i])
        else:
            # store for later summation
            curves.append(np.array(newy))

    if hist:
        plt.title(f"Histogram of {name_data} with GMM of {nb_components} components (" + str(len(data_ori))+" values)")
        if save:
            plt.savefig(f"Histogram_{name_data}_{nb_components}_comps.png")
            plt.close()
    else:
        # Plot their sum, do not save figure yet
        if curves:
            try:
                plt.plot(newx, sum(curves), c=col, label=name_data)
            except TypeError:
                # diagnostic output kept from the previous version
                print("N curves:", len(curves))
                for c in curves:
                    print(c)
        plt.legend()

    # Save the json
    with open(runDir + "/results/geometry/json/" +name_data + ".json", 'w', encoding='utf-8') as f:
        json.dump(summary_data, f, indent=4)
2229 -
@trace_unhandled_exceptions
def gmm_aa_dists():
    """
    Draw the figures representing the data on the measurements of distances between atoms.

    Reads results/geometry/all-atoms/distances/dist_atoms.csv and processes three
    groups of bonds (atoms common to all nucleotides, purine rings, pyrimidine rings).
    For each group, GMM_histo() is called once per bond with its default arguments,
    then once more per bond with hist=False and a colour, and the current figure is
    saved as a PNG in results/figures/GMM/all-atoms/distances/<group>/.

    The working directory is moved to each group folder while drawing and is always
    set back to runDir, even when a step raises.
    """

    setproctitle("GMM (all atoms, distances)")

    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/all-atoms/distances/dist_atoms.csv"))
    figdir = runDir + "/results/figures/GMM/all-atoms/distances/"

    # Colours are assigned by position in the overlay; these two palettes are shared.
    palette14 = ['lightcoral', 'gold', 'lightseagreen', 'saddlebrown', 'darkturquoise', 'darkkhaki', 'indigo',
                 'maroon', 'burlywood', 'steelblue', 'tomato', 'darkolivegreen', 'orchid', 'deeppink']
    palette10 = ['lightcoral', 'gold', 'lightseagreen', 'deeppink', 'red', 'lime', 'indigo',
                 'maroon', 'burlywood', 'steelblue']

    purines = ["C1'-N9", "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6",
               "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5"]
    pyrimidines = ["C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"]

    groups = [
        {   # atoms common to all nucleotides
            "folder": "commun/",
            "histograms": ["O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'",
                           "C4'-C3'", "C3'-O3'", "O4'-C1'", "C1'-C2'", "C2'-C3'", "C2'-O2'"],
            "overlay": list(zip(["OP3-P", "P-OP1", "P-OP2", "O3'-P", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'",
                                 "C4'-C3'", "C3'-O3'", "O4'-C1'", "C1'-C2'", "C2'-C3'", "C2'-O2'"], palette14)),
            "overlay_kwargs": {"toric": False},
            "title": "GMM of distances between common atoms ",
            "title_kwargs": {},
            "png": "GMM_distances_common_atoms.png",
        },
        {   # purines (A or G)
            "folder": "purines/",
            "histograms": purines,
            "overlay": list(zip(purines, palette14)),
            "overlay_kwargs": {},
            "title": "GMM of distances between atoms of the purine cycles",
            "title_kwargs": {"fontsize": 10},
            "png": "GMM_distances_purine_cycles.png",
        },
        {   # pyrimidines (C or U)
            "folder": "pyrimidines/",
            "histograms": pyrimidines,
            "overlay": list(zip(pyrimidines, palette10)),
            "overlay_kwargs": {},
            "title": "GMM of distances between atoms of the pyrimidine cycles",
            "title_kwargs": {"fontsize": 10},
            "png": "GMM_distances_pyrimidine_cycles.png",
        },
    ]

    # Extract every column up front (NaN removed) so that a missing column fails before any drawing starts.
    measures = {}
    for group in groups:
        for bond in group["histograms"]:
            measures[bond] = list(df[bond][~ np.isnan(df[bond])])

    def drawable(bond):
        # OP3-P is the only bond that is skipped when it has no measurement at all.
        return bond != "OP3-P" or len(measures[bond]) > 0

    try:
        for group in groups:
            folder = figdir + group["folder"]
            os.makedirs(folder, exist_ok=True)
            os.chdir(folder)

            # one GMM_histo call per bond with default arguments
            for bond in group["histograms"]:
                if drawable(bond):
                    GMM_histo(measures[bond], bond)

            # then all the bonds of the group, without histogram, one colour each
            for bond, colour in group["overlay"]:
                if drawable(bond):
                    GMM_histo(measures[bond], bond, hist=False, col=colour, **group["overlay_kwargs"])

            axes = plt.gca()
            axes.set_ylim(0, 100)
            plt.xlabel("Distance (Angströms)")
            plt.title(group["title"], **group["title_kwargs"])
            plt.savefig(folder + group["png"])
            plt.close()
    finally:
        # never leave the process in a figures folder, even after an error
        os.chdir(runDir)

    setproctitle("GMM (all atoms, distances) finished")
2404 -
@trace_unhandled_exceptions
def gmm_aa_torsions():
    """
    Separates the torsion angle measurements by angle type and plots the figures representing the data.

    Rows are obtained from conversion_angles() on results/RNANet.db; fields 2 to 8 of
    each row are used as alpha, beta, gamma, delta, epsilon, zeta and chi, and None
    values are dropped. For each angle, GMM_histo() is called with toric=True, then
    again with hist=False and a colour, and the current figure is saved as
    GMM_torsions.png in results/figures/GMM/all-atoms/torsions/.

    The working directory is always set back to runDir, even when a step raises.
    """
    setproctitle("GMM (all atoms, torsions)")

    # (name passed to GMM_histo, colour in the overlay). Entry k reads field k+2 of each row.
    # "Xhi" is kept as is: merge_jsons() looks for a file with that exact name.
    torsions = [("Alpha", 'red'), ("Beta", 'firebrick'), ("Gamma", 'limegreen'), ("Delta", 'darkslateblue'),
                ("Epsilon", 'goldenrod'), ("Zeta", 'teal'), ("Xhi", 'hotpink')]

    rows = list(conversion_angles(runDir + "/results/RNANet.db"))

    # one list per angle, without the null values
    values = {
        name: [row[field] for row in rows if row[field] is not None]
        for field, (name, _) in enumerate(torsions, start=2)
    }

    folder = runDir + "/results/figures/GMM/all-atoms/torsions/"
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    try:
        # one GMM_histo call per angle without hist/col arguments
        for name, _ in torsions:
            GMM_histo(values[name], name, toric=True)

        # all the angles together, without histogram, to compare them with each other
        for name, colour in torsions:
            GMM_histo(values[name], name, toric=True, hist=False, col=colour)
        plt.xlabel("Angle (Degrees)")
        plt.title("GMM of torsion angles")
        plt.savefig("GMM_torsions.png")
        plt.close()
    finally:
        os.chdir(runDir)

    setproctitle("GMM (all atoms, torsions) finished")
2469 -
@trace_unhandled_exceptions
def gmm_wadley():
    """
    Draw the GMM figures for the Pyle model: distances, flat angles and pseudotorsions.

    - Distances come from results/geometry/Pyle/distances/distances_wadley.csv.
    - Flat angles come from results/geometry/Pyle/angles/flat_angles_pyle.csv.
    - Pseudotorsions come from conversion_eta_theta() on results/RNANet.db
      (fields 2 to 7 of each row, None values dropped).

    For each family, GMM_histo() is called once per measure, then once more per
    measure with hist=False and a colour, and the current figure is saved as a PNG in
    the matching subfolder of results/figures/GMM/Pyle/.

    The working directory is always set back to runDir, even when a step raises.
    """

    setproctitle("GMM (Pyle model)")

    figdir = runDir + "/results/figures/GMM/Pyle/"

    def draw(folder, series, overlay, hist_kwargs, overlay_kwargs, xlabel, title, png):
        """Run GMM_histo on every series, then overlay them with colours and save the figure in `folder`."""
        os.makedirs(folder, exist_ok=True)
        os.chdir(folder)
        for label, values in series.items():
            GMM_histo(values, label, **hist_kwargs)
        for label, colour in overlay:
            GMM_histo(series[label], label, hist=False, col=colour, **overlay_kwargs)
        plt.xlabel(xlabel)
        plt.title(title)
        plt.savefig(folder + png)
        plt.close()

    try:
        # Distances
        df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/distances/distances_wadley.csv"))

        # label given to GMM_histo -> CSV column it is read from.
        # NOTE(review): each label is the reverse of its column name (e.g. column "C1'-P" is
        # drawn as "P-C1'"). This mapping is kept unchanged; confirm it is intended.
        distance_columns = {"P-C1'": "C1'-P", "C1'-P": "P-C1'", "P-C4'": "C4'-P", "C4'-P": "P-C4'"}
        distances = {label: list(df[col][~ np.isnan(df[col])]) for label, col in distance_columns.items()}

        draw(figdir + "distances/", distances,
             [("P-C4'", 'gold'), ("C4'-P", 'indigo'), ("P-C1'", 'firebrick'), ("C1'-P", 'seagreen')],
             {}, {"toric": False},
             "Distance (Angströms)", "GMM of distances (Pyle model)", "GMM_distances_pyle_model.png")

        # Flat Angles
        df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle.csv"))

        flat_angles = {col: list(df[col][~ np.isnan(df[col])]) for col in ("P-C1'-P°", "C1'-P°-C1'°")}

        draw(figdir + "angles/", flat_angles,
             [("P-C1'-P°", 'firebrick'), ("C1'-P°-C1'°", 'seagreen')],
             {"toric": True}, {"toric": True},
             "Angle (Degrees)", "GMM of flat angles (Pyle model)", "GMM_flat_angles_pyle_model.png")

        # Torsion angles
        pseudotorsions = [("Eta", 'mediumaquamarine'), ("Theta", 'darkorchid'), ("Eta'", 'cyan'),
                          ("Theta'", 'crimson'), ("Eta''", 'royalblue'), ("Theta''", 'palevioletred')]
        rows = list(conversion_eta_theta(runDir + "/results/RNANet.db"))
        # entry k of `pseudotorsions` reads field k+2 of each row; null values are removed
        torsions = {
            label: [row[field] for row in rows if row[field] is not None]
            for field, (label, _) in enumerate(pseudotorsions, start=2)
        }

        draw(figdir + "pseudotorsions/", torsions, pseudotorsions,
             {"toric": True}, {"toric": True},
             "Angle (Degrees)", "GMM of pseudo-torsion angles (Pyle Model)", "GMM_pseudotorsion_angles_pyle_model.png")
    finally:
        os.chdir(runDir)

    setproctitle("GMM (Pyle model) finished")
2567 -
def gmm_pyle_type(ntpair, data):
    """
    Draw the superposed GMMs of the Pyle-model distances for one pair of nucleotide types.

    ntpair: two-letter string used in the curve names, the figure title and the PNG name.
    data:   table holding the columns "P-P", "P-C4'", "P-C1'", "C4'-C4'" and "C1'-C1'"
            (NaN values are ignored).

    The figure is saved as Distances_Pyle_<ntpair>.png in
    results/figures/GMM/Pyle/distances/. The working directory that was active on
    entry is restored before returning, even when a step raises.
    """

    setproctitle(f"GMM (Pyle {ntpair} )")

    def measures(column):
        return list(data[column][~ np.isnan(data[column])])

    previous_dir = os.getcwd()
    folder = runDir + "/results/figures/GMM/Pyle/distances/"
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    try:
        p_p = measures("P-P")
        curves = [
            (p_p,                 f"Distance P-P between {ntpair} tips for {len(p_p)} values", "cyan"),
            (measures("P-C4'"),   f"Distance P-C4' between {ntpair} tips",                     "tomato"),
            (measures("P-C1'"),   f"Distance P-C1' between {ntpair} tips",                     "goldenrod"),
            (measures("C4'-C4'"), f"Distance C4'-C4' between {ntpair} tips",                   "magenta"),
            (measures("C1'-C1'"), f"Distance C1'-C1' between {ntpair} tips",                   "black"),
        ]

        for values, label, colour in curves:
            GMM_histo(values, label, toric=False, hist=False, col=colour)

        plt.xlabel("Distance (Angströms)")
        # NOTE(review): the y axis carries the same label as the x axis; kept as is, confirm it is intended.
        plt.ylabel("Distance (Angströms)")
        plt.title(f"GMM of distances for {ntpair} ", fontsize=10)

        plt.savefig(f"Distances_Pyle_{ntpair}.png" )
        plt.close()
    finally:
        # do not leave the caller in the figures folder
        os.chdir(previous_dir)

    setproctitle(f"GMM (Pyle {ntpair} distances) finished")
2604 -
def gmm_pyle():
    """
    Read results/geometry/Pyle/distances/distances.csv and call gmm_pyle_type()
    for every (res1, res2) combination of A, C, G, U that has at least one row.
    """

    setproctitle("GMM (Pyle model)")

    csv_path = os.path.abspath(runDir + "/results/geometry/Pyle/distances/distances.csv")
    table = pd.read_csv(csv_path)

    # nothing to draw on an empty table
    if not len(table):
        return

    nucleotides = ['A','C','G','U']
    for first in nucleotides:
        for second in nucleotides:
            selected = table[(table.res1 == first)&(table.res2 == second)]
            if not len(selected):
                continue
            gmm_pyle_type(first+second, selected)
2619 -
@trace_unhandled_exceptions
def gmm_hrna():
    """
    Draw the figures representing the data on the measurements between atoms of the HiRE-RNA model.

    Three families are processed in turn (distances, angles, torsions). Each one reads
    its own CSV under results/geometry/HiRE-RNA/, calls GMM_histo() once per column,
    then once more per column with hist=False and a colour, and saves the current
    figure as a PNG in the matching subfolder of results/figures/GMM/HiRE-RNA/.

    The working directory is always set back to runDir, even when a step raises.
    """

    setproctitle("GMM (HiRE-RNA)")

    geomdir = runDir + "/results/geometry/HiRE-RNA/"
    figdir = runDir + "/results/figures/GMM/HiRE-RNA/"

    families = [
        {   # Distances
            "csv": "distances/dist_atoms_hire_RNA.csv",
            "folder": "distances/",
            "curves": [("O5'-C5'", 'lightcoral'), ("B1-B2", 'limegreen'), ("C1'-B1", 'tomato'),
                       ("C5'-C4'", 'aquamarine'), ("C4'-C1'", 'goldenrod'), ("P-O5'", 'darkcyan'),
                       ("C4'-P", 'deeppink')],
            "hist_kwargs": {},
            "overlay_kwargs": {"toric": False},
            "ylim": True,
            "xlabel": "Distance (Angströms)",
            "title": "GMM of distances between HiRE-RNA beads",
            "png": "GMM_distances_HiRE_RNA.png",
        },
        {   # Angles
            "csv": "angles/angles_hire_RNA.csv",
            "folder": "angles/",
            "curves": [("C4'-P-O5'", 'lightcoral'), ("C1'-C4'-P", 'limegreen'), ("C5'-C4'-P", 'tomato'),
                       ("P-O5'-C5'", 'aquamarine'), ("O5'-C5'-C4'", 'goldenrod'), ("C5'-C4'-C1'", 'darkcyan'),
                       ("C4'-C1'-B1", 'deeppink'), ("C1'-B1-B2", 'indigo')],
            "hist_kwargs": {"toric": True},
            "overlay_kwargs": {"toric": True},
            "ylim": True,
            "xlabel": "Angle (Degrees)",
            "title": "GMM of angles between HiRE-RNA beads",
            "png": "GMM_angles_HiRE_RNA.png",
        },
        {   # Torsions
            "csv": "torsions/angles_torsion_hire_RNA.csv",
            "folder": "torsions/",
            "curves": [("P-O5'-C5'-C4'", 'darkred'), ("O5'-C5'-C4'-C1'", 'chocolate'),
                       ("C5'-C4'-C1'-B1", 'mediumvioletred'), ("C4'-C1'-B1-B2", 'cadetblue'),
                       ("O5'-C5'-C4'-P°", 'darkkhaki'), ("C5'-C4'-P°-O5'°", 'springgreen'),
                       ("C4'-P°-O5'°-C5'°", 'indigo'), ("C1'-C4'-P°-O5'°", 'gold')],
            "hist_kwargs": {"toric": True},
            "overlay_kwargs": {"toric": True},
            "ylim": False,
            "xlabel": "Angle (Degrees)",
            "title": "GMM of torsion angles between HiRE-RNA beads",
            "png": "GMM_torsions_HiRE_RNA.png",
        },
    ]

    try:
        for family in families:
            df = pd.read_csv(os.path.abspath(geomdir + family["csv"]))

            # extract all the columns (NaN removed) before drawing anything
            measures = {col: list(df[col][~ np.isnan(df[col])]) for col, _ in family["curves"]}

            folder = figdir + family["folder"]
            os.makedirs(folder, exist_ok=True)
            os.chdir(folder)

            # one GMM_histo call per measure without hist/col arguments
            for col, _ in family["curves"]:
                GMM_histo(measures[col], col, **family["hist_kwargs"])

            # all the measures together, without histogram, one colour each
            for col, colour in family["curves"]:
                GMM_histo(measures[col], col, hist=False, col=colour, **family["overlay_kwargs"])

            if family["ylim"]:
                axes = plt.gca()
                axes.set_ylim(0, 100)
            plt.xlabel(family["xlabel"])
            plt.title(family["title"])
            plt.savefig(folder + family["png"])
            plt.close()
    finally:
        os.chdir(runDir)

    setproctitle("GMM (HiRE-RNA) finished")
2742 -
@trace_unhandled_exceptions
def gmm_hrna_basepair_type(type_LW, ntpair, data):
    """
    Plot, for one type of pairing and one pair of nucleotides, a two-panel figure:
        top panel:    superposition of the GMMs of the angle measures
        bottom panel: superposition of the GMMs of the distance measures
    and save it as <type_LW>_<ntpair>_basepairs.png in the current working directory.
    """

    setproctitle(f"GMM (HiRE-RNA {type_LW} basepairs)")

    plt.figure(figsize = (10, 10))
    plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5)

    # (column of data, end of the curve name, colour)
    angle_curves = (
        ("211_angle",    "C1'-B1-B1pair",              'cyan'),
        ("112_angle",    "B1-B1pair-C1'pair",          'magenta'),
        ("3211_torsion", "C4'-C1'-B1-B1pair",          'black'),
        ("1123_torsion", "B1-B1pair-C1'pair-C4'pair",  'maroon'),
        ("alpha1",       "alpha_1",                    "yellow"),
        ("alpha2",       "alpha_2",                    'olive'),
    )
    # (column of data, full curve name, colour)
    distance_curves = (
        ("Distance", f"Distance between {type_LW} {ntpair} tips", "cyan"),
        ("dB1",      f"{type_LW} {ntpair} dB1",                   "tomato"),
        ("dB2",      f"{type_LW} {ntpair} dB2",                   "goldenrod"),
    )

    # top panel: angles
    plt.subplot(2, 1, 1)
    for column, suffix, colour in angle_curves:
        GMM_histo(data[column], f"{type_LW}_{ntpair}_{suffix}", toric=True, hist=False, col=colour)
    plt.xlabel("Angle (degree)")
    plt.title(f"GMM of plane angles for {type_LW} {ntpair} basepairs", fontsize=10)

    # bottom panel: distances
    plt.subplot(2, 1, 2)
    for column, label, colour in distance_curves:
        GMM_histo(data[column], label, toric=False, hist=False, col=colour)
    plt.xlabel("Distance (Angströms)")
    plt.title(f"GMM of distances for {type_LW} {ntpair} basepairs", fontsize=10)

    plt.savefig(f"{type_LW}_{ntpair}_basepairs.png" )
    plt.close()
    setproctitle(f"GMM (HiRE-RNA {type_LW} {ntpair} basepairs) finished")
2778 -
@trace_unhandled_exceptions
def gmm_hrna_basepairs():
    """
    Draw one figure per (pairing type, nucleotide pair) found in the HiRE-RNA basepair measures.

    Reads results/geometry/HiRE-RNA/basepairs/basepairs.csv, then, for every value of
    the type_LW column listed below and every (nt1_res, nt2_res) combination of
    A, C, G, U that has at least one row, calls gmm_hrna_basepair_type().
    Drawing happens from results/figures/GMM/HiRE-RNA/basepairs/; the working
    directory is always set back to runDir, even when a step raises.
    """

    setproctitle("GMM (HiRE-RNA basepairs)")

    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv"))

    lw = ["cWW", "tWW", "cWH", "tWH", "cHW", "tHW", "cWS", "tWS", "cSW", "tSW", "cHH", "tHH", "cSH", "tSH", "cHS", "tHS", "cSS", "tSS"]
    nucleotides = ['A','C','G','U']

    folder = runDir + "/results/figures/GMM/HiRE-RNA/basepairs/"
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    try:
        for lw_type in lw:
            data = df[df['type_LW'] == lw_type]
            if not len(data):
                continue
            for b1 in nucleotides:
                for b2 in nucleotides:
                    thisbases = data[(data.nt1_res == b1) & (data.nt2_res == b2)]
                    if len(thisbases):
                        gmm_hrna_basepair_type(lw_type, b1+b2, thisbases)
    finally:
        os.chdir(runDir)

    setproctitle("GMM (HiRE-RNA basepairs) finished")
2812 -
def merge_jsons():
    """
    Merge the per-measure JSON files of results/geometry/json/ into one file per family,
    then delete the per-measure files.

    Files written (through concat_jsons):
        all_atom_distances.json, all_atom_torsions.json, hirerna_distances.json,
        hirerna_angles.json, hirerna_torsions.json and one
        hirerna_<nt1><nt2>_basepairs.json per pair of nucleotides.

    A hirerna_<nt1><nt2>_basepairs.json left by a previous run is not merged into
    the new one (its name matches the basepair pattern).
    """
    json_dir = runDir + "/results/geometry/json/"

    def merge(names, outname):
        """Concatenate the JSON files named after `names` into `outname`; return the input paths."""
        paths = [ json_dir + x + ".json" for x in names ]
        concat_jsons(paths, json_dir + outname)
        return paths

    # All atom distances
    bonds = merge(["O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'", "C1'-N9",
                   "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5",
                   "C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"],
                  "all_atom_distances.json")

    # All atom torsions
    torsions = merge(["Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Xhi", "Zeta"],
                     "all_atom_torsions.json")

    # HiRE-RNA distances
    hrnabonds = merge(["P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2", "C4'-P"],
                      "hirerna_distances.json")

    # HiRE-RNA angles
    hrnaangles = merge(["P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2", "C4'-P-O5'", "C5'-C4'-P", "C1'-C4'-P"],
                       "hirerna_angles.json")

    # HiRE-RNA torsions
    hrnators = merge(["P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "C4'-P°-O5'°-C5'°", "C5'-C4'-P°-O5'°", "C1'-C4'-P°-O5'°", "O5'-C5'-C4'-P°"],
                     "hirerna_torsions.json")

    # HiRE-RNA basepairs
    for nt1 in ['A', 'C', 'G', 'U']:
        for nt2 in ['A', 'C', 'G', 'U']:
            outname = f"hirerna_{nt1}{nt2}_basepairs.json"
            # The pattern also matches the output of a previous run: leave it out,
            # otherwise old results would be nested inside the new file.
            # Sorted so that the content of the merged file does not depend on the listing order.
            bps = sorted(f for f in glob.glob(json_dir + f"*{nt1}{nt2}*.json")
                         if os.path.basename(f) != outname)
            concat_jsons(bps, json_dir + outname)

    # Delete previous files
    leftovers = bonds + torsions + hrnabonds + hrnaangles + hrnators
    for pattern in ("t*.json", "c*.json", "Distance*.json"):
        leftovers += glob.glob(json_dir + pattern)
    for f in leftovers:
        try:
            os.remove(f)
        except FileNotFoundError:
            # some measures are never produced (or were already removed): nothing to delete
            pass
2869 -
@trace_unhandled_exceptions
def loop(f):
    """Read the CSV file `f` with pandas and return the resulting dataframe."""
    dataframe = pd.read_csv(f)
    return dataframe
2873 -
@trace_unhandled_exceptions
def concat_dataframes(fpath, outfilename):
    """
    Concatenates the dataframes containing measures
    and creates a new dataframe gathering all

    fpath:       folder holding the CSV files to merge; it is concatenated directly with
                 the file names, so it is expected to end with a path separator.
    outfilename: name of the merged CSV, written inside fpath.

    A file already named outfilename in fpath (from a previous run) is not read again,
    so the output is never merged into itself. If fpath holds no other file, a message
    is printed and nothing is written. The worker index taken from idxQueue is always
    put back, even when reading or writing fails.
    """
    global idxQueue
    thr_idx = idxQueue.get()
    setproctitle(f"Worker {thr_idx+1} : Concatenation of {fpath}")

    try:
        liste = [ f for f in os.listdir(fpath) if f != outfilename ]

        frames = []
        with tqdm(total=len(liste), position=thr_idx, desc="Preparing "+outfilename, leave=False) as pbar:
            # files are read from the end of the listing, as before, so the row order is unchanged
            for f in reversed(liste):
                frames.append(pd.read_csv(os.path.abspath(fpath + f), engine="python"))
                pbar.update(1)

        if frames:
            # a single concatenation instead of one per file (which copied the growing table each time)
            df_tot = pd.concat(frames, ignore_index=True)
            df_tot.to_csv(fpath + outfilename)
        else:
            print(f"No file to concatenate in {fpath}, {outfilename} was not written.")
    finally:
        idxQueue.put(thr_idx) # replace the thread index in the queue

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
2906 -
def concat_jsons(flist, outfilename):
    """
    Reads JSON files computed by the geometry jobs and merge them into a smaller
    number of files

    flist:       paths of the JSON files to read, in the order they must appear in the output.
    outfilename: path of the JSON file to write. It receives a list holding the content
                 of each input file (an empty list if none could be read).

    Input files that do not exist are skipped: some measures are only produced when
    there is data for them.
    """

    result = []
    for f in flist:
        try:
            with open(f, "rb") as infile:
                result.append(json.load(infile))
        except FileNotFoundError:
            # this measure was not computed, merge the others anyway
            continue

    # write the files
    with open(outfilename, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4)
2923 1200
2924 def process_jobs(joblist): 1201 def process_jobs(joblist):
2925 """ 1202 """
...@@ -2952,13 +1229,16 @@ if __name__ == "__main__": ...@@ -2952,13 +1229,16 @@ if __name__ == "__main__":
2952 DO_WADLEY_ANALYSIS = False 1229 DO_WADLEY_ANALYSIS = False
2953 DO_AVG_DISTANCE_MATRIX = False 1230 DO_AVG_DISTANCE_MATRIX = False
2954 DO_HIRE_RNA_MEASURES = False 1231 DO_HIRE_RNA_MEASURES = False
1232 + RESCAN_GMM_COMP_NUM = False
2955 try: 1233 try:
2956 - opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", "3d-folder=", "seq-folder=", "hire-rna" ]) 1234 + opts, _ = getopt.getopt( sys.argv[1:], "r:h",
1235 + [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=",
1236 + "3d-folder=", "seq-folder=", "hire-rna", "rescan-nmodes" ])
2957 except getopt.GetoptError as err: 1237 except getopt.GetoptError as err:
2958 print(err) 1238 print(err)
2959 sys.exit(2) 1239 sys.exit(2)
2960 - for opt, arg in opts:
2961 1240
1241 + for opt, arg in opts:
2962 if opt == "-h" or opt == "--help": 1242 if opt == "-h" or opt == "--help":
2963 print( "RNANet statistics, a script to build a multiscale RNA dataset from public data\n" 1243 print( "RNANet statistics, a script to build a multiscale RNA dataset from public data\n"
2964 "Developed by Louis Becquey, Khodor Hannoush, and Aglaé Tabot 2019/2021") 1244 "Developed by Louis Becquey, Khodor Hannoush, and Aglaé Tabot 2019/2021")
...@@ -2975,28 +1255,28 @@ if __name__ == "__main__": ...@@ -2975,28 +1255,28 @@ if __name__ == "__main__":
2975 print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.") 1255 print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.")
2976 print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.") 1256 print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.")
2977 print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data.") 1257 print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data.")
2978 - 1258 + print("--rescan-nmodes\t\t\tDo not assume the number of modes in distances and angles distributions, measure it.")
2979 sys.exit() 1259 sys.exit()
2980 - elif opt == '--version': 1260 + elif opt == "--version":
2981 print("RNANet statistics 1.6 beta") 1261 print("RNANet statistics 1.6 beta")
2982 sys.exit() 1262 sys.exit()
2983 elif opt == "-r" or opt == "--resolution": 1263 elif opt == "-r" or opt == "--resolution":
2984 assert float(arg) > 0.0 and float(arg) <= 20.0 1264 assert float(arg) > 0.0 and float(arg) <= 20.0
2985 res_thr = float(arg) 1265 res_thr = float(arg)
2986 - elif opt=='--3d-folder': 1266 + elif opt == "--3d-folder":
2987 path_to_3D_data = path.abspath(arg) 1267 path_to_3D_data = path.abspath(arg)
2988 if path_to_3D_data[-1] != '/': 1268 if path_to_3D_data[-1] != '/':
2989 path_to_3D_data += '/' 1269 path_to_3D_data += '/'
2990 - elif opt=='--seq-folder': 1270 + elif opt == "--seq-folder":
2991 path_to_seq_data = path.abspath(arg) 1271 path_to_seq_data = path.abspath(arg)
2992 if path_to_seq_data[-1] != '/': 1272 if path_to_seq_data[-1] != '/':
2993 path_to_seq_data += '/' 1273 path_to_seq_data += '/'
2994 - elif opt=='--from-scratch': 1274 + elif opt == "--from-scratch":
2995 DELETE_OLD_DATA = True 1275 DELETE_OLD_DATA = True
2996 DO_WADLEY_ANALYSIS = True 1276 DO_WADLEY_ANALYSIS = True
2997 - elif opt=="--distance-matrices": 1277 + elif opt == "--distance-matrices":
2998 DO_AVG_DISTANCE_MATRIX = True 1278 DO_AVG_DISTANCE_MATRIX = True
2999 - elif opt=='--wadley': 1279 + elif opt == "--wadley":
3000 DO_WADLEY_ANALYSIS = True 1280 DO_WADLEY_ANALYSIS = True
3001 os.makedirs(runDir+"/results/geometry/Pyle/distances/", exist_ok=True) 1281 os.makedirs(runDir+"/results/geometry/Pyle/distances/", exist_ok=True)
3002 os.makedirs(runDir+"/results/geometry/Pyle/classes_dist/", exist_ok=True) 1282 os.makedirs(runDir+"/results/geometry/Pyle/classes_dist/", exist_ok=True)
...@@ -3005,7 +1285,7 @@ if __name__ == "__main__": ...@@ -3005,7 +1285,7 @@ if __name__ == "__main__":
3005 os.makedirs(runDir+"/results/figures/GMM/Pyle/distances/", exist_ok=True) 1285 os.makedirs(runDir+"/results/figures/GMM/Pyle/distances/", exist_ok=True)
3006 os.makedirs(runDir+"/results/figures/GMM/Pyle/angles/", exist_ok=True) 1286 os.makedirs(runDir+"/results/figures/GMM/Pyle/angles/", exist_ok=True)
3007 os.makedirs(runDir+"/results/figures/GMM/Pyle/pseudotorsions/", exist_ok=True) 1287 os.makedirs(runDir+"/results/figures/GMM/Pyle/pseudotorsions/", exist_ok=True)
3008 - elif opt=='--hire-rna': 1288 + elif opt == "--hire-rna":
3009 DO_HIRE_RNA_MEASURES = True 1289 DO_HIRE_RNA_MEASURES = True
3010 os.makedirs(runDir + "/results/geometry/HiRE-RNA/distances/", exist_ok=True) 1290 os.makedirs(runDir + "/results/geometry/HiRE-RNA/distances/", exist_ok=True)
3011 os.makedirs(runDir + "/results/geometry/HiRE-RNA/angles/", exist_ok=True) 1291 os.makedirs(runDir + "/results/geometry/HiRE-RNA/angles/", exist_ok=True)
...@@ -3015,7 +1295,8 @@ if __name__ == "__main__": ...@@ -3015,7 +1295,8 @@ if __name__ == "__main__":
3015 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/angles/", exist_ok=True) 1295 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/angles/", exist_ok=True)
3016 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True) 1296 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True)
3017 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) 1297 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True)
3018 - 1298 + elif opt == "--rescan-nmodes":
1299 + RESCAN_GMM_COMP_NUM = True
3019 1300
3020 # Load mappings. famlist will contain only families with structures at this resolution threshold. 1301 # Load mappings. famlist will contain only families with structures at this resolution threshold.
3021 1302
...@@ -3053,7 +1334,8 @@ if __name__ == "__main__": ...@@ -3053,7 +1334,8 @@ if __name__ == "__main__":
3053 print("Old data deleted.") 1334 print("Old data deleted.")
3054 1335
3055 # Prepare the multiprocessing execution environment 1336 # Prepare the multiprocessing execution environment
3056 - nworkers = min(read_cpu_number()-1, 50) 1337 + global nworkers
1338 + nworkers = max(read_cpu_number()-1, 1)
3057 print("Using", nworkers, "threads...") 1339 print("Using", nworkers, "threads...")
3058 thr_idx_mgr = Manager() 1340 thr_idx_mgr = Manager()
3059 idxQueue = thr_idx_mgr.Queue() 1341 idxQueue = thr_idx_mgr.Queue()
...@@ -3063,26 +1345,25 @@ if __name__ == "__main__": ...@@ -3063,26 +1345,25 @@ if __name__ == "__main__":
3063 # Define the tasks 1345 # Define the tasks
3064 joblist = [] 1346 joblist = []
3065 1347
3066 - # Do eta/theta plots 1348 + # # Do eta/theta plots
3067 - #if n_unmapped_chains and DO_WADLEY_ANALYSIS: 1349 + # if n_unmapped_chains and DO_WADLEY_ANALYSIS:
3068 # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) 1350 # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr)))
3069 # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) 1351 # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr)))
3070 1352
3071 - # Do distance matrices for each family excl. LSU/SSU (will be processed later) 1353 + # # Do distance matrices for each family excl. LSU/SSU (will be processed later)
3072 - if DO_AVG_DISTANCE_MATRIX: 1354 + # if DO_AVG_DISTANCE_MATRIX:
3073 - extracted_chains = [] 1355 + # extracted_chains = []
3074 - for file in os.listdir(path_to_3D_data + "rna_mapped_to_Rfam"): 1356 + # for file in os.listdir(path_to_3D_data + "rna_mapped_to_Rfam"):
3075 - if os.path.isfile(os.path.join(path_to_3D_data + "rna_mapped_to_Rfam", file)): 1357 + # if os.path.isfile(os.path.join(path_to_3D_data + "rna_mapped_to_Rfam", file)):
3076 - e1 = file.split('_')[0] 1358 + # e1 = file.split('_')[0]
3077 - e2 = file.split('_')[1] 1359 + # e2 = file.split('_')[1]
3078 - e3 = file.split('_')[2] 1360 + # e3 = file.split('_')[2]
3079 - extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3) 1361 + # extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3)
3080 - for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]: # Process the rRNAs later only 3 by 3 1362 + # for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]: # Process the rRNAs later only 3 by 3
3081 - joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) 1363 + # joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False)))
3082 - joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) 1364 + # joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False)))
3083 - 1365 +
3084 - # Do general family statistics 1366 + # # Do general family statistics
3085 -
3086 # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths 1367 # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths
3087 # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) 1368 # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families)
3088 # for f in famlist: 1369 # for f in famlist:
...@@ -3091,25 +1372,18 @@ if __name__ == "__main__": ...@@ -3091,25 +1372,18 @@ if __name__ == "__main__":
3091 # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) 1372 # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families)
3092 1373
3093 1374
3094 - # Do geometric measures on all chains 1375 + # Do geometric measures
3095 - #print(liste_repres('/home/data/RNA/3D/latest_nr_list_4.0A.csv'))
3096 -
3097 - # print(measure_from_structure(os.listdir(path_to_3D_data + "rna_only")[0]))
3098 if n_unmapped_chains: 1376 if n_unmapped_chains:
3099 - # os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) 1377 + os.makedirs(runDir + "/results/geometry/all-atoms/distances/", exist_ok=True)
3100 - # liste_struct = os.listdir(path_to_3D_data + "rna_only") 1378 + # structure_list = os.listdir(path_to_3D_data + "rna_only")
3101 - liste_struct=liste_repres('/home/data/RNA/3D/latest_nr_list_4.0A.csv') 1379 + structure_list = representatives_from_nrlist(res_thr)
3102 - # if '4zdo_1_E.cif' in liste_struct: 1380 + for f in structure_list:
3103 - # liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now
3104 - # if '4zdp_1_E.cif' in liste_struct:
3105 - # liste_struct.remove('4zdp_1_E.cif')
3106 - for f in liste_struct:
3107 if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]): 1381 if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]):
3108 joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances 1382 joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances
3109 1383
3110 1384
3111 - process_jobs(joblist) 1385 + # process_jobs(joblist)
3112 - #count_occur_pyle_dist(runDir + '/results/geometry/Pyle/classes_dist/') 1386 +
3113 # Now process the memory-heavy tasks family by family 1387 # Now process the memory-heavy tasks family by family
3114 if DO_AVG_DISTANCE_MATRIX: 1388 if DO_AVG_DISTANCE_MATRIX:
3115 for f in LSU_set: 1389 for f in LSU_set:
...@@ -3124,36 +1398,31 @@ if __name__ == "__main__": ...@@ -3124,36 +1398,31 @@ if __name__ == "__main__":
3124 1398
3125 # finish the work after the parallel portions 1399 # finish the work after the parallel portions
3126 1400
3127 - # per_chain_stats() # per chain base frequencies en basepair types 1401 + # per_chain_stats() # per chain base frequencies and basepair types
3128 # seq_idty() # identity matrices from pre-computed .npy matrices 1402 # seq_idty() # identity matrices from pre-computed .npy matrices
3129 # stats_pairs() 1403 # stats_pairs()
3130 - concat_dataframes(runDir + '/results/geometry/Pyle/distances/', 'distances.csv')
3131 if n_unmapped_chains: 1404 if n_unmapped_chains:
3132 # general_stats() 1405 # general_stats()
3133 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) 1406 os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True)
3134 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) 1407 os.makedirs(runDir+"/results/geometry/json/", exist_ok=True)
1408 + concat_dataframes(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')
1409 + if DO_HIRE_RNA_MEASURES:
1410 + concat_dataframes(runDir + '/results/geometry/HiRE-RNA/distances/', 'distances_HiRERNA.csv')
1411 + concat_dataframes(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_HiRERNA.csv')
1412 + concat_dataframes(runDir + '/results/geometry/HiRE-RNA/torsions/', 'torsions_HiRERNA.csv')
1413 + concat_dataframes(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs_HiRERNA.csv')
1414 + if DO_WADLEY_ANALYSIS:
1415 + concat_dataframes(runDir + '/results/geometry/Pyle/distances/', 'distances_pyle.csv')
1416 + concat_dataframes(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv')
3135 joblist = [] 1417 joblist = []
3136 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances.csv'))) 1418 + joblist.append(Job(function=gmm_aa_dists, args=(RESCAN_GMM_COMP_NUM,)))
3137 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) 1419 + joblist.append(Job(function=gmm_aa_torsions, args=(RESCAN_GMM_COMP_NUM,)))
3138 - # if DO_HIRE_RNA_MEASURES:
3139 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')))
3140 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv')))
3141 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv')))
3142 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv')))
3143 - # if DO_WADLEY_ANALYSIS:
3144 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv')))
3145 - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv')))
3146 - # process_jobs(joblist)
3147 - joblist = []
3148 - # joblist.append(Job(function=gmm_aa_dists, args=()))
3149 - # joblist.append(Job(function=gmm_aa_torsions, args=()))
3150 if DO_HIRE_RNA_MEASURES: 1420 if DO_HIRE_RNA_MEASURES:
3151 - # joblist.append(Job(function=gmm_hrna, args=())) 1421 + joblist.append(Job(function=gmm_hrna, args=(RESCAN_GMM_COMP_NUM,)))
3152 - joblist.append(Job(function=gmm_hrna_basepairs, args=())) 1422 + joblist.append(Job(function=gmm_hrna_basepairs, args=(RESCAN_GMM_COMP_NUM,)))
3153 - # if DO_WADLEY_ANALYSIS: 1423 + if DO_WADLEY_ANALYSIS:
3154 - # joblist.append(Job(function=gmm_wadley, args=())) 1424 + joblist.append(Job(function=gmm_pyle, args=(RESCAN_GMM_COMP_NUM,)))
3155 - # joblist.append(Job(function=gmm_pyle, args=()))
3156 if len(joblist): 1425 if len(joblist):
3157 process_jobs(joblist) 1426 process_jobs(joblist)
3158 - #merge_jsons() 1427 + merge_jsons()
3159 1428
......