Showing 7 changed files with 1062 additions and 2517 deletions
... | @@ -31,7 +31,7 @@ import time | ... | @@ -31,7 +31,7 @@ import time |
31 | import traceback | 31 | import traceback |
32 | import warnings | 32 | import warnings |
33 | from functools import partial, wraps | 33 | from functools import partial, wraps |
34 | -from multiprocessing import Pool, Manager | 34 | +from multiprocessing import Pool, Manager, Value |
35 | from time import sleep | 35 | from time import sleep |
36 | from tqdm import tqdm | 36 | from tqdm import tqdm |
37 | from setproctitle import setproctitle | 37 | from setproctitle import setproctitle |
... | @@ -45,6 +45,12 @@ from Bio.PDB.PDBIO import Select | ... | @@ -45,6 +45,12 @@ from Bio.PDB.PDBIO import Select |
45 | runDir = os.getcwd() | 45 | runDir = os.getcwd() |
46 | 46 | ||
47 | def trace_unhandled_exceptions(func): | 47 | def trace_unhandled_exceptions(func): |
48 | + """ | ||
49 | + Captures exceptions even in parallel sections of the code and child processes, | ||
50 | + and throws logs in red to stderr and to errors.txt. | ||
51 | + | ||
52 | + Should be defined before the classes that use it. | ||
53 | + """ | ||
48 | @wraps(func) | 54 | @wraps(func) |
49 | def wrapped_func(*args, **kwargs): | 55 | def wrapped_func(*args, **kwargs): |
50 | try: | 56 | try: |
... | @@ -60,27 +66,27 @@ def trace_unhandled_exceptions(func): | ... | @@ -60,27 +66,27 @@ def trace_unhandled_exceptions(func): |
60 | print(s) | 66 | print(s) |
61 | return wrapped_func | 67 | return wrapped_func |
62 | 68 | ||
63 | - | ||
64 | pd.set_option('display.max_rows', None) | 69 | pd.set_option('display.max_rows', None) |
65 | sqlite3.enable_callback_tracebacks(True) | 70 | sqlite3.enable_callback_tracebacks(True) |
66 | sqlite3.register_adapter(np.int64, lambda val: int(val)) # Tell Sqlite what to do with <class numpy.int64> objects ---> convert to int | 71 | sqlite3.register_adapter(np.int64, lambda val: int(val)) # Tell Sqlite what to do with <class numpy.int64> objects ---> convert to int |
67 | sqlite3.register_adapter(np.float64, lambda val: float(val)) # Tell Sqlite what to do with <class numpy.float64> objects ---> convert to float | 72 | sqlite3.register_adapter(np.float64, lambda val: float(val)) # Tell Sqlite what to do with <class numpy.float64> objects ---> convert to float |
68 | 73 | ||
69 | -m = Manager() | 74 | +# m = Manager() |
70 | -running_stats = m.list() | 75 | +# running_stats = m.list() |
71 | -running_stats.append(0) # n_launched | 76 | +# running_stats.append(0) # n_launched |
72 | -running_stats.append(0) # n_finished | 77 | +# running_stats.append(0) # n_finished |
73 | -running_stats.append(0) # n_skipped | 78 | +# running_stats.append(0) # n_skipped |
79 | +n_launched = Value('i', 0) | ||
80 | +n_finished = Value('i', 0) | ||
81 | +n_skipped = Value('i', 0) | ||
74 | path_to_3D_data = "tobedefinedbyoptions" | 82 | path_to_3D_data = "tobedefinedbyoptions" |
75 | path_to_seq_data = "tobedefinedbyoptions" | 83 | path_to_seq_data = "tobedefinedbyoptions" |
76 | python_executable = "python"+".".join(platform.python_version().split('.')[:2]) # Cuts python3.8.1 into python3.8 for example. | 84 | python_executable = "python"+".".join(platform.python_version().split('.')[:2]) # Cuts python3.8.1 into python3.8 for example. |
77 | validsymb = '\U00002705' | 85 | validsymb = '\U00002705' |
78 | warnsymb = '\U000026A0' | 86 | warnsymb = '\U000026A0' |
79 | errsymb = '\U0000274C' | 87 | errsymb = '\U0000274C' |
80 | -LSU_set = {"RF00002", "RF02540", "RF02541", | 88 | +LSU_set = {"RF00002", "RF02540", "RF02541", "RF02543", "RF02546"} # From Rfam CLAN 00112 |
81 | - "RF02543", "RF02546"} # From Rfam CLAN 00112 | 89 | +SSU_set = {"RF00177", "RF02542", "RF02545", "RF01959", "RF01960"} # From Rfam CLAN 00111 |
82 | -SSU_set = {"RF00177", "RF02542", "RF02545", | ||
83 | - "RF01959", "RF01960"} # From Rfam CLAN 00111 | ||
84 | 90 | ||
85 | no_nts_set = set() | 91 | no_nts_set = set() |
86 | weird_mappings = set() | 92 | weird_mappings = set() |
... | @@ -103,17 +109,15 @@ class MutableFastaIterator(FastaIterator): | ... | @@ -103,17 +109,15 @@ class MutableFastaIterator(FastaIterator): |
103 | first_word = title.split(None, 1)[0] | 109 | first_word = title.split(None, 1)[0] |
104 | except IndexError: | 110 | except IndexError: |
105 | assert not title, repr(title) | 111 | assert not title, repr(title) |
106 | - # Should we use SeqRecord default for no ID? | ||
107 | first_word = "" | 112 | first_word = "" |
108 | - yield SeqRecord( | 113 | + yield SeqRecord(MutableSeq(sequence), id=first_word, name=first_word, description=title) |
109 | - MutableSeq(sequence), id=first_word, name=first_word, description=title, | ||
110 | - ) | ||
111 | 114 | ||
112 | 115 | ||
113 | class SelectivePortionSelector(object): | 116 | class SelectivePortionSelector(object): |
114 | """Class passed to MMCIFIO to select some chain portions in an MMCIF file. | 117 | """Class passed to MMCIFIO to select some chain portions in an MMCIF file. |
115 | 118 | ||
116 | Validates every chain, residue, nucleotide, to say if it is in the selection or not. | 119 | Validates every chain, residue, nucleotide, to say if it is in the selection or not. |
120 | + The primary use is to select the portion of a chain which is mapped to a family. | ||
117 | """ | 121 | """ |
118 | 122 | ||
119 | def __init__(self, model_id, chain_id, valid_resnums, khetatm): | 123 | def __init__(self, model_id, chain_id, valid_resnums, khetatm): |
... | @@ -156,123 +160,6 @@ class SelectivePortionSelector(object): | ... | @@ -156,123 +160,6 @@ class SelectivePortionSelector(object): |
156 | return 1 | 160 | return 1 |
157 | 161 | ||
158 | 162 | ||
159 | -_select=Select() | ||
160 | - | ||
161 | -def save_mmcif(ioobj, out_file, select=_select, preserve_atom_numbering=False): | ||
162 | - # reuse and modification of the source code of Biopython | ||
163 | - # to have the 2 columns of numbering of residues numbered with the index_chain of DSSR | ||
164 | - if isinstance(out_file, str): | ||
165 | - fp = open(out_file, "w") | ||
166 | - close_file = True | ||
167 | - else: | ||
168 | - fp = out_file | ||
169 | - close_file = False | ||
170 | - atom_dict = defaultdict(list) | ||
171 | - | ||
172 | - for model in ioobj.structure.get_list(): | ||
173 | - if not select.accept_model(model): | ||
174 | - continue | ||
175 | - # mmCIF files with a single model have it specified as model 1 | ||
176 | - if model.serial_num == 0: | ||
177 | - model_n = "1" | ||
178 | - else: | ||
179 | - model_n = str(model.serial_num) | ||
180 | - # This is used to write label_entity_id and label_asym_id and | ||
181 | - # increments from 1, changing with each molecule | ||
182 | - entity_id = 0 | ||
183 | - if not preserve_atom_numbering: | ||
184 | - atom_number = 1 | ||
185 | - for chain in model.get_list(): | ||
186 | - if not select.accept_chain(chain): | ||
187 | - continue | ||
188 | - chain_id = chain.get_id() | ||
189 | - if chain_id == " ": | ||
190 | - chain_id = "." | ||
191 | - # This is used to write label_seq_id, | ||
192 | - # remaining blank for hetero residues | ||
193 | - | ||
194 | - prev_residue_type = "" | ||
195 | - prev_resname = "" | ||
196 | - for residue in chain.get_unpacked_list(): | ||
197 | - if not select.accept_residue(residue): | ||
198 | - continue | ||
199 | - hetfield, resseq, icode = residue.get_id() | ||
200 | - if hetfield == " ": | ||
201 | - residue_type = "ATOM" | ||
202 | - label_seq_id = str(resseq) | ||
203 | - | ||
204 | - else: | ||
205 | - residue_type = "HETATM" | ||
206 | - label_seq_id = "." | ||
207 | - resseq = str(resseq) | ||
208 | - if icode == " ": | ||
209 | - icode = "?" | ||
210 | - resname = residue.get_resname() | ||
211 | - # Check if the molecule changes within the chain | ||
212 | - # This will always increment for the first residue in a | ||
213 | - # chain due to the starting values above | ||
214 | - if residue_type != prev_residue_type or ( | ||
215 | - residue_type == "HETATM" and resname != prev_resname | ||
216 | - ): | ||
217 | - entity_id += 1 | ||
218 | - prev_residue_type = residue_type | ||
219 | - prev_resname = resname | ||
220 | - label_asym_id = ioobj._get_label_asym_id(entity_id) | ||
221 | - for atom in residue.get_unpacked_list(): | ||
222 | - if select.accept_atom(atom): | ||
223 | - atom_dict["_atom_site.group_PDB"].append(residue_type) | ||
224 | - if preserve_atom_numbering: | ||
225 | - atom_number = atom.get_serial_number() | ||
226 | - atom_dict["_atom_site.id"].append(str(atom_number)) | ||
227 | - if not preserve_atom_numbering: | ||
228 | - atom_number += 1 | ||
229 | - element = atom.element.strip() | ||
230 | - if element == "": | ||
231 | - element = "?" | ||
232 | - atom_dict["_atom_site.type_symbol"].append(element) | ||
233 | - atom_dict["_atom_site.label_atom_id"].append( | ||
234 | - atom.get_name().strip() | ||
235 | - ) | ||
236 | - altloc = atom.get_altloc() | ||
237 | - if altloc == " ": | ||
238 | - altloc = "." | ||
239 | - atom_dict["_atom_site.label_alt_id"].append(altloc) | ||
240 | - atom_dict["_atom_site.label_comp_id"].append( | ||
241 | - resname.strip() | ||
242 | - ) | ||
243 | - atom_dict["_atom_site.label_asym_id"].append(label_asym_id) | ||
244 | - # The entity ID should be the same for similar chains | ||
245 | - # However this is non-trivial to calculate so we write "?" | ||
246 | - atom_dict["_atom_site.label_entity_id"].append("?") | ||
247 | - atom_dict["_atom_site.label_seq_id"].append(label_seq_id) | ||
248 | - atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode) | ||
249 | - coord = atom.get_coord() | ||
250 | - atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0]) | ||
251 | - atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1]) | ||
252 | - atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2]) | ||
253 | - atom_dict["_atom_site.occupancy"].append( | ||
254 | - str(atom.get_occupancy()) | ||
255 | - ) | ||
256 | - atom_dict["_atom_site.B_iso_or_equiv"].append( | ||
257 | - str(atom.get_bfactor()) | ||
258 | - ) | ||
259 | - atom_dict["_atom_site.auth_seq_id"].append(resseq) | ||
260 | - atom_dict["_atom_site.auth_asym_id"].append(chain_id) | ||
261 | - atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n) | ||
262 | - | ||
263 | - # Data block name is the structure ID with special characters removed | ||
264 | - structure_id = ioobj.structure.id | ||
265 | - for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]: | ||
266 | - structure_id = structure_id.replace(c, "") | ||
267 | - atom_dict["data_"] = structure_id | ||
268 | - | ||
269 | - # Set the dictionary and write out using the generic dictionary method | ||
270 | - ioobj.dic = atom_dict | ||
271 | - ioobj._save_dict(fp) | ||
272 | - if close_file: | ||
273 | - fp.close() | ||
274 | - | ||
275 | - | ||
276 | class Chain: | 163 | class Chain: |
277 | """ | 164 | """ |
278 | The object which stores all our data and the methods to process it. | 165 | The object which stores all our data and the methods to process it. |
... | @@ -424,13 +311,11 @@ class Chain: | ... | @@ -424,13 +311,11 @@ class Chain: |
424 | for atom in list(res.get_atoms()): | 311 | for atom in list(res.get_atoms()): |
425 | # rename the remaining phosphate group to P, OP1, OP2, OP3 | 312 | # rename the remaining phosphate group to P, OP1, OP2, OP3 |
426 | if atom.get_name() in ['PA', 'O1A', 'O2A', 'O3A'] and res_name != 'RIA': | 313 | if atom.get_name() in ['PA', 'O1A', 'O2A', 'O3A'] and res_name != 'RIA': |
427 | - | 314 | + # RIA is a residue made up of 2 riboses and 2 phosphates, |
428 | - # RIA is a residue made up of 2 riboses and 2 phosphates, | 315 | + # so it has an O2A atom between the C2A and C1' atoms, |
429 | - # so it has an O2A atom between the C2A and C1 'atoms, | 316 | + # and it also has an OP2 atom attached to one of its phosphates |
430 | - # and it also has an OP2 atom attached to one of its phosphates | 317 | + # (see chains 6fyx_1_1, 6zu9_1_1, 6fyy_1_1, 6gsm_1_1, 3jaq_1_1 and 1yfg_1_A). |
431 | - # (see chains 6fyx_1_1, 6zu9_1_1, 6fyy_1_1, 6gsm_1_1 , 3jaq_1_1 and 1yfg_1_A) | 318 | + # We do not modify the atom names of RIA residues. |
432 | - # we do not modify the atom names of RIA residue | ||
433 | - | ||
434 | if atom.get_name() == 'PA': | 319 | if atom.get_name() == 'PA': |
435 | atom_name = 'P' | 320 | atom_name = 'P' |
436 | if atom.get_name() == 'O1A': | 321 | if atom.get_name() == 'O1A': |
... | @@ -440,7 +325,7 @@ class Chain: | ... | @@ -440,7 +325,7 @@ class Chain: |
440 | if atom.get_name() == 'O3A': | 325 | if atom.get_name() == 'O3A': |
441 | atom_name = 'OP3' | 326 | atom_name = 'OP3' |
442 | new_atom_t = pdb.Atom.Atom(atom_name, atom.get_coord(), atom.get_bfactor(), atom.get_occupancy(), atom.get_altloc(), atom_name, atom.get_serial_number()) | 327 | new_atom_t = pdb.Atom.Atom(atom_name, atom.get_coord(), atom.get_bfactor(), atom.get_occupancy(), atom.get_altloc(), atom_name, atom.get_serial_number()) |
443 | - else : | 328 | + else: |
444 | new_atom_t=atom.copy() | 329 | new_atom_t=atom.copy() |
445 | new_residu_t.add(new_atom_t) | 330 | new_residu_t.add(new_atom_t) |
446 | new_chain_t.add(new_residu_t) | 331 | new_chain_t.add(new_residu_t) |
... | @@ -787,7 +672,8 @@ class Chain: | ... | @@ -787,7 +672,8 @@ class Chain: |
787 | return df | 672 | return df |
788 | 673 | ||
789 | def register_chain(self, df): | 674 | def register_chain(self, df): |
790 | - """Saves the extracted 3D data to the database. | 675 | + """ |
676 | + Saves the extracted 3D data to the database. | ||
791 | """ | 677 | """ |
792 | 678 | ||
793 | setproctitle(f"RNANet.py {self.chain_label} register_chain()") | 679 | setproctitle(f"RNANet.py {self.chain_label} register_chain()") |
... | @@ -920,6 +806,10 @@ class Monitor: | ... | @@ -920,6 +806,10 @@ class Monitor: |
920 | 806 | ||
921 | 807 | ||
922 | class Downloader: | 808 | class Downloader: |
809 | + """ | ||
810 | + An object with methods to download public data from the internet. | ||
811 | + """ | ||
812 | + | ||
923 | def download_Rfam_PDB_mappings(self): | 813 | def download_Rfam_PDB_mappings(self): |
924 | """Query the Rfam public MySQL database for mappings between their RNA families and PDB structures. | 814 | """Query the Rfam public MySQL database for mappings between their RNA families and PDB structures. |
925 | 815 | ||
... | @@ -1170,6 +1060,10 @@ class Mapping: | ... | @@ -1170,6 +1060,10 @@ class Mapping: |
1170 | 1060 | ||
1171 | 1061 | ||
1172 | class Pipeline: | 1062 | class Pipeline: |
1063 | + """ | ||
1064 | + The RNANet pipeline steps. | ||
1065 | + """ | ||
1066 | + | ||
1173 | def __init__(self): | 1067 | def __init__(self): |
1174 | self.dl = Downloader() | 1068 | self.dl = Downloader() |
1175 | self.known_issues = [] # list of chain_labels to ignore | 1069 | self.known_issues = [] # list of chain_labels to ignore |
... | @@ -1189,6 +1083,7 @@ class Pipeline: | ... | @@ -1189,6 +1083,7 @@ class Pipeline: |
1189 | self.REUSE_ALL = False | 1083 | self.REUSE_ALL = False |
1190 | self.REDUNDANT = False | 1084 | self.REDUNDANT = False |
1191 | self.ALIGNOPTS = None | 1085 | self.ALIGNOPTS = None |
1086 | + self.RRNAALIGNOPTS = "--mxsize 8192 --cpu 10 --maxtau 0.1" | ||
1192 | self.STATSOPTS = None | 1087 | self.STATSOPTS = None |
1193 | self.USESINA = False | 1088 | self.USESINA = False |
1194 | self.SELECT_ONLY = None | 1089 | self.SELECT_ONLY = None |
... | @@ -1207,7 +1102,7 @@ class Pipeline: | ... | @@ -1207,7 +1102,7 @@ class Pipeline: |
1207 | 1102 | ||
1208 | try: | 1103 | try: |
1209 | opts, _ = getopt.getopt(sys.argv[1:], "r:fhs", ["help", "resolution=", "3d-folder=", "seq-folder=", "keep-hetatm=", | 1104 | opts, _ = getopt.getopt(sys.argv[1:], "r:fhs", ["help", "resolution=", "3d-folder=", "seq-folder=", "keep-hetatm=", |
1210 | - "only=", "cmalign-opts=", "stats-opts=", "maxcores=", "sina", "from-scratch", | 1105 | + "only=", "cmalign-opts=", "cmalign-rrna-opts=", "stats-opts=", "maxcores=", "sina", "from-scratch", |
1211 | "full-inference", "no-homology", "redundant", "ignore-issues", "extract", | 1106 | "full-inference", "no-homology", "redundant", "ignore-issues", "extract", |
1212 | "all", "no-logs", "archive", "update-homologous", "version"]) | 1107 | "all", "no-logs", "archive", "update-homologous", "version"]) |
1213 | except getopt.GetoptError as err: | 1108 | except getopt.GetoptError as err: |
... | @@ -1323,6 +1218,8 @@ class Pipeline: | ... | @@ -1323,6 +1218,8 @@ class Pipeline: |
1323 | self.REUSE_ALL = True | 1218 | self.REUSE_ALL = True |
1324 | elif opt == "cmalign-opts": | 1219 | elif opt == "cmalign-opts": |
1325 | self.ALIGNOPTS = arg | 1220 | self.ALIGNOPTS = arg |
1221 | + elif opt == "cmalign-rrna-opts": | ||
1222 | + self.RRNAALIGNOPTS = arg | ||
1326 | elif opt == "stats-opts": | 1223 | elif opt == "stats-opts": |
1327 | self.STATSOPTS = " ".split(arg) | 1224 | self.STATSOPTS = " ".split(arg) |
1328 | elif opt == "--all": | 1225 | elif opt == "--all": |
... | @@ -1382,7 +1279,7 @@ class Pipeline: | ... | @@ -1382,7 +1279,7 @@ class Pipeline: |
1382 | # If self.FULLINFERENCE is False, the extended list is already filtered to remove | 1279 | # If self.FULLINFERENCE is False, the extended list is already filtered to remove |
1383 | # the chains which already are in the database. | 1280 | # the chains which already are in the database. |
1384 | print("> Building list of structures...", flush=True) | 1281 | print("> Building list of structures...", flush=True) |
1385 | - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=ncores) | 1282 | + p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=ncores) |
1386 | try: | 1283 | try: |
1387 | 1284 | ||
1388 | pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, | 1285 | pbar = tqdm(full_structures_list, maxinterval=1.0, miniters=1, |
... | @@ -1491,7 +1388,7 @@ class Pipeline: | ... | @@ -1491,7 +1388,7 @@ class Pipeline: |
1491 | else: | 1388 | else: |
1492 | mmcif_list = sorted(set([c.pdb_id for c in self.update])) | 1389 | mmcif_list = sorted(set([c.pdb_id for c in self.update])) |
1493 | try: | 1390 | try: |
1494 | - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=int(coeff_ncores*ncores)) | 1391 | + p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=int(coeff_ncores*ncores)) |
1495 | pbar = tqdm(mmcif_list, maxinterval=1.0, miniters=1, desc="mmCIF files") | 1392 | pbar = tqdm(mmcif_list, maxinterval=1.0, miniters=1, desc="mmCIF files") |
1496 | for _ in p.imap_unordered(work_mmcif, mmcif_list, chunksize=1): | 1393 | for _ in p.imap_unordered(work_mmcif, mmcif_list, chunksize=1): |
1497 | pbar.update(1) # Everytime the iteration finishes, update the global progress bar | 1394 | pbar.update(1) # Everytime the iteration finishes, update the global progress bar |
... | @@ -1634,7 +1531,11 @@ class Pipeline: | ... | @@ -1634,7 +1531,11 @@ class Pipeline: |
1634 | joblist = [] | 1531 | joblist = [] |
1635 | for f in self.fam_list: | 1532 | for f in self.fam_list: |
1636 | # the function already uses all CPUs so launch them one by one (how_many_in_parallel=1) | 1533 | # the function already uses all CPUs so launch them one by one (how_many_in_parallel=1) |
1637 | - joblist.append(Job(function=work_realign, args=[self.USESINA, self.ALIGNOPTS, f], how_many_in_parallel=1, label=f)) | 1534 | + if f in LSU_set or f in SSU_set: |
1535 | + opts = self.RRNAALIGNOPTS | ||
1536 | + else: | ||
1537 | + opts = self.ALIGNOPTS | ||
1538 | + joblist.append(Job(function=work_realign, args=[self.USESINA, opts, f], how_many_in_parallel=1, label=f)) | ||
1638 | 1539 | ||
1639 | # Execute the jobs | 1540 | # Execute the jobs |
1640 | try: | 1541 | try: |
... | @@ -1684,7 +1585,7 @@ class Pipeline: | ... | @@ -1684,7 +1585,7 @@ class Pipeline: |
1684 | 1585 | ||
1685 | # Start a process pool to dispatch the RNA families, | 1586 | # Start a process pool to dispatch the RNA families, |
1686 | # over multiple CPUs (one family by CPU) | 1587 | # over multiple CPUs (one family by CPU) |
1687 | - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | 1588 | + p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=nworkers) |
1688 | 1589 | ||
1689 | try: | 1590 | try: |
1690 | fam_pbar = tqdm(total=len(self.fam_list), desc="RNA families", position=0, leave=True) | 1591 | fam_pbar = tqdm(total=len(self.fam_list), desc="RNA families", position=0, leave=True) |
... | @@ -1741,7 +1642,7 @@ class Pipeline: | ... | @@ -1741,7 +1642,7 @@ class Pipeline: |
1741 | os.makedirs(path_to_3D_data + "datapoints/") | 1642 | os.makedirs(path_to_3D_data + "datapoints/") |
1742 | 1643 | ||
1743 | # Save to by-chain CSV files | 1644 | # Save to by-chain CSV files |
1744 | - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=3) | 1645 | + p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=3) |
1745 | try: | 1646 | try: |
1746 | pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True) | 1647 | pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True) |
1747 | for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains)): | 1648 | for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains)): |
... | @@ -1867,6 +1768,7 @@ class Pipeline: | ... | @@ -1867,6 +1768,7 @@ class Pipeline: |
1867 | 1768 | ||
1868 | conn.close() | 1769 | conn.close() |
1869 | 1770 | ||
1771 | +# ==================== General helper functions ===================== | ||
1870 | 1772 | ||
1871 | def read_cpu_number(): | 1773 | def read_cpu_number(): |
1872 | """This function reads the number of CPU cores available from /proc/cpuinfo. | 1774 | """This function reads the number of CPU cores available from /proc/cpuinfo. |
... | @@ -1876,13 +1778,29 @@ def read_cpu_number(): | ... | @@ -1876,13 +1778,29 @@ def read_cpu_number(): |
1876 | p = subprocess.run(['grep', '-Ec', '(Intel|AMD)', '/proc/cpuinfo'], stdout=subprocess.PIPE) | 1778 | p = subprocess.run(['grep', '-Ec', '(Intel|AMD)', '/proc/cpuinfo'], stdout=subprocess.PIPE) |
1877 | return int(int(p.stdout.decode('utf-8')[:-1])/2) | 1779 | return int(int(p.stdout.decode('utf-8')[:-1])/2) |
1878 | 1780 | ||
1879 | -def init_worker(tqdm_lock=None): | 1781 | +def init_with_tqdm(tqdm_lock=None): |
1782 | + """ | ||
1783 | + This initialization method kills the children when a signal is received, | |
1784 | + and the children's progress is followed using TQDM progress bars. | |
1785 | + """ | ||
1880 | signal.signal(signal.SIGINT, signal.SIG_IGN) | 1786 | signal.signal(signal.SIGINT, signal.SIG_IGN) |
1881 | if tqdm_lock is not None: | 1787 | if tqdm_lock is not None: |
1882 | tqdm.set_lock(tqdm_lock) | 1788 | tqdm.set_lock(tqdm_lock) |
1883 | 1789 | ||
1790 | +def init_no_tqdm(arg1, arg2, arg3): | ||
1791 | + """ | ||
1792 | + This initialization method does not kill the children when a signal is received; | |
1793 | + they will complete and die even after the main process stops. | |
1794 | + The children's progress is followed using stdout text logs (notify(), warn(), etc). | |
1795 | + """ | ||
1796 | + global n_launched, n_finished, n_skipped | ||
1797 | + n_launched = arg1 | ||
1798 | + n_finished = arg2 | ||
1799 | + n_skipped = arg3 | ||
1800 | + | ||
1884 | def warn(message, error=False): | 1801 | def warn(message, error=False): |
1885 | - """Pretty-print warnings and error messages. | 1802 | + """ |
1803 | + Pretty-print warnings and error messages. | ||
1886 | """ | 1804 | """ |
1887 | # Cut if too long | 1805 | # Cut if too long |
1888 | if len(message) > 66: | 1806 | if len(message) > 66: |
... | @@ -1900,20 +1818,133 @@ def warn(message, error=False): | ... | @@ -1900,20 +1818,133 @@ def warn(message, error=False): |
1900 | print(f"\t> \033[33mWARN: {message:64s}\033[0m\t{warnsymb}", flush=True) | 1818 | print(f"\t> \033[33mWARN: {message:64s}\033[0m\t{warnsymb}", flush=True) |
1901 | 1819 | ||
1902 | def notify(message, post=''): | 1820 | def notify(message, post=''): |
1821 | + """ | ||
1822 | + Pretty-print successfully finished tasks. | |
1823 | + """ | ||
1903 | if len(post): | 1824 | if len(post): |
1904 | post = '(' + post + ')' | 1825 | post = '(' + post + ')' |
1905 | print(f"\t> {message:70s}\t{validsymb}\t{post}", flush=True) | 1826 | print(f"\t> {message:70s}\t{validsymb}\t{post}", flush=True) |
1906 | 1827 | ||
1907 | -def _mutable_SeqIO_to_alignment_iterator(handle): | 1828 | +# ========================= Biopython overloads ===================== |
1908 | - records = list(MutableFastaIterator(handle)) | ||
1909 | - if records: | ||
1910 | - yield MultipleSeqAlignment(records) | ||
1911 | 1829 | ||
1912 | -def parse(handle): | 1830 | +def save_mmcif(ioobj, out_file, select=Select(), preserve_atom_numbering=False): |
1913 | - with open(handle, 'r') as fp: | 1831 | + """ |
1914 | - yield from _mutable_SeqIO_to_alignment_iterator(fp) | 1832 | + MMCIF writer which renumbers residues according to the RNANet index_chain (coming from DSSR). |
1833 | + """ | ||
1834 | + | ||
1835 | + if isinstance(out_file, str): | ||
1836 | + fp = open(out_file, "w") | ||
1837 | + close_file = True | ||
1838 | + else: | ||
1839 | + fp = out_file | ||
1840 | + close_file = False | ||
1841 | + atom_dict = defaultdict(list) | ||
1842 | + | ||
1843 | + # Iterate on models | ||
1844 | + for model in ioobj.structure.get_list(): | ||
1845 | + if not select.accept_model(model): | ||
1846 | + continue | ||
1847 | + | ||
1848 | + # mmCIF files with a single model have it specified as model 1 | ||
1849 | + if model.serial_num == 0: | ||
1850 | + model_n = "1" | ||
1851 | + else: | ||
1852 | + model_n = str(model.serial_num) | ||
1853 | + | ||
1854 | + # This is used to write label_entity_id and label_asym_id and | ||
1855 | + # increments from 1, changing with each molecule | ||
1856 | + entity_id = 0 | ||
1857 | + if not preserve_atom_numbering: | ||
1858 | + atom_number = 1 | ||
1859 | + | ||
1860 | + # Iterate on chains | ||
1861 | + for chain in model.get_list(): | ||
1862 | + if not select.accept_chain(chain): | ||
1863 | + continue | ||
1864 | + chain_id = chain.get_id() | ||
1865 | + if chain_id == " ": | ||
1866 | + chain_id = "." | ||
1867 | + | ||
1868 | + # This is used to write label_seq_id, remaining blank for hetero residues | ||
1869 | + prev_residue_type = "" | ||
1870 | + prev_resname = "" | ||
1871 | + | ||
1872 | + # Iterate on residues | ||
1873 | + for residue in chain.get_unpacked_list(): | ||
1874 | + if not select.accept_residue(residue): | ||
1875 | + continue | ||
1876 | + hetfield, resseq, icode = residue.get_id() | ||
1877 | + if hetfield == " ": | ||
1878 | + residue_type = "ATOM" | ||
1879 | + label_seq_id = str(resseq) | ||
1880 | + else: | ||
1881 | + residue_type = "HETATM" | ||
1882 | + label_seq_id = "." | ||
1883 | + resseq = str(resseq) | ||
1884 | + if icode == " ": | ||
1885 | + icode = "?" | ||
1886 | + resname = residue.get_resname() | ||
1887 | + | ||
1888 | + # Check if the molecule changes within the chain. | ||
1889 | + # This will always increment for the first residue in a | ||
1890 | + # chain due to the starting values above | ||
1891 | + if residue_type != prev_residue_type or (residue_type == "HETATM" and resname != prev_resname): | ||
1892 | + entity_id += 1 | ||
1893 | + prev_residue_type = residue_type | ||
1894 | + prev_resname = resname | ||
1895 | + label_asym_id = ioobj._get_label_asym_id(entity_id) | ||
1896 | + | ||
1897 | + # Iterate on atoms | ||
1898 | + for atom in residue.get_unpacked_list(): | ||
1899 | + if select.accept_atom(atom): | ||
1900 | + atom_dict["_atom_site.group_PDB"].append(residue_type) | ||
1901 | + if preserve_atom_numbering: | ||
1902 | + atom_number = atom.get_serial_number() | ||
1903 | + atom_dict["_atom_site.id"].append(str(atom_number)) | ||
1904 | + if not preserve_atom_numbering: | ||
1905 | + atom_number += 1 | ||
1906 | + element = atom.element.strip() | ||
1907 | + if element == "": | ||
1908 | + element = "?" | ||
1909 | + atom_dict["_atom_site.type_symbol"].append(element) | ||
1910 | + atom_dict["_atom_site.label_atom_id"].append(atom.get_name().strip()) | ||
1911 | + altloc = atom.get_altloc() | ||
1912 | + if altloc == " ": | ||
1913 | + altloc = "." | ||
1914 | + atom_dict["_atom_site.label_alt_id"].append(altloc) | ||
1915 | + atom_dict["_atom_site.label_comp_id"].append(resname.strip()) | ||
1916 | + atom_dict["_atom_site.label_asym_id"].append(label_asym_id) | ||
1917 | + # The entity ID should be the same for similar chains | ||
1918 | + # However this is non-trivial to calculate so we write "?" | ||
1919 | + atom_dict["_atom_site.label_entity_id"].append("?") | ||
1920 | + atom_dict["_atom_site.label_seq_id"].append(label_seq_id) | ||
1921 | + atom_dict["_atom_site.pdbx_PDB_ins_code"].append(icode) | ||
1922 | + coord = atom.get_coord() | ||
1923 | + atom_dict["_atom_site.Cartn_x"].append("%.3f" % coord[0]) | ||
1924 | + atom_dict["_atom_site.Cartn_y"].append("%.3f" % coord[1]) | ||
1925 | + atom_dict["_atom_site.Cartn_z"].append("%.3f" % coord[2]) | ||
1926 | + atom_dict["_atom_site.occupancy"].append(str(atom.get_occupancy())) | ||
1927 | + atom_dict["_atom_site.B_iso_or_equiv"].append(str(atom.get_bfactor()) ) | ||
1928 | + atom_dict["_atom_site.auth_seq_id"].append(resseq) | ||
1929 | + atom_dict["_atom_site.auth_asym_id"].append(chain_id) | ||
1930 | + atom_dict["_atom_site.pdbx_PDB_model_num"].append(model_n) | ||
1931 | + | ||
1932 | + # Data block name is the structure ID with special characters removed | ||
1933 | + structure_id = ioobj.structure.id | ||
1934 | + for c in ["#", "$", "'", '"', "[", "]", " ", "\t", "\n"]: | ||
1935 | + structure_id = structure_id.replace(c, "") | ||
1936 | + atom_dict["data_"] = structure_id | ||
1937 | + | ||
1938 | + # Set the dictionary and write out using the generic dictionary method | ||
1939 | + ioobj.dic = atom_dict | ||
1940 | + ioobj._save_dict(fp) | ||
1941 | + if close_file: | ||
1942 | + fp.close() | ||
1915 | 1943 | ||
1916 | def read(handle): | 1944 | def read(handle): |
1945 | + """ | ||
1946 | + A shortcut to parse alignment files with our custom class MutableFastaIterator. | ||
1947 | + """ | ||
1917 | iterator = parse(handle) | 1948 | iterator = parse(handle) |
1918 | try: | 1949 | try: |
1919 | alignment = next(iterator) | 1950 | alignment = next(iterator) |
... | @@ -1926,6 +1957,25 @@ def read(handle): | ... | @@ -1926,6 +1957,25 @@ def read(handle): |
1926 | pass | 1957 | pass |
1927 | return alignment | 1958 | return alignment |
1928 | 1959 | ||
1960 | +def parse(handle): | ||
1961 | + """ | ||
1962 | + A shortcut to parse alignment files with our custom class MutableFastaIterator. | ||
1963 | + Called by function read(). | ||
1964 | + """ | ||
1965 | + with open(handle, 'r') as fp: | ||
1966 | + yield from _mutable_SeqIO_to_alignment_iterator(fp) | ||
1967 | + | ||
1968 | +def _mutable_SeqIO_to_alignment_iterator(handle): | ||
1969 | + """ | ||
1970 | + A shortcut to parse alignment files with our custom class MutableFastaIterator. | ||
1971 | + Used by the parse() function. | ||
1972 | + """ | ||
1973 | + records = list(MutableFastaIterator(handle)) | ||
1974 | + if records: | ||
1975 | + yield MultipleSeqAlignment(records) | ||
1976 | + | ||
1977 | +# ========================== SQL related ============================ | ||
1978 | + | ||
1929 | def sql_define_tables(conn): | 1979 | def sql_define_tables(conn): |
1930 | conn.executescript( | 1980 | conn.executescript( |
1931 | """ PRAGMA foreign_keys = on; | 1981 | """ PRAGMA foreign_keys = on; |
... | @@ -2085,12 +2135,19 @@ def sql_execute(conn, sql, many=False, data=None, warn_every=10): | ... | @@ -2085,12 +2135,19 @@ def sql_execute(conn, sql, many=False, data=None, warn_every=10): |
2085 | time.sleep(0.2) | 2135 | time.sleep(0.2) |
2086 | warn("Tried to reach database 100 times and failed. Aborting.", error=True) | 2136 | warn("Tried to reach database 100 times and failed. Aborting.", error=True) |
2087 | 2137 | ||
2138 | +# ======================= RNANet Jobs and tasks ====================== | ||
2139 | + | ||
2088 | @trace_unhandled_exceptions | 2140 | @trace_unhandled_exceptions |
2089 | def execute_job(j, jobcount): | 2141 | def execute_job(j, jobcount): |
2090 | - """Run a Job object. | ||
2091 | """ | 2142 | """ |
2143 | + Run a Job object. | ||
2144 | + """ | ||
2145 | + | ||
2146 | + global n_launched, n_skipped, n_finished | ||
2147 | + | ||
2092 | # increase the counter of running jobs | 2148 | # increase the counter of running jobs |
2093 | - running_stats[0] += 1 | 2149 | + with n_launched.get_lock(): |
2150 | + n_launched.value += 1 | ||
2094 | 2151 | ||
2095 | # Monitor this process | 2152 | # Monitor this process |
2096 | m = -1 | 2153 | m = -1 |
... | @@ -2098,7 +2155,7 @@ def execute_job(j, jobcount): | ... | @@ -2098,7 +2155,7 @@ def execute_job(j, jobcount): |
2098 | 2155 | ||
2099 | if len(j.cmd_): # The job is a system command | 2156 | if len(j.cmd_): # The job is a system command |
2100 | 2157 | ||
2101 | - print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.label}") | 2158 | + print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.label}") |
2102 | 2159 | ||
2103 | # Add the command to logfile | 2160 | # Add the command to logfile |
2104 | os.makedirs(runDir+"/logs", exist_ok=True) | 2161 | os.makedirs(runDir+"/logs", exist_ok=True) |
... | @@ -2114,9 +2171,20 @@ def execute_job(j, jobcount): | ... | @@ -2114,9 +2171,20 @@ def execute_job(j, jobcount): |
2114 | 2171 | ||
2115 | # run the command. subprocess.run will be a child of this process, and stays monitored. | 2172 | # run the command. subprocess.run will be a child of this process, and stays monitored. |
2116 | start_time = time.time() | 2173 | start_time = time.time() |
2117 | - r = subprocess.run(j.cmd_, timeout=j.timeout_, | 2174 | + r = subprocess.run(j.cmd_, timeout=j.timeout_, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
2118 | - stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||
2119 | end_time = time.time() | 2175 | end_time = time.time() |
2176 | + if r.returncode != 0: | ||
2177 | + if r.stderr is not None: | ||
2178 | + print(r.stderr, flush=True) | ||
2179 | + print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\tIssue faced with {j.label}, skipping it and adding it to known issues (if not known).") | ||
2180 | + with n_launched.get_lock(): | ||
2181 | + n_launched.value -= 1 | ||
2182 | + with n_skipped.get_lock(): | ||
2183 | + n_skipped.value += 1 | ||
2184 | + if j.label not in issues: | ||
2185 | + issues.add(j.label) | ||
2186 | + with open("known_issues.txt", "a") as iss: | ||
2187 | + iss.write(j.label+"\n") | ||
2120 | 2188 | ||
2121 | # Stop the Monitor, then get its result | 2189 | # Stop the Monitor, then get its result |
2122 | monitor.keep_watching = False | 2190 | monitor.keep_watching = False |
... | @@ -2124,7 +2192,7 @@ def execute_job(j, jobcount): | ... | @@ -2124,7 +2192,7 @@ def execute_job(j, jobcount): |
2124 | 2192 | ||
2125 | elif j.func_ is not None: | 2193 | elif j.func_ is not None: |
2126 | 2194 | ||
2127 | - print(f"[{running_stats[0]+running_stats[2]}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if type(a) != list])})", flush=True) | 2195 | + print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_ if type(a) != list])})", flush=True) |
2128 | 2196 | ||
2129 | with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: | 2197 | with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: |
2130 | # put the monitor in a different thread | 2198 | # put the monitor in a different thread |
... | @@ -2193,7 +2261,7 @@ def execute_joblist(fulljoblist): | ... | @@ -2193,7 +2261,7 @@ def execute_joblist(fulljoblist): |
2193 | 2261 | ||
2194 | print("using", n, "processes:") | 2262 | print("using", n, "processes:") |
2195 | # execute jobs of priority i that should be processed n by n: | 2263 | # execute jobs of priority i that should be processed n by n: |
2196 | - p = Pool(processes=n, maxtasksperchild=1, initializer=init_worker) | 2264 | + p = Pool(processes=n, maxtasksperchild=1, initializer=init_no_tqdm, initargs=(n_launched, n_finished, n_skipped)) |
2197 | try: | 2265 | try: |
2198 | raw_results = p.map(partial(execute_job, jobcount=jobcount), bunch, chunksize=2) | 2266 | raw_results = p.map(partial(execute_job, jobcount=jobcount), bunch, chunksize=2) |
2199 | p.close() | 2267 | p.close() |
... | @@ -2207,7 +2275,11 @@ def execute_joblist(fulljoblist): | ... | @@ -2207,7 +2275,11 @@ def execute_joblist(fulljoblist): |
2207 | for j, r in zip(bunch, raw_results): | 2275 | for j, r in zip(bunch, raw_results): |
2208 | j.comp_time = round(r[0], 2) # seconds | 2276 | j.comp_time = round(r[0], 2) # seconds |
2209 | j.max_mem = int(r[1]/1000000) # MB | 2277 | j.max_mem = int(r[1]/1000000) # MB |
2210 | - results.append((j.label, r[2], round(r[0], 2), int(r[1]/1000000))) | 2278 | + results.append((j.label, r[2], j.comp_time, j.max_mem)) |
2279 | + | ||
2280 | + # Job is finished | ||
2281 | + with n_finished.get_lock(): | ||
2282 | + n_finished.value += 1 | ||
2211 | 2283 | ||
2212 | # throw back the money | 2284 | # throw back the money |
2213 | return results | 2285 | return results |
... | @@ -2679,8 +2751,8 @@ def use_infernal(rfam_acc, alignopts): | ... | @@ -2679,8 +2751,8 @@ def use_infernal(rfam_acc, alignopts): |
2679 | 2751 | ||
2680 | # Convert Stockholm to aligned FASTA | 2752 | # Convert Stockholm to aligned FASTA |
2681 | subprocess.run(["esl-reformat", "-o", path_to_seq_data + f"realigned/{rfam_acc}++.afa", | 2753 | subprocess.run(["esl-reformat", "-o", path_to_seq_data + f"realigned/{rfam_acc}++.afa", |
2682 | - "--informat", "stockholm", | 2754 | + "--informat", "stockholm", |
2683 | - "afa", path_to_seq_data + f"realigned/{rfam_acc}++.stk"]) | 2755 | + "afa", path_to_seq_data + f"realigned/{rfam_acc}++.stk"]) |
2684 | subprocess.run(["rm", "-f", "esltmp*"]) # We can use a joker here, because we are not running in parallel for this part. | 2756 | subprocess.run(["rm", "-f", "esltmp*"]) # We can use a joker here, because we are not running in parallel for this part. |
2685 | 2757 | ||
2686 | @trace_unhandled_exceptions | 2758 | @trace_unhandled_exceptions |
... | @@ -3037,6 +3109,8 @@ def work_save(c, homology=True): | ... | @@ -3037,6 +3109,8 @@ def work_save(c, homology=True): |
3037 | 3109 | ||
3038 | df.to_csv(filename, float_format="%.2f", index=False) | 3110 | df.to_csv(filename, float_format="%.2f", index=False) |
3039 | 3111 | ||
3112 | +# =========================== Main function ============================= | ||
3113 | + | ||
3040 | if __name__ == "__main__": | 3114 | if __name__ == "__main__": |
3041 | 3115 | ||
3042 | fileDir = os.path.dirname(os.path.realpath(__file__)) | 3116 | fileDir = os.path.dirname(os.path.realpath(__file__)) | ... | ... |
... | @@ -7,6 +7,15 @@ In `cmalign` alignments, - means a nucleotide is missing compared to the covaria | ... | @@ -7,6 +7,15 @@ In `cmalign` alignments, - means a nucleotide is missing compared to the covaria |
7 | 7 | ||
8 | In the final filtered alignment that we provide for download, the same rule applies, but on top of that, some '.' are replaced by '-' when a gap in the 3D structure (a missing, unresolved nucleotide) is mapped to an insertion gap. | 8 | In the final filtered alignment that we provide for download, the same rule applies, but on top of that, some '.' are replaced by '-' when a gap in the 3D structure (a missing, unresolved nucleotide) is mapped to an insertion gap. |
9 | 9 | ||
10 | +* **What are the cmalign options for ?** | ||
11 | + | ||
12 | +According to Infernal's user guide, Infernal uses an HMM banding technique by default to accelerate alignment. It also takes care of aligning 3' or 5' truncated sequences correctly (and we have some). | ||
13 | +First, one can choose an algorithm, between `--optacc` (maximizing posterior probabilities, the default) and `--cyk` (maximizing likelihood). | ||
14 | + | ||
15 | +Then, the use of bands allows faster and more memory-efficient computation, at the price of losing the guarantee of finding the optimal alignment. Bands can be disabled using the `--nonbanded` option. A better idea would be to control the threshold of probability mass to be considered negligible during HMM band calculation with the `--tau` parameter. Higher values of Tau yield greater speedups and lower memory usage, but a greater chance of missing the optimal alignment. In practice, the algorithm explores several Tau values (increasing it by a factor of 2.0 from the original `--tau` value) until the DP matrix size falls below the threshold given by `--mxsize` (default 1028 Mb) or the value of `--maxtau` is reached (in this case, the program fails). One can disable this exploration with the `--fixedtau` option. The default value of `--tau` is 1e-7, and the default `--maxtau` is 0.05. Basically, you may decide on a value of `--mxsize` by dividing your available RAM by the number of cores used with cmalign. If necessary, you may use fewer cores than you have, using the `--cpu` option. | ||
16 | + | ||
17 | +Finally, if using `--cyk --nonbanded --notrunc --noprob`, one can use the `--small` option to align using the divide-and-conquer CYK algorithm from Eddy 2002, which requires very little memory but a lot of time. The major drawback is that it requires `--notrunc` and `--noprob`, so we give up on the correct alignment of truncated sequences and on the computation of posterior probabilities. | ||
18 | + | ||
10 | * **Why are there some gap-only columns in the alignment ?** | 19 | * **Why are there some gap-only columns in the alignment ?** |
11 | 20 | ||
12 | These columns are not completely gap-only, they contain at least one dash-gap '-'. This means an actual, physical nucleotide which should exist in the 3D structure should be located there. The previous and following nucleotides are **not** contiguous in space in 3D. | 21 | These columns are not completely gap-only, they contain at least one dash-gap '-'. This means an actual, physical nucleotide which should exist in the 3D structure should be located there. The previous and following nucleotides are **not** contiguous in space in 3D. |
... | @@ -31,5 +40,5 @@ We first remove the nucleotides whose number is outside the family mapping (if a | ... | @@ -31,5 +40,5 @@ We first remove the nucleotides whose number is outside the family mapping (if a |
31 | 40 | ||
32 | * **What are the versions of the dependencies you use ?** | 41 | * **What are the versions of the dependencies you use ?** |
33 | 42 | ||
34 | -`cmalign` is v1.1.4, `sina` is v1.6.0, `x3dna-dssr` is v1.9.9, Biopython is v1.78. | 43 | +`cmalign` is v1.1.4, `sina` is v1.6.0, `x3dna-dssr` is v2.3.2-2021jun29, Biopython is v1.78. |
35 | 44 | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -6,23 +6,16 @@ | ... | @@ -6,23 +6,16 @@ |
6 | * Some chains are not correctly renamed A in the produced separate files (e.g. 1d4r-B) | 6 | * Some chains are not correctly renamed A in the produced separate files (e.g. 1d4r-B) |
7 | 7 | ||
8 | ## Alignment issues | 8 | ## Alignment issues |
9 | -* [SOLVED] Filtered alignments are shorter than the number of alignment columns saved to the SQL table `align_column` | ||
10 | * Chain names appear in triple in the FASTA header (e.g. 1d4r[1]-B 1d4r[1]-B 1d4r[1]-B) | 9 | * Chain names appear in triple in the FASTA header (e.g. 1d4r[1]-B 1d4r[1]-B 1d4r[1]-B) |
11 | 10 | ||
12 | -## Technical running issues | ||
13 | -* [SOLVED] Files produced by Docker containers are owned by root and require root permissions to be read | ||
14 | -* [SOLVED] SQLite WAL files are not deleted properly | ||
15 | - | ||
16 | # Known feature requests | 11 | # Known feature requests |
17 | -* [DONE] Get filtered versions of the sequence alignments containing the 3D chains, publicly available for download | 12 | +* Automated annotation of detected Recurrent Interaction Networks (RINs), see http://carnaval.lri.fr/ . |
18 | -* [DONE] Get a consensus residue for each alignement column | 13 | +* Possibly, automated detection of HLs and ILs from the 3D Motif Atlas (BGSU). Maybe. Their own website already does the job. |
19 | -* [DONE] Get an option to limit the number of cores | 14 | +* Weight sequences in alignment to give more importance to rarer sequences |
20 | -* [DONE] Move to SILVA LSU release 138.1 | 15 | +* Give both gap_percent and insertion_gap_percent |
21 | -* [UPCOMING] Automated annotation of detected Recurrent Interaction Networks (RINs), see http://carnaval.lri.fr/ . | ||
22 | -* [UPCOMING] Possibly, automated detection of HLs and ILs from the 3D Motif Atlas (BGSU). Maybe. Their own website already does the job. | ||
23 | -* [UPCOMING] Weight sequences in alignment to give more importance to rarer sequences | ||
24 | -* [UPCOMING] Give both gap_percent and insertion_gap_percent | ||
25 | * A field estimating the quality of the sequence alignment in table family. | 16 | * A field estimating the quality of the sequence alignment in table family. |
26 | * Possibly, more metrics about the alignments coming from Infernal. | 17 | * Possibly, more metrics about the alignments coming from Infernal. |
27 | * Run cmscan ourselves from the NDB instead of using Rfam-PDB mappings ? (Iff this actually makes a real difference, untested yet) | 18 | * Run cmscan ourselves from the NDB instead of using Rfam-PDB mappings ? (Iff this actually makes a real difference, untested yet) |
28 | * Use and save Infernal alignment bounds and truncation information | 19 | * Use and save Infernal alignment bounds and truncation information |
20 | +* Save if a chain is a representative in BGSU list | ||
21 | +* Annotate unstructured regions (on a nucleotide basis) | ... | ... |
geometric_stats.py
0 → 100644
This diff could not be displayed because it is too large.
1 | 6ydp_1_AA_1176-2737 | 1 | 6ydp_1_AA_1176-2737 |
2 | 6ydw_1_AA_1176-2737 | 2 | 6ydw_1_AA_1176-2737 |
3 | 2z9q_1_A_1-72 | 3 | 2z9q_1_A_1-72 |
4 | -1ml5_1_b_5-121 | ||
5 | -1ml5_1_a_1-2914 | ||
6 | 3ep2_1_Y_1-72 | 4 | 3ep2_1_Y_1-72 |
7 | 3eq3_1_Y_1-72 | 5 | 3eq3_1_Y_1-72 |
8 | 4v48_1_A6_1-73 | 6 | 4v48_1_A6_1-73 |
9 | -1ml5_1_A_2-1520 | 7 | +1ml5_1_b_5-121 |
8 | +1ml5_1_a_1-2914 | ||
10 | 1qzb_1_B_1-73 | 9 | 1qzb_1_B_1-73 |
11 | 1qza_1_B_1-73 | 10 | 1qza_1_B_1-73 |
12 | 1ls2_1_B_1-73 | 11 | 1ls2_1_B_1-73 |
12 | +1ml5_1_A_2-1520 | ||
13 | 1gsg_1_T_1-72 | 13 | 1gsg_1_T_1-72 |
14 | 7d1a_1_A_805-902 | 14 | 7d1a_1_A_805-902 |
15 | 7d0g_1_A_805-913 | 15 | 7d0g_1_A_805-913 |
... | @@ -22,15 +22,12 @@ | ... | @@ -22,15 +22,12 @@ |
22 | 2rdo_1_A_3-118 | 22 | 2rdo_1_A_3-118 |
23 | 4v48_1_A9_3-118 | 23 | 4v48_1_A9_3-118 |
24 | 4v47_1_A9_3-118 | 24 | 4v47_1_A9_3-118 |
25 | +4v42_1_BA_1-2914 | ||
26 | +4v42_1_BB_5-121 | ||
25 | 2ob7_1_A_10-319 | 27 | 2ob7_1_A_10-319 |
26 | 1x1l_1_A_1-130 | 28 | 1x1l_1_A_1-130 |
27 | 1zc8_1_Z_1-91 | 29 | 1zc8_1_Z_1-91 |
28 | 2ob7_1_D_1-130 | 30 | 2ob7_1_D_1-130 |
29 | -4v42_1_BA_1-2914 | ||
30 | -4v42_1_BB_5-121 | ||
31 | -1r2x_1_C_1-58 | ||
32 | -1r2w_1_C_1-58 | ||
33 | -1eg0_1_L_1-56 | ||
34 | 3dg2_1_A_1-1542 | 31 | 3dg2_1_A_1-1542 |
35 | 3dg0_1_A_1-1542 | 32 | 3dg0_1_A_1-1542 |
36 | 4v48_1_BA_1-1543 | 33 | 4v48_1_BA_1-1543 |
... | @@ -46,11 +43,14 @@ | ... | @@ -46,11 +43,14 @@ |
46 | 3dg4_1_B_1-2904 | 43 | 3dg4_1_B_1-2904 |
47 | 3dg5_1_B_1-2904 | 44 | 3dg5_1_B_1-2904 |
48 | 1eg0_1_O_1-73 | 45 | 1eg0_1_O_1-73 |
49 | -1zc8_1_A_1-59 | 46 | +1r2x_1_C_1-58 |
47 | +1r2w_1_C_1-58 | ||
48 | +1eg0_1_L_1-56 | ||
50 | 1jgq_1_A_2-1520 | 49 | 1jgq_1_A_2-1520 |
51 | 4v42_1_AA_2-1520 | 50 | 4v42_1_AA_2-1520 |
52 | 1jgo_1_A_2-1520 | 51 | 1jgo_1_A_2-1520 |
53 | 1jgp_1_A_2-1520 | 52 | 1jgp_1_A_2-1520 |
53 | +1zc8_1_A_1-59 | ||
54 | 1mvr_1_D_1-59 | 54 | 1mvr_1_D_1-59 |
55 | 4c9d_1_D_29-1 | 55 | 4c9d_1_D_29-1 |
56 | 4c9d_1_C_29-1 | 56 | 4c9d_1_C_29-1 |
... | @@ -61,12 +61,6 @@ | ... | @@ -61,12 +61,6 @@ |
61 | 3ep2_1_B_1-50 | 61 | 3ep2_1_B_1-50 |
62 | 3eq3_1_B_1-50 | 62 | 3eq3_1_B_1-50 |
63 | 3eq4_1_B_1-50 | 63 | 3eq4_1_B_1-50 |
64 | -3pgw_1_R_1-164 | ||
65 | -3pgw_1_N_1-164 | ||
66 | -3cw1_1_x_1-138 | ||
67 | -3cw1_1_w_1-138 | ||
68 | -3cw1_1_V_1-138 | ||
69 | -3cw1_1_v_1-138 | ||
70 | 2iy3_1_B_9-105 | 64 | 2iy3_1_B_9-105 |
71 | 3jcr_1_N_1-106 | 65 | 3jcr_1_N_1-106 |
72 | 2vaz_1_A_64-177 | 66 | 2vaz_1_A_64-177 |
... | @@ -78,6 +72,12 @@ | ... | @@ -78,6 +72,12 @@ |
78 | 4v5z_1_BY_2-113 | 72 | 4v5z_1_BY_2-113 |
79 | 4v5z_1_BZ_1-70 | 73 | 4v5z_1_BZ_1-70 |
80 | 4v5z_1_B1_2-123 | 74 | 4v5z_1_B1_2-123 |
75 | +3pgw_1_R_1-164 | ||
76 | +3pgw_1_N_1-164 | ||
77 | +3cw1_1_x_1-138 | ||
78 | +3cw1_1_w_1-138 | ||
79 | +3cw1_1_V_1-138 | ||
80 | +3cw1_1_v_1-138 | ||
81 | 1mvr_1_B_1-96 | 81 | 1mvr_1_B_1-96 |
82 | 4adx_1_0_1-2923 | 82 | 4adx_1_0_1-2923 |
83 | 3eq4_1_Y_1-69 | 83 | 3eq4_1_Y_1-69 |
... | @@ -295,7 +295,12 @@ | ... | @@ -295,7 +295,12 @@ |
295 | 6ucq_1_2Y | 295 | 6ucq_1_2Y |
296 | 4w2e_1_X | 296 | 4w2e_1_X |
297 | 6ucq_1_2X | 297 | 6ucq_1_2X |
298 | +7n1p_1_DT | ||
299 | +7n2u_1_DT | ||
298 | 6yss_1_W | 300 | 6yss_1_W |
301 | +7n30_1_DT | ||
302 | +7n31_1_DT | ||
303 | +7n2c_1_DT | ||
299 | 5afi_1_Y | 304 | 5afi_1_Y |
300 | 5uq8_1_Z | 305 | 5uq8_1_Z |
301 | 5wdt_1_Y | 306 | 5wdt_1_Y |
... | @@ -333,6 +338,22 @@ | ... | @@ -333,6 +338,22 @@ |
333 | 4v4j_1_X | 338 | 4v4j_1_X |
334 | 4v4i_1_X | 339 | 4v4i_1_X |
335 | 4v42_1_BB | 340 | 4v42_1_BB |
341 | +4jrc_1_B | ||
342 | +4jrc_1_A | ||
343 | +6lkq_1_S | ||
344 | +5h5u_1_H | ||
345 | +7d6z_1_F | ||
346 | +5lze_1_Y | ||
347 | +5lze_1_V | ||
348 | +5lze_1_X | ||
349 | +3jcj_1_G | ||
350 | +6o7k_1_G | ||
351 | +3dg2_1_A | ||
352 | +3dg0_1_A | ||
353 | +4v48_1_BA | ||
354 | +4v47_1_BA | ||
355 | +3dg4_1_A | ||
356 | +3dg5_1_A | ||
336 | 6d30_1_C | 357 | 6d30_1_C |
337 | 6j7z_1_C | 358 | 6j7z_1_C |
338 | 3er9_1_D | 359 | 3er9_1_D |
... | @@ -437,25 +458,22 @@ | ... | @@ -437,25 +458,22 @@ |
437 | 6doc_1_B | 458 | 6doc_1_B |
438 | 6doe_1_B | 459 | 6doe_1_B |
439 | 6n6g_1_D | 460 | 6n6g_1_D |
440 | -6lkq_1_S | ||
441 | -5h5u_1_H | ||
442 | -7d6z_1_F | ||
443 | -5lze_1_Y | ||
444 | -5lze_1_V | ||
445 | -5lze_1_X | ||
446 | -3jcj_1_G | ||
447 | -6o7k_1_G | ||
448 | -3dg2_1_A | ||
449 | -3dg0_1_A | ||
450 | -4v48_1_BA | ||
451 | -4v47_1_BA | ||
452 | -3dg4_1_A | ||
453 | -3dg5_1_A | ||
454 | 4b3r_1_W | 461 | 4b3r_1_W |
455 | 4b3t_1_W | 462 | 4b3t_1_W |
456 | 4b3s_1_W | 463 | 4b3s_1_W |
464 | +7b5k_1_X | ||
457 | 5o2r_1_X | 465 | 5o2r_1_X |
458 | 5kcs_1_1X | 466 | 5kcs_1_1X |
467 | +7n1p_1_PT | ||
468 | +7n2u_1_PT | ||
469 | +7n30_1_PT | ||
470 | +7n31_1_PT | ||
471 | +7n2c_1_PT | ||
472 | +6yl5_1_I | ||
473 | +6yl5_1_E | ||
474 | +6yl5_1_A | ||
475 | +6yl5_1_K | ||
476 | +6yl5_1_G | ||
459 | 6zvk_1_E2 | 477 | 6zvk_1_E2 |
460 | 6zvk_1_H2 | 478 | 6zvk_1_H2 |
461 | 7a01_1_E2 | 479 | 7a01_1_E2 |
... | @@ -526,6 +544,7 @@ | ... | @@ -526,6 +544,7 @@ |
526 | 6w6l_1_V | 544 | 6w6l_1_V |
527 | 6olf_1_V | 545 | 6olf_1_V |
528 | 3erc_1_G | 546 | 3erc_1_G |
547 | +4qjd_1_D | ||
529 | 6of1_1_1W | 548 | 6of1_1_1W |
530 | 6cae_1_1Y | 549 | 6cae_1_1Y |
531 | 6o97_1_1W | 550 | 6o97_1_1W |
... | @@ -557,7 +576,9 @@ | ... | @@ -557,7 +576,9 @@ |
557 | 4v48_1_A6 | 576 | 4v48_1_A6 |
558 | 2z9q_1_A | 577 | 2z9q_1_A |
559 | 4hot_1_X | 578 | 4hot_1_X |
579 | +5ns4_1_C | ||
560 | 6d2z_1_C | 580 | 6d2z_1_C |
581 | +7eh0_1_I | ||
561 | 4tu0_1_F | 582 | 4tu0_1_F |
562 | 4tu0_1_G | 583 | 4tu0_1_G |
563 | 6r9o_1_B | 584 | 6r9o_1_B |
... | @@ -578,20 +599,23 @@ | ... | @@ -578,20 +599,23 @@ |
578 | 6sv4_1_NC | 599 | 6sv4_1_NC |
579 | 6i7o_1_NB | 600 | 6i7o_1_NB |
580 | 1ml5_1_A | 601 | 1ml5_1_A |
602 | +7nsq_1_V | ||
581 | 6swa_1_Q | 603 | 6swa_1_Q |
582 | 6swa_1_R | 604 | 6swa_1_R |
583 | -3j6x_1_IR | ||
584 | -3j6y_1_IR | ||
585 | 6ole_1_T | 605 | 6ole_1_T |
586 | 6om0_1_T | 606 | 6om0_1_T |
587 | 6oli_1_T | 607 | 6oli_1_T |
588 | 6om7_1_T | 608 | 6om7_1_T |
589 | 6olf_1_T | 609 | 6olf_1_T |
590 | 6w6l_1_T | 610 | 6w6l_1_T |
611 | +6tnu_1_M | ||
612 | +5mc6_1_M | ||
613 | +7nrc_1_SM | ||
591 | 6tb3_1_N | 614 | 6tb3_1_N |
592 | 7b7d_1_SM | 615 | 7b7d_1_SM |
593 | 7b7d_1_SN | 616 | 7b7d_1_SN |
594 | 6tnu_1_N | 617 | 6tnu_1_N |
618 | +7nrc_1_SN | ||
595 | 7nrd_1_SN | 619 | 7nrd_1_SN |
596 | 6zot_1_C | 620 | 6zot_1_C |
597 | 2uxb_1_X | 621 | 2uxb_1_X |
... | @@ -602,6 +626,9 @@ | ... | @@ -602,6 +626,9 @@ |
602 | 1eg0_1_M | 626 | 1eg0_1_M |
603 | 3eq4_1_D | 627 | 3eq4_1_D |
604 | 5o1y_1_B | 628 | 5o1y_1_B |
629 | +4kzy_1_I | ||
630 | +4kzz_1_I | ||
631 | +4kzx_1_I | ||
605 | 3jcr_1_H | 632 | 3jcr_1_H |
606 | 6dzi_1_H | 633 | 6dzi_1_H |
607 | 5zeu_1_A | 634 | 5zeu_1_A |
... | @@ -705,7 +732,6 @@ | ... | @@ -705,7 +732,6 @@ |
705 | 6ip6_1_ZZ | 732 | 6ip6_1_ZZ |
706 | 6uu3_1_333 | 733 | 6uu3_1_333 |
707 | 6uu1_1_333 | 734 | 6uu1_1_333 |
708 | -1pn8_1_D | ||
709 | 3er8_1_H | 735 | 3er8_1_H |
710 | 3er8_1_G | 736 | 3er8_1_G |
711 | 3er8_1_F | 737 | 3er8_1_F |
... | @@ -744,9 +770,8 @@ | ... | @@ -744,9 +770,8 @@ |
744 | 4wtl_1_T | 770 | 4wtl_1_T |
745 | 4wtl_1_P | 771 | 4wtl_1_P |
746 | 1xnq_1_W | 772 | 1xnq_1_W |
747 | -1x18_1_C | 773 | +7n2v_1_DT |
748 | -1x18_1_B | 774 | +4peh_1_Z |
749 | -1x18_1_D | ||
750 | 1vq6_1_4 | 775 | 1vq6_1_4 |
751 | 4am3_1_D | 776 | 4am3_1_D |
752 | 4am3_1_H | 777 | 4am3_1_H |
... | @@ -758,12 +783,45 @@ | ... | @@ -758,12 +783,45 @@ |
758 | 4wtj_1_T | 783 | 4wtj_1_T |
759 | 4wtj_1_P | 784 | 4wtj_1_P |
760 | 4xbf_1_D | 785 | 4xbf_1_D |
786 | +5w1h_1_B | ||
761 | 6n6d_1_D | 787 | 6n6d_1_D |
762 | 6n6k_1_C | 788 | 6n6k_1_C |
763 | 6n6k_1_D | 789 | 6n6k_1_D |
764 | 3rtj_1_D | 790 | 3rtj_1_D |
765 | 6ty9_1_M | 791 | 6ty9_1_M |
766 | 6tz1_1_N | 792 | 6tz1_1_N |
793 | +6q1h_1_D | ||
794 | +6q1h_1_H | ||
795 | +6p7p_1_F | ||
796 | +6p7p_1_E | ||
797 | +6p7p_1_D | ||
798 | +6vm6_1_J | ||
799 | +6vm6_1_G | ||
800 | +6wan_1_K | ||
801 | +6wan_1_H | ||
802 | +6wan_1_G | ||
803 | +6wan_1_L | ||
804 | +6wan_1_I | ||
805 | +6ywo_1_F | ||
806 | +6wan_1_J | ||
807 | +4oau_1_A | ||
808 | +6ywo_1_E | ||
809 | +6ywo_1_K | ||
810 | +6vm6_1_I | ||
811 | +6vm6_1_H | ||
812 | +6ywo_1_I | ||
813 | +2a1r_1_C | ||
814 | +6m6v_1_F | ||
815 | +6m6v_1_E | ||
816 | +2a1r_1_D | ||
817 | +3gpq_1_E | ||
818 | +3gpq_1_F | ||
819 | +6o79_1_C | ||
820 | +6vm6_1_K | ||
821 | +6m6v_1_G | ||
822 | +6hyu_1_D | ||
823 | +1laj_1_R | ||
824 | +6ybv_1_K | ||
767 | 6sce_1_B | 825 | 6sce_1_B |
768 | 6xl1_1_C | 826 | 6xl1_1_C |
769 | 6scf_1_I | 827 | 6scf_1_I |
... | @@ -809,11 +867,12 @@ | ... | @@ -809,11 +867,12 @@ |
809 | 1y1y_1_P | 867 | 1y1y_1_P |
810 | 5zuu_1_I | 868 | 5zuu_1_I |
811 | 5zuu_1_G | 869 | 5zuu_1_G |
870 | +7am2_1_R1 | ||
812 | 4peh_1_W | 871 | 4peh_1_W |
813 | 4peh_1_V | 872 | 4peh_1_V |
814 | 4peh_1_X | 873 | 4peh_1_X |
815 | 4peh_1_Y | 874 | 4peh_1_Y |
816 | -4peh_1_Z | 875 | +7d8c_1_C |
817 | 6mkn_1_W | 876 | 6mkn_1_W |
818 | 7kl3_1_B | 877 | 7kl3_1_B |
819 | 4cxg_1_C | 878 | 4cxg_1_C |
... | @@ -826,14 +885,7 @@ | ... | @@ -826,14 +885,7 @@ |
826 | 4eya_1_F | 885 | 4eya_1_F |
827 | 4eya_1_Q | 886 | 4eya_1_Q |
828 | 4eya_1_R | 887 | 4eya_1_R |
829 | -1qzc_1_B | ||
830 | -1t1o_1_B | ||
831 | 1mvr_1_C | 888 | 1mvr_1_C |
832 | -1t1m_1_B | ||
833 | -1t1o_1_C | ||
834 | -1t1m_1_A | ||
835 | -1t1o_1_A | ||
836 | -2r1g_1_B | ||
837 | 4ht9_1_E | 889 | 4ht9_1_E |
838 | 6z1p_1_AB | 890 | 6z1p_1_AB |
839 | 6z1p_1_AA | 891 | 6z1p_1_AA |
... | @@ -844,11 +896,9 @@ | ... | @@ -844,11 +896,9 @@ |
844 | 5uk4_1_W | 896 | 5uk4_1_W |
845 | 5uk4_1_U | 897 | 5uk4_1_U |
846 | 5f6c_1_E | 898 | 5f6c_1_E |
899 | +7nwh_1_HH | ||
847 | 4rcj_1_B | 900 | 4rcj_1_B |
848 | 1xnr_1_W | 901 | 1xnr_1_W |
849 | -2agn_1_A | ||
850 | -2agn_1_C | ||
851 | -2agn_1_B | ||
852 | 6e0o_1_C | 902 | 6e0o_1_C |
853 | 6o75_1_D | 903 | 6o75_1_D |
854 | 6o75_1_C | 904 | 6o75_1_C |
... | @@ -866,8 +916,7 @@ | ... | @@ -866,8 +916,7 @@ |
866 | 1ibm_1_Z | 916 | 1ibm_1_Z |
867 | 4dr5_1_V | 917 | 4dr5_1_V |
868 | 4d61_1_J | 918 | 4d61_1_J |
869 | -1trj_1_B | 919 | +7nwg_1_Q3 |
870 | -1trj_1_C | ||
871 | 5tbw_1_SR | 920 | 5tbw_1_SR |
872 | 6hhq_1_SR | 921 | 6hhq_1_SR |
873 | 6zvi_1_H | 922 | 6zvi_1_H |
... | @@ -883,6 +932,8 @@ | ... | @@ -883,6 +932,8 @@ |
883 | 5k8h_1_A | 932 | 5k8h_1_A |
884 | 5z4a_1_B | 933 | 5z4a_1_B |
885 | 3jbu_1_V | 934 | 3jbu_1_V |
935 | +4ts2_1_Y | ||
936 | +4ts0_1_Y | ||
886 | 1h2c_1_R | 937 | 1h2c_1_R |
887 | 1h2d_1_S | 938 | 1h2d_1_S |
888 | 1h2d_1_R | 939 | 1h2d_1_R |
... | @@ -909,6 +960,7 @@ | ... | @@ -909,6 +960,7 @@ |
909 | 6ppn_1_I | 960 | 6ppn_1_I |
910 | 5flx_1_Z | 961 | 5flx_1_Z |
911 | 6eri_1_AX | 962 | 6eri_1_AX |
963 | +7k5l_1_R | ||
912 | 7d80_1_Y | 964 | 7d80_1_Y |
913 | 1zc8_1_A | 965 | 1zc8_1_A |
914 | 1zc8_1_C | 966 | 1zc8_1_C |
... | @@ -916,6 +968,7 @@ | ... | @@ -916,6 +968,7 @@ |
916 | 1zc8_1_G | 968 | 1zc8_1_G |
917 | 1zc8_1_I | 969 | 1zc8_1_I |
918 | 1zc8_1_H | 970 | 1zc8_1_H |
971 | +6bfb_1_Y | ||
919 | 1zc8_1_J | 972 | 1zc8_1_J |
920 | 7du2_1_R | 973 | 7du2_1_R |
921 | 4v8z_1_CX | 974 | 4v8z_1_CX |
... | @@ -951,6 +1004,8 @@ | ... | @@ -951,6 +1004,8 @@ |
951 | 4x9e_1_H | 1004 | 4x9e_1_H |
952 | 6z1p_1_BB | 1005 | 6z1p_1_BB |
953 | 6z1p_1_BA | 1006 | 6z1p_1_BA |
1007 | +3p22_1_C | ||
1008 | +3p22_1_G | ||
954 | 2uxd_1_X | 1009 | 2uxd_1_X |
955 | 6ywe_1_BB | 1010 | 6ywe_1_BB |
956 | 3ol9_1_D | 1011 | 3ol9_1_D |
... | @@ -973,8 +1028,6 @@ | ... | @@ -973,8 +1028,6 @@ |
973 | 3ol7_1_H | 1028 | 3ol7_1_H |
974 | 3ol8_1_L | 1029 | 3ol8_1_L |
975 | 3ol8_1_P | 1030 | 3ol8_1_P |
976 | -1qzc_1_C | ||
977 | -1qzc_1_A | ||
978 | 6yrq_1_E | 1031 | 6yrq_1_E |
979 | 6yrq_1_H | 1032 | 6yrq_1_H |
980 | 6yrq_1_G | 1033 | 6yrq_1_G |
... | @@ -1054,6 +1107,7 @@ | ... | @@ -1054,6 +1107,7 @@ |
1054 | 3iy9_1_A | 1107 | 3iy9_1_A |
1055 | 4wtk_1_T | 1108 | 4wtk_1_T |
1056 | 4wtk_1_P | 1109 | 4wtk_1_P |
1110 | +6wlj_3_A | ||
1057 | 1vqn_1_4 | 1111 | 1vqn_1_4 |
1058 | 4oav_1_C | 1112 | 4oav_1_C |
1059 | 4oav_1_A | 1113 | 4oav_1_A |
... | @@ -1070,18 +1124,13 @@ | ... | @@ -1070,18 +1124,13 @@ |
1070 | 3eq3_1_B | 1124 | 3eq3_1_B |
1071 | 3eq4_1_B | 1125 | 3eq4_1_B |
1072 | 4i67_1_B | 1126 | 4i67_1_B |
1073 | -3pgw_1_R | 1127 | +4jf2_1_A |
1074 | -3pgw_1_N | ||
1075 | -3cw1_1_X | ||
1076 | -3cw1_1_W | ||
1077 | -3cw1_1_V | ||
1078 | -7b0y_1_A | ||
1079 | 6k32_1_T | 1128 | 6k32_1_T |
1080 | 6k32_1_P | 1129 | 6k32_1_P |
1081 | 5mmj_1_A | 1130 | 5mmj_1_A |
1082 | 5x8r_1_A | 1131 | 5x8r_1_A |
1083 | -2agn_1_E | 1132 | +3fu2_1_B |
1084 | -2agn_1_D | 1133 | +3fu2_1_A |
1085 | 4v5z_1_BD | 1134 | 4v5z_1_BD |
1086 | 6yw5_1_AA | 1135 | 6yw5_1_AA |
1087 | 6ywe_1_AA | 1136 | 6ywe_1_AA |
... | @@ -1117,6 +1166,17 @@ | ... | @@ -1117,6 +1166,17 @@ |
1117 | 3p6y_1_Q | 1166 | 3p6y_1_Q |
1118 | 3p6y_1_W | 1167 | 3p6y_1_W |
1119 | 5dto_1_B | 1168 | 5dto_1_B |
1169 | +6yml_1_A | ||
1170 | +6ymm_1_A | ||
1171 | +6ymi_1_M | ||
1172 | +6ymi_1_F | ||
1173 | +6ymi_1_A | ||
1174 | +6ylb_1_F | ||
1175 | +6ymi_1_C | ||
1176 | +6ymj_1_C | ||
1177 | +6ylb_1_C | ||
1178 | +6ymj_1_I | ||
1179 | +6ymj_1_O | ||
1120 | 4cxh_1_X | 1180 | 4cxh_1_X |
1121 | 1uvj_1_F | 1181 | 1uvj_1_F |
1122 | 1uvj_1_D | 1182 | 1uvj_1_D |
... | @@ -1153,6 +1213,12 @@ | ... | @@ -1153,6 +1213,12 @@ |
1153 | 4v4f_1_B4 | 1213 | 4v4f_1_B4 |
1154 | 4v4f_1_A6 | 1214 | 4v4f_1_A6 |
1155 | 4v4f_1_B2 | 1215 | 4v4f_1_B2 |
1216 | +7m4y_1_V | ||
1217 | +7m4x_1_V | ||
1218 | +6v3a_1_V | ||
1219 | +6v39_1_V | ||
1220 | +6ck5_1_A | ||
1221 | +6ck5_1_B | ||
1156 | 5it9_1_I | 1222 | 5it9_1_I |
1157 | 7jqc_1_I | 1223 | 7jqc_1_I |
1158 | 5zsb_1_C | 1224 | 5zsb_1_C |
... | @@ -1162,6 +1228,8 @@ | ... | @@ -1162,6 +1228,8 @@ |
1162 | 1cwp_1_D | 1228 | 1cwp_1_D |
1163 | 3jcr_1_N | 1229 | 3jcr_1_N |
1164 | 6gfw_1_R | 1230 | 6gfw_1_R |
1231 | +3j6x_1_IR | ||
1232 | +3j6y_1_IR | ||
1165 | 2vaz_1_A | 1233 | 2vaz_1_A |
1166 | 6zm6_1_X | 1234 | 6zm6_1_X |
1167 | 6zm5_1_X | 1235 | 6zm5_1_X |
... | @@ -1177,11 +1245,11 @@ | ... | @@ -1177,11 +1245,11 @@ |
1177 | 5uh6_1_I | 1245 | 5uh6_1_I |
1178 | 6l74_1_I | 1246 | 6l74_1_I |
1179 | 5uh9_1_I | 1247 | 5uh9_1_I |
1248 | +4v5z_1_BS | ||
1180 | 2ftc_1_R | 1249 | 2ftc_1_R |
1181 | 7a5j_1_X | 1250 | 7a5j_1_X |
1182 | 6sag_1_R | 1251 | 6sag_1_R |
1183 | 4udv_1_R | 1252 | 4udv_1_R |
1184 | -2r1g_1_E | ||
1185 | 5zsc_1_D | 1253 | 5zsc_1_D |
1186 | 5zsc_1_C | 1254 | 5zsc_1_C |
1187 | 6woy_1_I | 1255 | 6woy_1_I |
... | @@ -1209,7 +1277,7 @@ | ... | @@ -1209,7 +1277,7 @@ |
1209 | 3m85_1_X | 1277 | 3m85_1_X |
1210 | 3m85_1_Z | 1278 | 3m85_1_Z |
1211 | 3m85_1_Y | 1279 | 3m85_1_Y |
1212 | -1e8s_1_C | 1280 | +5u34_1_B |
1213 | 5wnp_1_B | 1281 | 5wnp_1_B |
1214 | 5wnv_1_B | 1282 | 5wnv_1_B |
1215 | 5yts_1_B | 1283 | 5yts_1_B |
... | @@ -1232,8 +1300,11 @@ | ... | @@ -1232,8 +1300,11 @@ |
1232 | 6ij2_1_E | 1300 | 6ij2_1_E |
1233 | 3u2e_1_D | 1301 | 3u2e_1_D |
1234 | 3u2e_1_C | 1302 | 3u2e_1_C |
1303 | +7eh1_1_I | ||
1235 | 5uef_1_C | 1304 | 5uef_1_C |
1236 | 5uef_1_D | 1305 | 5uef_1_D |
1306 | +7eh2_1_R | ||
1307 | +7eh2_1_I | ||
1237 | 4x4u_1_H | 1308 | 4x4u_1_H |
1238 | 4afy_1_D | 1309 | 4afy_1_D |
1239 | 6oy5_1_I | 1310 | 6oy5_1_I |
... | @@ -1244,13 +1315,15 @@ | ... | @@ -1244,13 +1315,15 @@ |
1244 | 6s0m_1_C | 1315 | 6s0m_1_C |
1245 | 6ymw_1_C | 1316 | 6ymw_1_C |
1246 | 7a5g_1_J | 1317 | 7a5g_1_J |
1318 | +1m5k_1_B | ||
1319 | +1m5o_1_E | ||
1320 | +1m5v_1_B | ||
1247 | 6gx6_1_B | 1321 | 6gx6_1_B |
1248 | 4k4s_1_D | 1322 | 4k4s_1_D |
1249 | 4k4s_1_H | 1323 | 4k4s_1_H |
1250 | 4k4t_1_H | 1324 | 4k4t_1_H |
1251 | 4k4t_1_D | 1325 | 4k4t_1_D |
1252 | 1zn1_1_C | 1326 | 1zn1_1_C |
1253 | -1zn0_1_C | ||
1254 | 1xpu_1_G | 1327 | 1xpu_1_G |
1255 | 1xpu_1_L | 1328 | 1xpu_1_L |
1256 | 1xpr_1_L | 1329 | 1xpr_1_L |
... | @@ -1274,7 +1347,9 @@ | ... | @@ -1274,7 +1347,9 @@ |
1274 | 6gc5_1_F | 1347 | 6gc5_1_F |
1275 | 6gc5_1_H | 1348 | 6gc5_1_H |
1276 | 6gc5_1_G | 1349 | 6gc5_1_G |
1350 | +4rne_1_C | ||
1277 | 1n1h_1_B | 1351 | 1n1h_1_B |
1352 | +7n2v_1_PT | ||
1278 | 4ohz_1_B | 1353 | 4ohz_1_B |
1279 | 6t83_1_6B | 1354 | 6t83_1_6B |
1280 | 4gv6_1_C | 1355 | 4gv6_1_C |
... | @@ -1290,6 +1365,9 @@ | ... | @@ -1290,6 +1365,9 @@ |
1290 | 4v5z_1_BC | 1365 | 4v5z_1_BC |
1291 | 5y88_1_X | 1366 | 5y88_1_X |
1292 | 4v5z_1_BB | 1367 | 4v5z_1_BB |
1368 | +5y85_1_D | ||
1369 | +5y85_1_B | ||
1370 | +5y87_1_D | ||
1293 | 3j0o_1_H | 1371 | 3j0o_1_H |
1294 | 3j0l_1_H | 1372 | 3j0l_1_H |
1295 | 3j0p_1_H | 1373 | 3j0p_1_H |
... | @@ -1351,11 +1429,11 @@ | ... | @@ -1351,11 +1429,11 @@ |
1351 | 4e6b_1_A | 1429 | 4e6b_1_A |
1352 | 4e6b_1_B | 1430 | 4e6b_1_B |
1353 | 6a6l_1_D | 1431 | 6a6l_1_D |
1354 | -4v5z_1_BS | ||
1355 | 4v8t_1_1 | 1432 | 4v8t_1_1 |
1356 | 1uvi_1_D | 1433 | 1uvi_1_D |
1357 | 1uvi_1_F | 1434 | 1uvi_1_F |
1358 | 1uvi_1_E | 1435 | 1uvi_1_E |
1436 | +3gs5_1_A | ||
1359 | 4m7d_1_P | 1437 | 4m7d_1_P |
1360 | 4k4u_1_D | 1438 | 4k4u_1_D |
1361 | 4k4u_1_H | 1439 | 4k4u_1_H |
... | @@ -1376,8 +1454,8 @@ | ... | @@ -1376,8 +1454,8 @@ |
1376 | 6ip5_1_2M | 1454 | 6ip5_1_2M |
1377 | 6ip6_1_2M | 1455 | 6ip6_1_2M |
1378 | 6qcs_1_M | 1456 | 6qcs_1_M |
1457 | +7b5k_1_Z | ||
1379 | 486d_1_G | 1458 | 486d_1_G |
1380 | -2r1g_1_C | ||
1381 | 486d_1_F | 1459 | 486d_1_F |
1382 | 4v5z_1_B0 | 1460 | 4v5z_1_B0 |
1383 | 4nia_1_O | 1461 | 4nia_1_O |
... | @@ -1391,11 +1469,11 @@ | ... | @@ -1391,11 +1469,11 @@ |
1391 | 4oq9_1_F | 1469 | 4oq9_1_F |
1392 | 4oq9_1_L | 1470 | 4oq9_1_L |
1393 | 6r9q_1_B | 1471 | 6r9q_1_B |
1472 | +7m4u_1_A | ||
1394 | 6v3a_1_SN1 | 1473 | 6v3a_1_SN1 |
1395 | 6v3b_1_SN1 | 1474 | 6v3b_1_SN1 |
1396 | 6v39_1_SN1 | 1475 | 6v39_1_SN1 |
1397 | 6v3e_1_SN1 | 1476 | 6v3e_1_SN1 |
1398 | -1pn7_1_C | ||
1399 | 1mj1_1_Q | 1477 | 1mj1_1_Q |
1400 | 1mj1_1_R | 1478 | 1mj1_1_R |
1401 | 4dr6_1_V | 1479 | 4dr6_1_V |
... | @@ -1437,14 +1515,25 @@ | ... | @@ -1437,14 +1515,25 @@ |
1437 | 6ow3_1_I | 1515 | 6ow3_1_I |
1438 | 6ovy_1_I | 1516 | 6ovy_1_I |
1439 | 6oy6_1_I | 1517 | 6oy6_1_I |
1440 | -4bbl_1_Y | ||
1441 | -4bbl_1_Z | ||
1442 | 4qvd_1_H | 1518 | 4qvd_1_H |
1443 | 5gxi_1_B | 1519 | 5gxi_1_B |
1444 | 3iy8_1_A | 1520 | 3iy8_1_A |
1445 | -6tnu_1_M | 1521 | +7n06_1_G |
1446 | -5mc6_1_M | 1522 | +7n06_1_H |
1523 | +7n06_1_I | ||
1524 | +7n06_1_J | ||
1525 | +7n06_1_K | ||
1526 | +7n06_1_L | ||
1527 | +7n33_1_G | ||
1528 | +7n33_1_H | ||
1529 | +7n33_1_I | ||
1530 | +7n33_1_J | ||
1531 | +7n33_1_K | ||
1532 | +7n33_1_L | ||
1447 | 5mc6_1_N | 1533 | 5mc6_1_N |
1534 | +2qwy_1_C | ||
1535 | +2qwy_1_A | ||
1536 | +2qwy_1_B | ||
1448 | 4eya_1_O | 1537 | 4eya_1_O |
1449 | 4eya_1_P | 1538 | 4eya_1_P |
1450 | 4eya_1_C | 1539 | 4eya_1_C |
... | @@ -1453,8 +1542,6 @@ | ... | @@ -1453,8 +1542,6 @@ |
1453 | 6htq_1_W | 1542 | 6htq_1_W |
1454 | 6htq_1_U | 1543 | 6htq_1_U |
1455 | 6uu6_1_333 | 1544 | 6uu6_1_333 |
1456 | -6v3a_1_V | ||
1457 | -6v39_1_V | ||
1458 | 5a0v_1_F | 1545 | 5a0v_1_F |
1459 | 3avt_1_T | 1546 | 3avt_1_T |
1460 | 6d1v_1_C | 1547 | 6d1v_1_C |
... | @@ -1497,6 +1584,7 @@ | ... | @@ -1497,6 +1584,7 @@ |
1497 | 6o78_1_E | 1584 | 6o78_1_E |
1498 | 6xa1_1_BV | 1585 | 6xa1_1_BV |
1499 | 6ha8_1_X | 1586 | 6ha8_1_X |
1587 | +3bnp_1_B | ||
1500 | 1m8w_1_E | 1588 | 1m8w_1_E |
1501 | 1m8w_1_F | 1589 | 1m8w_1_F |
1502 | 5udi_1_B | 1590 | 5udi_1_B |
... | @@ -1520,16 +1608,29 @@ | ... | @@ -1520,16 +1608,29 @@ |
1520 | 6een_1_H | 1608 | 6een_1_H |
1521 | 4wti_1_T | 1609 | 4wti_1_T |
1522 | 4wti_1_P | 1610 | 4wti_1_P |
1611 | +6dlr_1_A | ||
1612 | +6dlt_1_A | ||
1613 | +6dls_1_A | ||
1614 | +6dlq_1_A | ||
1615 | +6dnr_1_A | ||
1523 | 5l3p_1_Y | 1616 | 5l3p_1_Y |
1524 | 4hor_1_X | 1617 | 4hor_1_X |
1525 | 3rzo_1_R | 1618 | 3rzo_1_R |
1619 | +5wlh_1_B | ||
1526 | 2f4v_1_Z | 1620 | 2f4v_1_Z |
1621 | +5ml7_1_B | ||
1527 | 1qln_1_R | 1622 | 1qln_1_R |
1623 | +3pgw_1_R | ||
1624 | +3pgw_1_N | ||
1625 | +3cw1_1_X | ||
1626 | +3cw1_1_W | ||
1627 | +3cw1_1_V | ||
1628 | +7b0y_1_A | ||
1528 | 6ogy_1_M | 1629 | 6ogy_1_M |
1529 | 6ogy_1_N | 1630 | 6ogy_1_N |
1530 | 6uej_1_B | 1631 | 6uej_1_B |
1632 | +7kga_1_A | ||
1531 | 6ywy_1_BB | 1633 | 6ywy_1_BB |
1532 | -1x18_1_A | ||
1533 | 5ytx_1_B | 1634 | 5ytx_1_B |
1534 | 4g0a_1_H | 1635 | 4g0a_1_H |
1535 | 6r9p_1_B | 1636 | 6r9p_1_B |
... | @@ -1572,12 +1673,8 @@ | ... | @@ -1572,12 +1673,8 @@ |
1572 | 5mre_1_AA | 1673 | 5mre_1_AA |
1573 | 5mrf_1_AA | 1674 | 5mrf_1_AA |
1574 | 7jhy_1_Z | 1675 | 7jhy_1_Z |
1575 | -2r1g_1_A | ||
1576 | -2r1g_1_D | ||
1577 | -2r1g_1_F | ||
1578 | 3eq4_1_Y | 1676 | 3eq4_1_Y |
1579 | 4wkr_1_C | 1677 | 4wkr_1_C |
1580 | -2r1g_1_X | ||
1581 | 4v99_1_EC | 1678 | 4v99_1_EC |
1582 | 4v99_1_AC | 1679 | 4v99_1_AC |
1583 | 4v99_1_BH | 1680 | 4v99_1_BH |
... | @@ -1641,44 +1738,21 @@ | ... | @@ -1641,44 +1738,21 @@ |
1641 | 6rcl_1_C | 1738 | 6rcl_1_C |
1642 | 5jju_1_C | 1739 | 5jju_1_C |
1643 | 4ejt_1_G | 1740 | 4ejt_1_G |
1741 | +1et4_1_A | ||
1742 | +1et4_1_C | ||
1743 | +1et4_1_B | ||
1744 | +1et4_1_D | ||
1745 | +1et4_1_E | ||
1746 | +1ddy_1_C | ||
1747 | +1ddy_1_A | ||
1748 | +1ddy_1_E | ||
1644 | 6lkq_1_W | 1749 | 6lkq_1_W |
1750 | +6r47_1_A | ||
1645 | 3qsu_1_P | 1751 | 3qsu_1_P |
1646 | 3qsu_1_R | 1752 | 3qsu_1_R |
1647 | 2xs7_1_B | 1753 | 2xs7_1_B |
1648 | 1n38_1_B | 1754 | 1n38_1_B |
1649 | 4qvc_1_G | 1755 | 4qvc_1_G |
1650 | -6q1h_1_D | ||
1651 | -6q1h_1_H | ||
1652 | -6p7p_1_F | ||
1653 | -6p7p_1_E | ||
1654 | -6p7p_1_D | ||
1655 | -6vm6_1_J | ||
1656 | -6vm6_1_G | ||
1657 | -6wan_1_K | ||
1658 | -6wan_1_H | ||
1659 | -6wan_1_G | ||
1660 | -6wan_1_L | ||
1661 | -6wan_1_I | ||
1662 | -6ywo_1_F | ||
1663 | -6wan_1_J | ||
1664 | -4oau_1_A | ||
1665 | -6ywo_1_E | ||
1666 | -6ywo_1_K | ||
1667 | -6vm6_1_I | ||
1668 | -6vm6_1_H | ||
1669 | -6ywo_1_I | ||
1670 | -2a1r_1_C | ||
1671 | -6m6v_1_F | ||
1672 | -6m6v_1_E | ||
1673 | -2a1r_1_D | ||
1674 | -3gpq_1_E | ||
1675 | -3gpq_1_F | ||
1676 | -6o79_1_C | ||
1677 | -6vm6_1_K | ||
1678 | -6m6v_1_G | ||
1679 | -6hyu_1_D | ||
1680 | -1laj_1_R | ||
1681 | -6ybv_1_K | ||
1682 | 6mpf_1_W | 1756 | 6mpf_1_W |
1683 | 6spc_1_A | 1757 | 6spc_1_A |
1684 | 6spe_1_A | 1758 | 6spe_1_A |
... | @@ -1687,14 +1761,12 @@ | ... | @@ -1687,14 +1761,12 @@ |
1687 | 6fti_1_V | 1761 | 6fti_1_V |
1688 | 6ftj_1_V | 1762 | 6ftj_1_V |
1689 | 6ftg_1_V | 1763 | 6ftg_1_V |
1764 | +3npn_1_A | ||
1690 | 4g0a_1_G | 1765 | 4g0a_1_G |
1691 | 4g0a_1_F | 1766 | 4g0a_1_F |
1692 | 4g0a_1_E | 1767 | 4g0a_1_E |
1693 | 2b2d_1_S | 1768 | 2b2d_1_S |
1694 | 5hkc_1_C | 1769 | 5hkc_1_C |
1695 | -4kzy_1_I | ||
1696 | -4kzz_1_I | ||
1697 | -4kzx_1_I | ||
1698 | 1rmv_1_B | 1770 | 1rmv_1_B |
1699 | 4qu7_1_X | 1771 | 4qu7_1_X |
1700 | 4qu7_1_V | 1772 | 4qu7_1_V |
... | @@ -1710,25 +1782,3 @@ | ... | @@ -1710,25 +1782,3 @@ |
1710 | 6pmi_1_3 | 1782 | 6pmi_1_3 |
1711 | 6pmj_1_3 | 1783 | 6pmj_1_3 |
1712 | 5hjz_1_C | 1784 | 5hjz_1_C |
1713 | -7nrc_1_SM | ||
1714 | -7nrc_1_SN | ||
1715 | -7am2_1_R1 | ||
1716 | -7k5l_1_R | ||
1717 | -7b5k_1_X | ||
1718 | -7d8c_1_C | ||
1719 | -7m4y_1_V | ||
1720 | -7m4x_1_V | ||
1721 | -7b5k_1_Z | ||
1722 | -7m4u_1_A | ||
1723 | -7n06_1_G | ||
1724 | -7n06_1_H | ||
1725 | -7n06_1_I | ||
1726 | -7n06_1_J | ||
1727 | -7n06_1_K | ||
1728 | -7n06_1_L | ||
1729 | -7n33_1_G | ||
1730 | -7n33_1_H | ||
1731 | -7n33_1_I | ||
1732 | -7n33_1_J | ||
1733 | -7n33_1_K | ||
1734 | -7n33_1_L | ... | ... |
... | @@ -7,12 +7,6 @@ Could not find nucleotides of chain AA in annotation 6ydw.json. Either there is | ... | @@ -7,12 +7,6 @@ Could not find nucleotides of chain AA in annotation 6ydw.json. Either there is |
7 | 2z9q_1_A_1-72 | 7 | 2z9q_1_A_1-72 |
8 | DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A_1-72. | 8 | DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A_1-72. |
9 | 9 | ||
10 | -1ml5_1_b_5-121 | ||
11 | -Could not find nucleotides of chain b in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
12 | - | ||
13 | -1ml5_1_a_1-2914 | ||
14 | -Could not find nucleotides of chain a in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
15 | - | ||
16 | 3ep2_1_Y_1-72 | 10 | 3ep2_1_Y_1-72 |
17 | DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_Y_1-72. | 11 | DSSR warning 3ep2.json: no nucleotides found. Ignoring 3ep2_1_Y_1-72. |
18 | 12 | ||
... | @@ -22,8 +16,11 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_Y_1-72. | ... | @@ -22,8 +16,11 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_Y_1-72. |
22 | 4v48_1_A6_1-73 | 16 | 4v48_1_A6_1-73 |
23 | DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A6_1-73. | 17 | DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A6_1-73. |
24 | 18 | ||
25 | -1ml5_1_A_2-1520 | 19 | +1ml5_1_b_5-121 |
26 | -Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 20 | +Could not find nucleotides of chain b in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
21 | + | ||
22 | +1ml5_1_a_1-2914 | ||
23 | +Could not find nucleotides of chain a in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
27 | 24 | ||
28 | 1qzb_1_B_1-73 | 25 | 1qzb_1_B_1-73 |
29 | DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B_1-73. | 26 | DSSR warning 1qzb.json: no nucleotides found. Ignoring 1qzb_1_B_1-73. |
... | @@ -34,6 +31,9 @@ DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B_1-73. | ... | @@ -34,6 +31,9 @@ DSSR warning 1qza.json: no nucleotides found. Ignoring 1qza_1_B_1-73. |
34 | 1ls2_1_B_1-73 | 31 | 1ls2_1_B_1-73 |
35 | DSSR warning 1ls2.json: no nucleotides found. Ignoring 1ls2_1_B_1-73. | 32 | DSSR warning 1ls2.json: no nucleotides found. Ignoring 1ls2_1_B_1-73. |
36 | 33 | ||
34 | +1ml5_1_A_2-1520 | ||
35 | +Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
36 | + | ||
37 | 1gsg_1_T_1-72 | 37 | 1gsg_1_T_1-72 |
38 | DSSR warning 1gsg.json: no nucleotides found. Ignoring 1gsg_1_T_1-72. | 38 | DSSR warning 1gsg.json: no nucleotides found. Ignoring 1gsg_1_T_1-72. |
39 | 39 | ||
... | @@ -70,6 +70,12 @@ DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A9_3-118. | ... | @@ -70,6 +70,12 @@ DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_A9_3-118. |
70 | 4v47_1_A9_3-118 | 70 | 4v47_1_A9_3-118 |
71 | DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A9_3-118. | 71 | DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_A9_3-118. |
72 | 72 | ||
73 | +4v42_1_BA_1-2914 | ||
74 | +Could not find nucleotides of chain BA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
75 | + | ||
76 | +4v42_1_BB_5-121 | ||
77 | +Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
78 | + | ||
73 | 2ob7_1_A_10-319 | 79 | 2ob7_1_A_10-319 |
74 | DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_A_10-319. | 80 | DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_A_10-319. |
75 | 81 | ||
... | @@ -82,21 +88,6 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_Z_1-91. | ... | @@ -82,21 +88,6 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_Z_1-91. |
82 | 2ob7_1_D_1-130 | 88 | 2ob7_1_D_1-130 |
83 | DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_D_1-130. | 89 | DSSR warning 2ob7.json: no nucleotides found. Ignoring 2ob7_1_D_1-130. |
84 | 90 | ||
85 | -4v42_1_BA_1-2914 | ||
86 | -Could not find nucleotides of chain BA in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
87 | - | ||
88 | -4v42_1_BB_5-121 | ||
89 | -Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
90 | - | ||
91 | -1r2x_1_C_1-58 | ||
92 | -DSSR warning 1r2x.json: no nucleotides found. Ignoring 1r2x_1_C_1-58. | ||
93 | - | ||
94 | -1r2w_1_C_1-58 | ||
95 | -DSSR warning 1r2w.json: no nucleotides found. Ignoring 1r2w_1_C_1-58. | ||
96 | - | ||
97 | -1eg0_1_L_1-56 | ||
98 | -DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_L_1-56. | ||
99 | - | ||
100 | 3dg2_1_A_1-1542 | 91 | 3dg2_1_A_1-1542 |
101 | DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A_1-1542. | 92 | DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A_1-1542. |
102 | 93 | ||
... | @@ -142,8 +133,14 @@ DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B_1-2904. | ... | @@ -142,8 +133,14 @@ DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_B_1-2904. |
142 | 1eg0_1_O_1-73 | 133 | 1eg0_1_O_1-73 |
143 | DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_O_1-73. | 134 | DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_O_1-73. |
144 | 135 | ||
145 | -1zc8_1_A_1-59 | 136 | +1r2x_1_C_1-58 |
146 | -DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_A_1-59. | 137 | +DSSR warning 1r2x.json: no nucleotides found. Ignoring 1r2x_1_C_1-58. |
138 | + | ||
139 | +1r2w_1_C_1-58 | ||
140 | +DSSR warning 1r2w.json: no nucleotides found. Ignoring 1r2w_1_C_1-58. | ||
141 | + | ||
142 | +1eg0_1_L_1-56 | ||
143 | +DSSR warning 1eg0.json: no nucleotides found. Ignoring 1eg0_1_L_1-56. | ||
147 | 144 | ||
148 | 1jgq_1_A_2-1520 | 145 | 1jgq_1_A_2-1520 |
149 | Could not find nucleotides of chain A in annotation 1jgq.json. Either there is a problem with 1jgq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 146 | Could not find nucleotides of chain A in annotation 1jgq.json. Either there is a problem with 1jgq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
... | @@ -157,6 +154,9 @@ Could not find nucleotides of chain A in annotation 1jgo.json. Either there is a | ... | @@ -157,6 +154,9 @@ Could not find nucleotides of chain A in annotation 1jgo.json. Either there is a |
157 | 1jgp_1_A_2-1520 | 154 | 1jgp_1_A_2-1520 |
158 | Could not find nucleotides of chain A in annotation 1jgp.json. Either there is a problem with 1jgp mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 155 | Could not find nucleotides of chain A in annotation 1jgp.json. Either there is a problem with 1jgp mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
159 | 156 | ||
157 | +1zc8_1_A_1-59 | ||
158 | +DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_A_1-59. | ||
159 | + | ||
160 | 1mvr_1_D_1-59 | 160 | 1mvr_1_D_1-59 |
161 | DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_D_1-59. | 161 | DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_D_1-59. |
162 | 162 | ||
... | @@ -187,24 +187,6 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_B_1-50. | ... | @@ -187,24 +187,6 @@ DSSR warning 3eq3.json: no nucleotides found. Ignoring 3eq3_1_B_1-50. |
187 | 3eq4_1_B_1-50 | 187 | 3eq4_1_B_1-50 |
188 | DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B_1-50. | 188 | DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B_1-50. |
189 | 189 | ||
190 | -3pgw_1_R_1-164 | ||
191 | -DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R_1-164. | ||
192 | - | ||
193 | -3pgw_1_N_1-164 | ||
194 | -DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N_1-164. | ||
195 | - | ||
196 | -3cw1_1_x_1-138 | ||
197 | -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_x_1-138. | ||
198 | - | ||
199 | -3cw1_1_w_1-138 | ||
200 | -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_w_1-138. | ||
201 | - | ||
202 | -3cw1_1_V_1-138 | ||
203 | -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V_1-138. | ||
204 | - | ||
205 | -3cw1_1_v_1-138 | ||
206 | -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_v_1-138. | ||
207 | - | ||
208 | 2iy3_1_B_9-105 | 190 | 2iy3_1_B_9-105 |
209 | DSSR warning 2iy3.json: no nucleotides found. Ignoring 2iy3_1_B_9-105. | 191 | DSSR warning 2iy3.json: no nucleotides found. Ignoring 2iy3_1_B_9-105. |
210 | 192 | ||
... | @@ -238,6 +220,24 @@ DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BZ_1-70. | ... | @@ -238,6 +220,24 @@ DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BZ_1-70. |
238 | 4v5z_1_B1_2-123 | 220 | 4v5z_1_B1_2-123 |
239 | DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B1_2-123. | 221 | DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_B1_2-123. |
240 | 222 | ||
223 | +3pgw_1_R_1-164 | ||
224 | +DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R_1-164. | ||
225 | + | ||
226 | +3pgw_1_N_1-164 | ||
227 | +DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N_1-164. | ||
228 | + | ||
229 | +3cw1_1_x_1-138 | ||
230 | +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_x_1-138. | ||
231 | + | ||
232 | +3cw1_1_w_1-138 | ||
233 | +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_w_1-138. | ||
234 | + | ||
235 | +3cw1_1_V_1-138 | ||
236 | +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V_1-138. | ||
237 | + | ||
238 | +3cw1_1_v_1-138 | ||
239 | +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_v_1-138. | ||
240 | + | ||
241 | 1mvr_1_B_1-96 | 241 | 1mvr_1_B_1-96 |
242 | DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_B_1-96. | 242 | DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_B_1-96. |
243 | 243 | ||
... | @@ -889,9 +889,24 @@ Could not find nucleotides of chain X in annotation 4w2e.json. Either there is a | ... | @@ -889,9 +889,24 @@ Could not find nucleotides of chain X in annotation 4w2e.json. Either there is a |
889 | 6ucq_1_2X | 889 | 6ucq_1_2X |
890 | Could not find nucleotides of chain 2X in annotation 6ucq.json. Either there is a problem with 6ucq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 890 | Could not find nucleotides of chain 2X in annotation 6ucq.json. Either there is a problem with 6ucq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
891 | 891 | ||
892 | +7n1p_1_DT | ||
893 | +Could not find nucleotides of chain DT in annotation 7n1p.json. Either there is a problem with 7n1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
894 | + | ||
895 | +7n2u_1_DT | ||
896 | +Could not find nucleotides of chain DT in annotation 7n2u.json. Either there is a problem with 7n2u mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
897 | + | ||
892 | 6yss_1_W | 898 | 6yss_1_W |
893 | Could not find nucleotides of chain W in annotation 6yss.json. Either there is a problem with 6yss mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 899 | Could not find nucleotides of chain W in annotation 6yss.json. Either there is a problem with 6yss mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
894 | 900 | ||
901 | +7n30_1_DT | ||
902 | +Could not find nucleotides of chain DT in annotation 7n30.json. Either there is a problem with 7n30 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
903 | + | ||
904 | +7n31_1_DT | ||
905 | +Could not find nucleotides of chain DT in annotation 7n31.json. Either there is a problem with 7n31 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
906 | + | ||
907 | +7n2c_1_DT | ||
908 | +Could not find nucleotides of chain DT in annotation 7n2c.json. Either there is a problem with 7n2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
909 | + | ||
895 | 5afi_1_Y | 910 | 5afi_1_Y |
896 | Could not find nucleotides of chain Y in annotation 5afi.json. Either there is a problem with 5afi mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 911 | Could not find nucleotides of chain Y in annotation 5afi.json. Either there is a problem with 5afi mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
897 | 912 | ||
... | @@ -1003,6 +1018,54 @@ Could not find nucleotides of chain X in annotation 4v4i.json. Either there is a | ... | @@ -1003,6 +1018,54 @@ Could not find nucleotides of chain X in annotation 4v4i.json. Either there is a |
1003 | 4v42_1_BB | 1018 | 4v42_1_BB |
1004 | Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1019 | Could not find nucleotides of chain BB in annotation 4v42.json. Either there is a problem with 4v42 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1005 | 1020 | ||
1021 | +4jrc_1_B | ||
1022 | +Nucleotides not inserted ! | ||
1023 | + | ||
1024 | +4jrc_1_A | ||
1025 | +Nucleotides not inserted ! | ||
1026 | + | ||
1027 | +6lkq_1_S | ||
1028 | +Could not find nucleotides of chain S in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1029 | + | ||
1030 | +5h5u_1_H | ||
1031 | +Could not find nucleotides of chain H in annotation 5h5u.json. Either there is a problem with 5h5u mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1032 | + | ||
1033 | +7d6z_1_F | ||
1034 | +Could not find nucleotides of chain F in annotation 7d6z.json. Either there is a problem with 7d6z mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1035 | + | ||
1036 | +5lze_1_Y | ||
1037 | +Could not find nucleotides of chain Y in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1038 | + | ||
1039 | +5lze_1_V | ||
1040 | +Could not find nucleotides of chain V in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1041 | + | ||
1042 | +5lze_1_X | ||
1043 | +Could not find nucleotides of chain X in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1044 | + | ||
1045 | +3jcj_1_G | ||
1046 | +Could not find nucleotides of chain G in annotation 3jcj.json. Either there is a problem with 3jcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1047 | + | ||
1048 | +6o7k_1_G | ||
1049 | +Could not find nucleotides of chain G in annotation 6o7k.json. Either there is a problem with 6o7k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1050 | + | ||
1051 | +3dg2_1_A | ||
1052 | +DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A. | ||
1053 | + | ||
1054 | +3dg0_1_A | ||
1055 | +DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A. | ||
1056 | + | ||
1057 | +4v48_1_BA | ||
1058 | +DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_BA. | ||
1059 | + | ||
1060 | +4v47_1_BA | ||
1061 | +DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_BA. | ||
1062 | + | ||
1063 | +3dg4_1_A | ||
1064 | +DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A. | ||
1065 | + | ||
1066 | +3dg5_1_A | ||
1067 | +DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A. | ||
1068 | + | ||
1006 | 6d30_1_C | 1069 | 6d30_1_C |
1007 | Sequence is too short. (< 5 resolved nts) | 1070 | Sequence is too short. (< 5 resolved nts) |
1008 | 1071 | ||
... | @@ -1315,62 +1378,53 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -1315,62 +1378,53 @@ Sequence is too short. (< 5 resolved nts) |
1315 | 6n6g_1_D | 1378 | 6n6g_1_D |
1316 | Sequence is too short. (< 5 resolved nts) | 1379 | Sequence is too short. (< 5 resolved nts) |
1317 | 1380 | ||
1318 | -6lkq_1_S | 1381 | +4b3r_1_W |
1319 | -Could not find nucleotides of chain S in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1382 | +Sequence is too short. (< 5 resolved nts) |
1320 | - | ||
1321 | -5h5u_1_H | ||
1322 | -Could not find nucleotides of chain H in annotation 5h5u.json. Either there is a problem with 5h5u mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1323 | - | ||
1324 | -7d6z_1_F | ||
1325 | -Could not find nucleotides of chain F in annotation 7d6z.json. Either there is a problem with 7d6z mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1326 | - | ||
1327 | -5lze_1_Y | ||
1328 | -Could not find nucleotides of chain Y in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1329 | 1383 | ||
1330 | -5lze_1_V | 1384 | +4b3t_1_W |
1331 | -Could not find nucleotides of chain V in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1385 | +Sequence is too short. (< 5 resolved nts) |
1332 | 1386 | ||
1333 | -5lze_1_X | 1387 | +4b3s_1_W |
1334 | -Could not find nucleotides of chain X in annotation 5lze.json. Either there is a problem with 5lze mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1388 | +Sequence is too short. (< 5 resolved nts) |
1335 | 1389 | ||
1336 | -3jcj_1_G | 1390 | +7b5k_1_X |
1337 | -Could not find nucleotides of chain G in annotation 3jcj.json. Either there is a problem with 3jcj mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1391 | +Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1338 | 1392 | ||
1339 | -6o7k_1_G | 1393 | +5o2r_1_X |
1340 | -Could not find nucleotides of chain G in annotation 6o7k.json. Either there is a problem with 6o7k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1394 | +Could not find nucleotides of chain X in annotation 5o2r.json. Either there is a problem with 5o2r mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1341 | 1395 | ||
1342 | -3dg2_1_A | 1396 | +5kcs_1_1X |
1343 | -DSSR warning 3dg2.json: no nucleotides found. Ignoring 3dg2_1_A. | 1397 | +Could not find nucleotides of chain 1X in annotation 5kcs.json. Either there is a problem with 5kcs mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1344 | 1398 | ||
1345 | -3dg0_1_A | 1399 | +7n1p_1_PT |
1346 | -DSSR warning 3dg0.json: no nucleotides found. Ignoring 3dg0_1_A. | 1400 | +Could not find nucleotides of chain PT in annotation 7n1p.json. Either there is a problem with 7n1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1347 | 1401 | ||
1348 | -4v48_1_BA | 1402 | +7n2u_1_PT |
1349 | -DSSR warning 4v48.json: no nucleotides found. Ignoring 4v48_1_BA. | 1403 | +Could not find nucleotides of chain PT in annotation 7n2u.json. Either there is a problem with 7n2u mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1350 | 1404 | ||
1351 | -4v47_1_BA | 1405 | +7n30_1_PT |
1352 | -DSSR warning 4v47.json: no nucleotides found. Ignoring 4v47_1_BA. | 1406 | +Could not find nucleotides of chain PT in annotation 7n30.json. Either there is a problem with 7n30 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1353 | 1407 | ||
1354 | -3dg4_1_A | 1408 | +7n31_1_PT |
1355 | -DSSR warning 3dg4.json: no nucleotides found. Ignoring 3dg4_1_A. | 1409 | +Could not find nucleotides of chain PT in annotation 7n31.json. Either there is a problem with 7n31 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1356 | 1410 | ||
1357 | -3dg5_1_A | 1411 | +7n2c_1_PT |
1358 | -DSSR warning 3dg5.json: no nucleotides found. Ignoring 3dg5_1_A. | 1412 | +Could not find nucleotides of chain PT in annotation 7n2c.json. Either there is a problem with 7n2c mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1359 | 1413 | ||
1360 | -4b3r_1_W | 1414 | +6yl5_1_I |
1361 | -Sequence is too short. (< 5 resolved nts) | 1415 | +Nucleotides not inserted ! |
1362 | 1416 | ||
1363 | -4b3t_1_W | 1417 | +6yl5_1_E |
1364 | -Sequence is too short. (< 5 resolved nts) | 1418 | +Nucleotides not inserted ! |
1365 | 1419 | ||
1366 | -4b3s_1_W | 1420 | +6yl5_1_A |
1367 | -Sequence is too short. (< 5 resolved nts) | 1421 | +Nucleotides not inserted ! |
1368 | 1422 | ||
1369 | -5o2r_1_X | 1423 | +6yl5_1_K |
1370 | -Could not find nucleotides of chain X in annotation 5o2r.json. Either there is a problem with 5o2r mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1424 | +Nucleotides not inserted ! |
1371 | 1425 | ||
1372 | -5kcs_1_1X | 1426 | +6yl5_1_G |
1373 | -Could not find nucleotides of chain 1X in annotation 5kcs.json. Either there is a problem with 5kcs mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1427 | +Nucleotides not inserted ! |
1374 | 1428 | ||
1375 | 6zvk_1_E2 | 1429 | 6zvk_1_E2 |
1376 | Could not find nucleotides of chain E2 in annotation 6zvk.json. Either there is a problem with 6zvk mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1430 | Could not find nucleotides of chain E2 in annotation 6zvk.json. Either there is a problem with 6zvk mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
... | @@ -1582,6 +1636,9 @@ Could not find nucleotides of chain V in annotation 6olf.json. Either there is a | ... | @@ -1582,6 +1636,9 @@ Could not find nucleotides of chain V in annotation 6olf.json. Either there is a |
1582 | 3erc_1_G | 1636 | 3erc_1_G |
1583 | Sequence is too short. (< 5 resolved nts) | 1637 | Sequence is too short. (< 5 resolved nts) |
1584 | 1638 | ||
1639 | +4qjd_1_D | ||
1640 | +Nucleotides not inserted ! | ||
1641 | + | ||
1585 | 6of1_1_1W | 1642 | 6of1_1_1W |
1586 | Could not find nucleotides of chain 1W in annotation 6of1.json. Either there is a problem with 6of1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1643 | Could not find nucleotides of chain 1W in annotation 6of1.json. Either there is a problem with 6of1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1587 | 1644 | ||
... | @@ -1675,9 +1732,15 @@ DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A. | ... | @@ -1675,9 +1732,15 @@ DSSR warning 2z9q.json: no nucleotides found. Ignoring 2z9q_1_A. |
1675 | 4hot_1_X | 1732 | 4hot_1_X |
1676 | Sequence is too short. (< 5 resolved nts) | 1733 | Sequence is too short. (< 5 resolved nts) |
1677 | 1734 | ||
1735 | +5ns4_1_C | ||
1736 | +Nucleotides not inserted ! | ||
1737 | + | ||
1678 | 6d2z_1_C | 1738 | 6d2z_1_C |
1679 | Sequence is too short. (< 5 resolved nts) | 1739 | Sequence is too short. (< 5 resolved nts) |
1680 | 1740 | ||
1741 | +7eh0_1_I | ||
1742 | +Sequence is too short. (< 5 resolved nts) | ||
1743 | + | ||
1681 | 4tu0_1_F | 1744 | 4tu0_1_F |
1682 | Sequence is too short. (< 5 resolved nts) | 1745 | Sequence is too short. (< 5 resolved nts) |
1683 | 1746 | ||
... | @@ -1738,18 +1801,15 @@ Could not find nucleotides of chain NB in annotation 6i7o.json. Either there is | ... | @@ -1738,18 +1801,15 @@ Could not find nucleotides of chain NB in annotation 6i7o.json. Either there is |
1738 | 1ml5_1_A | 1801 | 1ml5_1_A |
1739 | Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1802 | Could not find nucleotides of chain A in annotation 1ml5.json. Either there is a problem with 1ml5 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1740 | 1803 | ||
1804 | +7nsq_1_V | ||
1805 | +Could not find nucleotides of chain V in annotation 7nsq.json. Either there is a problem with 7nsq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1806 | + | ||
1741 | 6swa_1_Q | 1807 | 6swa_1_Q |
1742 | Could not find nucleotides of chain Q in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1808 | Could not find nucleotides of chain Q in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1743 | 1809 | ||
1744 | 6swa_1_R | 1810 | 6swa_1_R |
1745 | Could not find nucleotides of chain R in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1811 | Could not find nucleotides of chain R in annotation 6swa.json. Either there is a problem with 6swa mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1746 | 1812 | ||
1747 | -3j6x_1_IR | ||
1748 | -Could not find nucleotides of chain IR in annotation 3j6x.json. Either there is a problem with 3j6x mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1749 | - | ||
1750 | -3j6y_1_IR | ||
1751 | -Could not find nucleotides of chain IR in annotation 3j6y.json. Either there is a problem with 3j6y mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1752 | - | ||
1753 | 6ole_1_T | 1813 | 6ole_1_T |
1754 | Could not find nucleotides of chain T in annotation 6ole.json. Either there is a problem with 6ole mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1814 | Could not find nucleotides of chain T in annotation 6ole.json. Either there is a problem with 6ole mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1755 | 1815 | ||
... | @@ -1768,6 +1828,15 @@ Could not find nucleotides of chain T in annotation 6olf.json. Either there is a | ... | @@ -1768,6 +1828,15 @@ Could not find nucleotides of chain T in annotation 6olf.json. Either there is a |
1768 | 6w6l_1_T | 1828 | 6w6l_1_T |
1769 | Could not find nucleotides of chain T in annotation 6w6l.json. Either there is a problem with 6w6l mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1829 | Could not find nucleotides of chain T in annotation 6w6l.json. Either there is a problem with 6w6l mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1770 | 1830 | ||
1831 | +6tnu_1_M | ||
1832 | +Could not find nucleotides of chain M in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1833 | + | ||
1834 | +5mc6_1_M | ||
1835 | +Could not find nucleotides of chain M in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1836 | + | ||
1837 | +7nrc_1_SM | ||
1838 | +Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1839 | + | ||
1771 | 6tb3_1_N | 1840 | 6tb3_1_N |
1772 | Could not find nucleotides of chain N in annotation 6tb3.json. Either there is a problem with 6tb3 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1841 | Could not find nucleotides of chain N in annotation 6tb3.json. Either there is a problem with 6tb3 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1773 | 1842 | ||
... | @@ -1780,6 +1849,9 @@ Could not find nucleotides of chain SN in annotation 7b7d.json. Either there is | ... | @@ -1780,6 +1849,9 @@ Could not find nucleotides of chain SN in annotation 7b7d.json. Either there is |
1780 | 6tnu_1_N | 1849 | 6tnu_1_N |
1781 | Could not find nucleotides of chain N in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1850 | Could not find nucleotides of chain N in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1782 | 1851 | ||
1852 | +7nrc_1_SN | ||
1853 | +Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1854 | + | ||
1783 | 7nrd_1_SN | 1855 | 7nrd_1_SN |
1784 | Could not find nucleotides of chain SN in annotation 7nrd.json. Either there is a problem with 7nrd mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 1856 | Could not find nucleotides of chain SN in annotation 7nrd.json. Either there is a problem with 7nrd mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
1785 | 1857 | ||
... | @@ -1810,6 +1882,15 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_D. | ... | @@ -1810,6 +1882,15 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_D. |
1810 | 5o1y_1_B | 1882 | 5o1y_1_B |
1811 | Sequence is too short. (< 5 resolved nts) | 1883 | Sequence is too short. (< 5 resolved nts) |
1812 | 1884 | ||
1885 | +4kzy_1_I | ||
1886 | +Could not find nucleotides of chain I in annotation 4kzy.json. Either there is a problem with 4kzy mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1887 | + | ||
1888 | +4kzz_1_I | ||
1889 | +Could not find nucleotides of chain I in annotation 4kzz.json. Either there is a problem with 4kzz mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1890 | + | ||
1891 | +4kzx_1_I | ||
1892 | +Could not find nucleotides of chain I in annotation 4kzx.json. Either there is a problem with 4kzx mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
1893 | + | ||
1813 | 3jcr_1_H | 1894 | 3jcr_1_H |
1814 | DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_H. | 1895 | DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_H. |
1815 | 1896 | ||
... | @@ -2119,9 +2200,6 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2119,9 +2200,6 @@ Sequence is too short. (< 5 resolved nts) |
2119 | 6uu1_1_333 | 2200 | 6uu1_1_333 |
2120 | Sequence is too short. (< 5 resolved nts) | 2201 | Sequence is too short. (< 5 resolved nts) |
2121 | 2202 | ||
2122 | -1pn8_1_D | ||
2123 | -DSSR warning 1pn8.json: no nucleotides found. Ignoring 1pn8_1_D. | ||
2124 | - | ||
2125 | 3er8_1_H | 2203 | 3er8_1_H |
2126 | Sequence is too short. (< 5 resolved nts) | 2204 | Sequence is too short. (< 5 resolved nts) |
2127 | 2205 | ||
... | @@ -2236,14 +2314,11 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2236,14 +2314,11 @@ Sequence is too short. (< 5 resolved nts) |
2236 | 1xnq_1_W | 2314 | 1xnq_1_W |
2237 | Sequence is too short. (< 5 resolved nts) | 2315 | Sequence is too short. (< 5 resolved nts) |
2238 | 2316 | ||
2239 | -1x18_1_C | 2317 | +7n2v_1_DT |
2240 | -DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_C. | 2318 | +Could not find nucleotides of chain DT in annotation 7n2v.json. Either there is a problem with 7n2v mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
2241 | - | ||
2242 | -1x18_1_B | ||
2243 | -DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_B. | ||
2244 | 2319 | ||
2245 | -1x18_1_D | 2320 | +4peh_1_Z |
2246 | -DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_D. | 2321 | +Sequence is too short. (< 5 resolved nts) |
2247 | 2322 | ||
2248 | 1vq6_1_4 | 2323 | 1vq6_1_4 |
2249 | Sequence is too short. (< 5 resolved nts) | 2324 | Sequence is too short. (< 5 resolved nts) |
... | @@ -2278,6 +2353,9 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2278,6 +2353,9 @@ Sequence is too short. (< 5 resolved nts) |
2278 | 4xbf_1_D | 2353 | 4xbf_1_D |
2279 | Sequence is too short. (< 5 resolved nts) | 2354 | Sequence is too short. (< 5 resolved nts) |
2280 | 2355 | ||
2356 | +5w1h_1_B | ||
2357 | +Nucleotides not inserted ! | ||
2358 | + | ||
2281 | 6n6d_1_D | 2359 | 6n6d_1_D |
2282 | Sequence is too short. (< 5 resolved nts) | 2360 | Sequence is too short. (< 5 resolved nts) |
2283 | 2361 | ||
... | @@ -2296,52 +2374,148 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2296,52 +2374,148 @@ Sequence is too short. (< 5 resolved nts) |
2296 | 6tz1_1_N | 2374 | 6tz1_1_N |
2297 | Sequence is too short. (< 5 resolved nts) | 2375 | Sequence is too short. (< 5 resolved nts) |
2298 | 2376 | ||
2299 | -6sce_1_B | 2377 | +6q1h_1_D |
2300 | Sequence is too short. (< 5 resolved nts) | 2378 | Sequence is too short. (< 5 resolved nts) |
2301 | 2379 | ||
2302 | -6xl1_1_C | 2380 | +6q1h_1_H |
2303 | Sequence is too short. (< 5 resolved nts) | 2381 | Sequence is too short. (< 5 resolved nts) |
2304 | 2382 | ||
2305 | -6scf_1_I | 2383 | +6p7p_1_F |
2306 | Sequence is too short. (< 5 resolved nts) | 2384 | Sequence is too short. (< 5 resolved nts) |
2307 | 2385 | ||
2308 | -6scf_1_K | 2386 | +6p7p_1_E |
2309 | Sequence is too short. (< 5 resolved nts) | 2387 | Sequence is too short. (< 5 resolved nts) |
2310 | 2388 | ||
2311 | -6yud_1_K | 2389 | +6p7p_1_D |
2312 | Sequence is too short. (< 5 resolved nts) | 2390 | Sequence is too short. (< 5 resolved nts) |
2313 | 2391 | ||
2314 | -6yud_1_O | 2392 | +6vm6_1_J |
2315 | Sequence is too short. (< 5 resolved nts) | 2393 | Sequence is too short. (< 5 resolved nts) |
2316 | 2394 | ||
2317 | -6scf_1_M | 2395 | +6vm6_1_G |
2318 | Sequence is too short. (< 5 resolved nts) | 2396 | Sequence is too short. (< 5 resolved nts) |
2319 | 2397 | ||
2320 | -6yud_1_P | 2398 | +6wan_1_K |
2321 | Sequence is too short. (< 5 resolved nts) | 2399 | Sequence is too short. (< 5 resolved nts) |
2322 | 2400 | ||
2323 | -6scf_1_L | 2401 | +6wan_1_H |
2324 | Sequence is too short. (< 5 resolved nts) | 2402 | Sequence is too short. (< 5 resolved nts) |
2325 | 2403 | ||
2326 | -6yud_1_M | 2404 | +6wan_1_G |
2327 | Sequence is too short. (< 5 resolved nts) | 2405 | Sequence is too short. (< 5 resolved nts) |
2328 | 2406 | ||
2329 | -6yud_1_Q | 2407 | +6wan_1_L |
2330 | Sequence is too short. (< 5 resolved nts) | 2408 | Sequence is too short. (< 5 resolved nts) |
2331 | 2409 | ||
2332 | -6w11_1_C | 2410 | +6wan_1_I |
2333 | Sequence is too short. (< 5 resolved nts) | 2411 | Sequence is too short. (< 5 resolved nts) |
2334 | 2412 | ||
2335 | -6o6x_1_D | 2413 | +6ywo_1_F |
2336 | Sequence is too short. (< 5 resolved nts) | 2414 | Sequence is too short. (< 5 resolved nts) |
2337 | 2415 | ||
2338 | -4ba2_1_R | 2416 | +6wan_1_J |
2339 | Sequence is too short. (< 5 resolved nts) | 2417 | Sequence is too short. (< 5 resolved nts) |
2340 | 2418 | ||
2341 | -7bdv_1_F | 2419 | +4oau_1_A |
2342 | Sequence is too short. (< 5 resolved nts) | 2420 | Sequence is too short. (< 5 resolved nts) |
2343 | 2421 | ||
2344 | -7bdv_1_H | 2422 | +6ywo_1_E |
2423 | +Sequence is too short. (< 5 resolved nts) | ||
2424 | + | ||
2425 | +6ywo_1_K | ||
2426 | +Sequence is too short. (< 5 resolved nts) | ||
2427 | + | ||
2428 | +6vm6_1_I | ||
2429 | +Sequence is too short. (< 5 resolved nts) | ||
2430 | + | ||
2431 | +6vm6_1_H | ||
2432 | +Sequence is too short. (< 5 resolved nts) | ||
2433 | + | ||
2434 | +6ywo_1_I | ||
2435 | +Sequence is too short. (< 5 resolved nts) | ||
2436 | + | ||
2437 | +2a1r_1_C | ||
2438 | +Sequence is too short. (< 5 resolved nts) | ||
2439 | + | ||
2440 | +6m6v_1_F | ||
2441 | +Sequence is too short. (< 5 resolved nts) | ||
2442 | + | ||
2443 | +6m6v_1_E | ||
2444 | +Sequence is too short. (< 5 resolved nts) | ||
2445 | + | ||
2446 | +2a1r_1_D | ||
2447 | +Sequence is too short. (< 5 resolved nts) | ||
2448 | + | ||
2449 | +3gpq_1_E | ||
2450 | +Sequence is too short. (< 5 resolved nts) | ||
2451 | + | ||
2452 | +3gpq_1_F | ||
2453 | +Sequence is too short. (< 5 resolved nts) | ||
2454 | + | ||
2455 | +6o79_1_C | ||
2456 | +Sequence is too short. (< 5 resolved nts) | ||
2457 | + | ||
2458 | +6vm6_1_K | ||
2459 | +Sequence is too short. (< 5 resolved nts) | ||
2460 | + | ||
2461 | +6m6v_1_G | ||
2462 | +Sequence is too short. (< 5 resolved nts) | ||
2463 | + | ||
2464 | +6hyu_1_D | ||
2465 | +Sequence is too short. (< 5 resolved nts) | ||
2466 | + | ||
2467 | +1laj_1_R | ||
2468 | +Sequence is too short. (< 5 resolved nts) | ||
2469 | + | ||
2470 | +6ybv_1_K | ||
2471 | +Could not find nucleotides of chain K in annotation 6ybv.json. Either there is a problem with 6ybv mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
2472 | + | ||
2473 | +6sce_1_B | ||
2474 | +Sequence is too short. (< 5 resolved nts) | ||
2475 | + | ||
2476 | +6xl1_1_C | ||
2477 | +Sequence is too short. (< 5 resolved nts) | ||
2478 | + | ||
2479 | +6scf_1_I | ||
2480 | +Sequence is too short. (< 5 resolved nts) | ||
2481 | + | ||
2482 | +6scf_1_K | ||
2483 | +Sequence is too short. (< 5 resolved nts) | ||
2484 | + | ||
2485 | +6yud_1_K | ||
2486 | +Sequence is too short. (< 5 resolved nts) | ||
2487 | + | ||
2488 | +6yud_1_O | ||
2489 | +Sequence is too short. (< 5 resolved nts) | ||
2490 | + | ||
2491 | +6scf_1_M | ||
2492 | +Sequence is too short. (< 5 resolved nts) | ||
2493 | + | ||
2494 | +6yud_1_P | ||
2495 | +Sequence is too short. (< 5 resolved nts) | ||
2496 | + | ||
2497 | +6scf_1_L | ||
2498 | +Sequence is too short. (< 5 resolved nts) | ||
2499 | + | ||
2500 | +6yud_1_M | ||
2501 | +Sequence is too short. (< 5 resolved nts) | ||
2502 | + | ||
2503 | +6yud_1_Q | ||
2504 | +Sequence is too short. (< 5 resolved nts) | ||
2505 | + | ||
2506 | +6w11_1_C | ||
2507 | +Sequence is too short. (< 5 resolved nts) | ||
2508 | + | ||
2509 | +6o6x_1_D | ||
2510 | +Sequence is too short. (< 5 resolved nts) | ||
2511 | + | ||
2512 | +4ba2_1_R | ||
2513 | +Sequence is too short. (< 5 resolved nts) | ||
2514 | + | ||
2515 | +7bdv_1_F | ||
2516 | +Sequence is too short. (< 5 resolved nts) | ||
2517 | + | ||
2518 | +7bdv_1_H | ||
2345 | Sequence is too short. (< 5 resolved nts) | 2519 | Sequence is too short. (< 5 resolved nts) |
2346 | 2520 | ||
2347 | 6o6x_1_C | 2521 | 6o6x_1_C |
... | @@ -2423,7 +2597,7 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2423,7 +2597,7 @@ Sequence is too short. (< 5 resolved nts) |
2423 | Sequence is too short. (< 5 resolved nts) | 2597 | Sequence is too short. (< 5 resolved nts) |
2424 | 2598 | ||
2425 | 1y1y_1_P | 2599 | 1y1y_1_P |
2426 | -DSSR warning 1y1y.json: no nucleotides found. Ignoring 1y1y_1_P. | 2600 | +Sequence is too short. (< 5 resolved nts) |
2427 | 2601 | ||
2428 | 5zuu_1_I | 2602 | 5zuu_1_I |
2429 | Sequence is too short. (< 5 resolved nts) | 2603 | Sequence is too short. (< 5 resolved nts) |
... | @@ -2431,6 +2605,9 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2431,6 +2605,9 @@ Sequence is too short. (< 5 resolved nts) |
2431 | 5zuu_1_G | 2605 | 5zuu_1_G |
2432 | Sequence is too short. (< 5 resolved nts) | 2606 | Sequence is too short. (< 5 resolved nts) |
2433 | 2607 | ||
2608 | +7am2_1_R1 | ||
2609 | +Sequence is too short. (< 5 resolved nts) | ||
2610 | + | ||
2434 | 4peh_1_W | 2611 | 4peh_1_W |
2435 | Sequence is too short. (< 5 resolved nts) | 2612 | Sequence is too short. (< 5 resolved nts) |
2436 | 2613 | ||
... | @@ -2443,7 +2620,7 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2443,7 +2620,7 @@ Sequence is too short. (< 5 resolved nts) |
2443 | 4peh_1_Y | 2620 | 4peh_1_Y |
2444 | Sequence is too short. (< 5 resolved nts) | 2621 | Sequence is too short. (< 5 resolved nts) |
2445 | 2622 | ||
2446 | -4peh_1_Z | 2623 | +7d8c_1_C |
2447 | Sequence is too short. (< 5 resolved nts) | 2624 | Sequence is too short. (< 5 resolved nts) |
2448 | 2625 | ||
2449 | 6mkn_1_W | 2626 | 6mkn_1_W |
... | @@ -2482,30 +2659,9 @@ Could not find nucleotides of chain Q in annotation 4eya.json. Either there is a | ... | @@ -2482,30 +2659,9 @@ Could not find nucleotides of chain Q in annotation 4eya.json. Either there is a |
2482 | 4eya_1_R | 2659 | 4eya_1_R |
2483 | Could not find nucleotides of chain R in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 2660 | Could not find nucleotides of chain R in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
2484 | 2661 | ||
2485 | -1qzc_1_B | ||
2486 | -DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_B. | ||
2487 | - | ||
2488 | -1t1o_1_B | ||
2489 | -DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_B. | ||
2490 | - | ||
2491 | 1mvr_1_C | 2662 | 1mvr_1_C |
2492 | DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_C. | 2663 | DSSR warning 1mvr.json: no nucleotides found. Ignoring 1mvr_1_C. |
2493 | 2664 | ||
2494 | -1t1m_1_B | ||
2495 | -DSSR warning 1t1m.json: no nucleotides found. Ignoring 1t1m_1_B. | ||
2496 | - | ||
2497 | -1t1o_1_C | ||
2498 | -DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_C. | ||
2499 | - | ||
2500 | -1t1m_1_A | ||
2501 | -DSSR warning 1t1m.json: no nucleotides found. Ignoring 1t1m_1_A. | ||
2502 | - | ||
2503 | -1t1o_1_A | ||
2504 | -DSSR warning 1t1o.json: no nucleotides found. Ignoring 1t1o_1_A. | ||
2505 | - | ||
2506 | -2r1g_1_B | ||
2507 | -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_B. | ||
2508 | - | ||
2509 | 4ht9_1_E | 2665 | 4ht9_1_E |
2510 | Sequence is too short. (< 5 resolved nts) | 2666 | Sequence is too short. (< 5 resolved nts) |
2511 | 2667 | ||
... | @@ -2536,21 +2692,15 @@ Could not find nucleotides of chain U in annotation 5uk4.json. Either there is a | ... | @@ -2536,21 +2692,15 @@ Could not find nucleotides of chain U in annotation 5uk4.json. Either there is a |
2536 | 5f6c_1_E | 2692 | 5f6c_1_E |
2537 | Sequence is too short. (< 5 resolved nts) | 2693 | Sequence is too short. (< 5 resolved nts) |
2538 | 2694 | ||
2695 | +7nwh_1_HH | ||
2696 | +Could not find nucleotides of chain HH in annotation 7nwh.json. Either there is a problem with 7nwh mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
2697 | + | ||
2539 | 4rcj_1_B | 2698 | 4rcj_1_B |
2540 | Sequence is too short. (< 5 resolved nts) | 2699 | Sequence is too short. (< 5 resolved nts) |
2541 | 2700 | ||
2542 | 1xnr_1_W | 2701 | 1xnr_1_W |
2543 | Sequence is too short. (< 5 resolved nts) | 2702 | Sequence is too short. (< 5 resolved nts) |
2544 | 2703 | ||
2545 | -2agn_1_A | ||
2546 | -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_A. | ||
2547 | - | ||
2548 | -2agn_1_C | ||
2549 | -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_C. | ||
2550 | - | ||
2551 | -2agn_1_B | ||
2552 | -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_B. | ||
2553 | - | ||
2554 | 6e0o_1_C | 2704 | 6e0o_1_C |
2555 | Sequence is too short. (< 5 resolved nts) | 2705 | Sequence is too short. (< 5 resolved nts) |
2556 | 2706 | ||
... | @@ -2602,11 +2752,8 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2602,11 +2752,8 @@ Sequence is too short. (< 5 resolved nts) |
2602 | 4d61_1_J | 2752 | 4d61_1_J |
2603 | Could not find nucleotides of chain J in annotation 4d61.json. Either there is a problem with 4d61 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 2753 | Could not find nucleotides of chain J in annotation 4d61.json. Either there is a problem with 4d61 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
2604 | 2754 | ||
2605 | -1trj_1_B | 2755 | +7nwg_1_Q3 |
2606 | -DSSR warning 1trj.json: no nucleotides found. Ignoring 1trj_1_B. | 2756 | +Could not find nucleotides of chain Q3 in annotation 7nwg.json. Either there is a problem with 7nwg mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
2607 | - | ||
2608 | -1trj_1_C | ||
2609 | -DSSR warning 1trj.json: no nucleotides found. Ignoring 1trj_1_C. | ||
2610 | 2757 | ||
2611 | 5tbw_1_SR | 2758 | 5tbw_1_SR |
2612 | Could not find nucleotides of chain SR in annotation 5tbw.json. Either there is a problem with 5tbw mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 2759 | Could not find nucleotides of chain SR in annotation 5tbw.json. Either there is a problem with 5tbw mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
... | @@ -2653,6 +2800,12 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2653,6 +2800,12 @@ Sequence is too short. (< 5 resolved nts) |
2653 | 3jbu_1_V | 2800 | 3jbu_1_V |
2654 | Could not find nucleotides of chain V in annotation 3jbu.json. Either there is a problem with 3jbu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 2801 | Could not find nucleotides of chain V in annotation 3jbu.json. Either there is a problem with 3jbu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
2655 | 2802 | ||
2803 | +4ts2_1_Y | ||
2804 | +Nucleotides not inserted ! | ||
2805 | + | ||
2806 | +4ts0_1_Y | ||
2807 | +Nucleotides not inserted ! | ||
2808 | + | ||
2656 | 1h2c_1_R | 2809 | 1h2c_1_R |
2657 | Sequence is too short. (< 5 resolved nts) | 2810 | Sequence is too short. (< 5 resolved nts) |
2658 | 2811 | ||
... | @@ -2731,6 +2884,9 @@ Could not find nucleotides of chain Z in annotation 5flx.json. Either there is a | ... | @@ -2731,6 +2884,9 @@ Could not find nucleotides of chain Z in annotation 5flx.json. Either there is a |
2731 | 6eri_1_AX | 2884 | 6eri_1_AX |
2732 | Could not find nucleotides of chain AX in annotation 6eri.json. Either there is a problem with 6eri mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 2885 | Could not find nucleotides of chain AX in annotation 6eri.json. Either there is a problem with 6eri mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
2733 | 2886 | ||
2887 | +7k5l_1_R | ||
2888 | +Sequence is too short. (< 5 resolved nts) | ||
2889 | + | ||
2734 | 7d80_1_Y | 2890 | 7d80_1_Y |
2735 | Could not find nucleotides of chain Y in annotation 7d80.json. Either there is a problem with 7d80 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 2891 | Could not find nucleotides of chain Y in annotation 7d80.json. Either there is a problem with 7d80 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
2736 | 2892 | ||
... | @@ -2752,6 +2908,9 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_I. | ... | @@ -2752,6 +2908,9 @@ DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_I. |
2752 | 1zc8_1_H | 2908 | 1zc8_1_H |
2753 | DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_H. | 2909 | DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_H. |
2754 | 2910 | ||
2911 | +6bfb_1_Y | ||
2912 | +Nucleotides not inserted ! | ||
2913 | + | ||
2755 | 1zc8_1_J | 2914 | 1zc8_1_J |
2756 | DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_J. | 2915 | DSSR warning 1zc8.json: no nucleotides found. Ignoring 1zc8_1_J. |
2757 | 2916 | ||
... | @@ -2857,6 +3016,12 @@ Could not find nucleotides of chain BB in annotation 6z1p.json. Either there is | ... | @@ -2857,6 +3016,12 @@ Could not find nucleotides of chain BB in annotation 6z1p.json. Either there is |
2857 | 6z1p_1_BA | 3016 | 6z1p_1_BA |
2858 | Could not find nucleotides of chain BA in annotation 6z1p.json. Either there is a problem with 6z1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 3017 | Could not find nucleotides of chain BA in annotation 6z1p.json. Either there is a problem with 6z1p mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
2859 | 3018 | ||
3019 | +3p22_1_C | ||
3020 | +Nucleotides not inserted ! | ||
3021 | + | ||
3022 | +3p22_1_G | ||
3023 | +Nucleotides not inserted ! | ||
3024 | + | ||
2860 | 2uxd_1_X | 3025 | 2uxd_1_X |
2861 | Sequence is too short. (< 5 resolved nts) | 3026 | Sequence is too short. (< 5 resolved nts) |
2862 | 3027 | ||
... | @@ -2923,12 +3088,6 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -2923,12 +3088,6 @@ Sequence is too short. (< 5 resolved nts) |
2923 | 3ol8_1_P | 3088 | 3ol8_1_P |
2924 | Sequence is too short. (< 5 resolved nts) | 3089 | Sequence is too short. (< 5 resolved nts) |
2925 | 3090 | ||
2926 | -1qzc_1_C | ||
2927 | -DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_C. | ||
2928 | - | ||
2929 | -1qzc_1_A | ||
2930 | -DSSR warning 1qzc.json: no nucleotides found. Ignoring 1qzc_1_A. | ||
2931 | - | ||
2932 | 6yrq_1_E | 3091 | 6yrq_1_E |
2933 | Sequence is too short. (< 5 resolved nts) | 3092 | Sequence is too short. (< 5 resolved nts) |
2934 | 3093 | ||
... | @@ -3166,6 +3325,9 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3166,6 +3325,9 @@ Sequence is too short. (< 5 resolved nts) |
3166 | 4wtk_1_P | 3325 | 4wtk_1_P |
3167 | Sequence is too short. (< 5 resolved nts) | 3326 | Sequence is too short. (< 5 resolved nts) |
3168 | 3327 | ||
3328 | +6wlj_3_A | ||
3329 | +Nucleotides not inserted ! | ||
3330 | + | ||
3169 | 1vqn_1_4 | 3331 | 1vqn_1_4 |
3170 | Sequence is too short. (< 5 resolved nts) | 3332 | Sequence is too short. (< 5 resolved nts) |
3171 | 3333 | ||
... | @@ -3214,23 +3376,8 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B. | ... | @@ -3214,23 +3376,8 @@ DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_B. |
3214 | 4i67_1_B | 3376 | 4i67_1_B |
3215 | Sequence is too short. (< 5 resolved nts) | 3377 | Sequence is too short. (< 5 resolved nts) |
3216 | 3378 | ||
3217 | -3pgw_1_R | 3379 | +4jf2_1_A |
3218 | -DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R. | 3380 | +Nucleotides not inserted ! |
3219 | - | ||
3220 | -3pgw_1_N | ||
3221 | -DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N. | ||
3222 | - | ||
3223 | -3cw1_1_X | ||
3224 | -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_X. | ||
3225 | - | ||
3226 | -3cw1_1_W | ||
3227 | -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_W. | ||
3228 | - | ||
3229 | -3cw1_1_V | ||
3230 | -DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V. | ||
3231 | - | ||
3232 | -7b0y_1_A | ||
3233 | -Could not find nucleotides of chain A in annotation 7b0y.json. Either there is a problem with 7b0y mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
3234 | 3381 | ||
3235 | 6k32_1_T | 3382 | 6k32_1_T |
3236 | Could not find nucleotides of chain T in annotation 6k32.json. Either there is a problem with 6k32 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 3383 | Could not find nucleotides of chain T in annotation 6k32.json. Either there is a problem with 6k32 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
... | @@ -3244,11 +3391,11 @@ Could not find nucleotides of chain A in annotation 5mmj.json. Either there is a | ... | @@ -3244,11 +3391,11 @@ Could not find nucleotides of chain A in annotation 5mmj.json. Either there is a |
3244 | 5x8r_1_A | 3391 | 5x8r_1_A |
3245 | Could not find nucleotides of chain A in annotation 5x8r.json. Either there is a problem with 5x8r mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 3392 | Could not find nucleotides of chain A in annotation 5x8r.json. Either there is a problem with 5x8r mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
3246 | 3393 | ||
3247 | -2agn_1_E | 3394 | +3fu2_1_B |
3248 | -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_E. | 3395 | +Nucleotides not inserted ! |
3249 | 3396 | ||
3250 | -2agn_1_D | 3397 | +3fu2_1_A |
3251 | -DSSR warning 2agn.json: no nucleotides found. Ignoring 2agn_1_D. | 3398 | +Nucleotides not inserted ! |
3252 | 3399 | ||
3253 | 4v5z_1_BD | 3400 | 4v5z_1_BD |
3254 | DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BD. | 3401 | DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BD. |
... | @@ -3355,6 +3502,39 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3355,6 +3502,39 @@ Sequence is too short. (< 5 resolved nts) |
3355 | 5dto_1_B | 3502 | 5dto_1_B |
3356 | Sequence is too short. (< 5 resolved nts) | 3503 | Sequence is too short. (< 5 resolved nts) |
3357 | 3504 | ||
3505 | +6yml_1_A | ||
3506 | +Nucleotides not inserted ! | ||
3507 | + | ||
3508 | +6ymm_1_A | ||
3509 | +Nucleotides not inserted ! | ||
3510 | + | ||
3511 | +6ymi_1_M | ||
3512 | +Nucleotides not inserted ! | ||
3513 | + | ||
3514 | +6ymi_1_F | ||
3515 | +Nucleotides not inserted ! | ||
3516 | + | ||
3517 | +6ymi_1_A | ||
3518 | +Nucleotides not inserted ! | ||
3519 | + | ||
3520 | +6ylb_1_F | ||
3521 | +Nucleotides not inserted ! | ||
3522 | + | ||
3523 | +6ymi_1_C | ||
3524 | +Nucleotides not inserted ! | ||
3525 | + | ||
3526 | +6ymj_1_C | ||
3527 | +Nucleotides not inserted ! | ||
3528 | + | ||
3529 | +6ylb_1_C | ||
3530 | +Nucleotides not inserted ! | ||
3531 | + | ||
3532 | +6ymj_1_I | ||
3533 | +Nucleotides not inserted ! | ||
3534 | + | ||
3535 | +6ymj_1_O | ||
3536 | +Nucleotides not inserted ! | ||
3537 | + | ||
3358 | 4cxh_1_X | 3538 | 4cxh_1_X |
3359 | Could not find nucleotides of chain X in annotation 4cxh.json. Either there is a problem with 4cxh mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 3539 | Could not find nucleotides of chain X in annotation 4cxh.json. Either there is a problem with 4cxh mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
3360 | 3540 | ||
... | @@ -3463,6 +3643,24 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3463,6 +3643,24 @@ Sequence is too short. (< 5 resolved nts) |
3463 | 4v4f_1_B2 | 3643 | 4v4f_1_B2 |
3464 | Sequence is too short. (< 5 resolved nts) | 3644 | Sequence is too short. (< 5 resolved nts) |
3465 | 3645 | ||
3646 | +7m4y_1_V | ||
3647 | +Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
3648 | + | ||
3649 | +7m4x_1_V | ||
3650 | +Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
3651 | + | ||
3652 | +6v3a_1_V | ||
3653 | +Could not find nucleotides of chain V in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
3654 | + | ||
3655 | +6v39_1_V | ||
3656 | +Could not find nucleotides of chain V in annotation 6v39.json. Either there is a problem with 6v39 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
3657 | + | ||
3658 | +6ck5_1_A | ||
3659 | +Nucleotides not inserted ! | ||
3660 | + | ||
3661 | +6ck5_1_B | ||
3662 | +Nucleotides not inserted ! | ||
3663 | + | ||
3466 | 5it9_1_I | 3664 | 5it9_1_I |
3467 | Could not find nucleotides of chain I in annotation 5it9.json. Either there is a problem with 5it9 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 3665 | Could not find nucleotides of chain I in annotation 5it9.json. Either there is a problem with 5it9 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
3468 | 3666 | ||
... | @@ -3490,6 +3688,12 @@ DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_N. | ... | @@ -3490,6 +3688,12 @@ DSSR warning 3jcr.json: no nucleotides found. Ignoring 3jcr_1_N. |
3490 | 6gfw_1_R | 3688 | 6gfw_1_R |
3491 | Sequence is too short. (< 5 resolved nts) | 3689 | Sequence is too short. (< 5 resolved nts) |
3492 | 3690 | ||
3691 | +3j6x_1_IR | ||
3692 | +Could not find nucleotides of chain IR in annotation 3j6x.json. Either there is a problem with 3j6x mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
3693 | + | ||
3694 | +3j6y_1_IR | ||
3695 | +Could not find nucleotides of chain IR in annotation 3j6y.json. Either there is a problem with 3j6y mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
3696 | + | ||
3493 | 2vaz_1_A | 3697 | 2vaz_1_A |
3494 | DSSR warning 2vaz.json: no nucleotides found. Ignoring 2vaz_1_A. | 3698 | DSSR warning 2vaz.json: no nucleotides found. Ignoring 2vaz_1_A. |
3495 | 3699 | ||
... | @@ -3535,6 +3739,9 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3535,6 +3739,9 @@ Sequence is too short. (< 5 resolved nts) |
3535 | 5uh9_1_I | 3739 | 5uh9_1_I |
3536 | Sequence is too short. (< 5 resolved nts) | 3740 | Sequence is too short. (< 5 resolved nts) |
3537 | 3741 | ||
3742 | +4v5z_1_BS | ||
3743 | +DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BS. | ||
3744 | + | ||
3538 | 2ftc_1_R | 3745 | 2ftc_1_R |
3539 | DSSR warning 2ftc.json: no nucleotides found. Ignoring 2ftc_1_R. | 3746 | DSSR warning 2ftc.json: no nucleotides found. Ignoring 2ftc_1_R. |
3540 | 3747 | ||
... | @@ -3547,9 +3754,6 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3547,9 +3754,6 @@ Sequence is too short. (< 5 resolved nts) |
3547 | 4udv_1_R | 3754 | 4udv_1_R |
3548 | Sequence is too short. (< 5 resolved nts) | 3755 | Sequence is too short. (< 5 resolved nts) |
3549 | 3756 | ||
3550 | -2r1g_1_E | ||
3551 | -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_E. | ||
3552 | - | ||
3553 | 5zsc_1_D | 3757 | 5zsc_1_D |
3554 | Sequence is too short. (< 5 resolved nts) | 3758 | Sequence is too short. (< 5 resolved nts) |
3555 | 3759 | ||
... | @@ -3631,8 +3835,8 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3631,8 +3835,8 @@ Sequence is too short. (< 5 resolved nts) |
3631 | 3m85_1_Y | 3835 | 3m85_1_Y |
3632 | Sequence is too short. (< 5 resolved nts) | 3836 | Sequence is too short. (< 5 resolved nts) |
3633 | 3837 | ||
3634 | -1e8s_1_C | 3838 | +5u34_1_B |
3635 | -DSSR warning 1e8s.json: no nucleotides found. Ignoring 1e8s_1_C. | 3839 | +Nucleotides not inserted ! |
3636 | 3840 | ||
3637 | 5wnp_1_B | 3841 | 5wnp_1_B |
3638 | Could not find nucleotides of chain B in annotation 5wnp.json. Either there is a problem with 5wnp mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 3842 | Could not find nucleotides of chain B in annotation 5wnp.json. Either there is a problem with 5wnp mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
... | @@ -3700,12 +3904,21 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3700,12 +3904,21 @@ Sequence is too short. (< 5 resolved nts) |
3700 | 3u2e_1_C | 3904 | 3u2e_1_C |
3701 | Sequence is too short. (< 5 resolved nts) | 3905 | Sequence is too short. (< 5 resolved nts) |
3702 | 3906 | ||
3907 | +7eh1_1_I | ||
3908 | +Sequence is too short. (< 5 resolved nts) | ||
3909 | + | ||
3703 | 5uef_1_C | 3910 | 5uef_1_C |
3704 | Sequence is too short. (< 5 resolved nts) | 3911 | Sequence is too short. (< 5 resolved nts) |
3705 | 3912 | ||
3706 | 5uef_1_D | 3913 | 5uef_1_D |
3707 | Sequence is too short. (< 5 resolved nts) | 3914 | Sequence is too short. (< 5 resolved nts) |
3708 | 3915 | ||
3916 | +7eh2_1_R | ||
3917 | +Sequence is too short. (< 5 resolved nts) | ||
3918 | + | ||
3919 | +7eh2_1_I | ||
3920 | +Sequence is too short. (< 5 resolved nts) | ||
3921 | + | ||
3709 | 4x4u_1_H | 3922 | 4x4u_1_H |
3710 | Sequence is too short. (< 5 resolved nts) | 3923 | Sequence is too short. (< 5 resolved nts) |
3711 | 3924 | ||
... | @@ -3736,6 +3949,15 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3736,6 +3949,15 @@ Sequence is too short. (< 5 resolved nts) |
3736 | 7a5g_1_J | 3949 | 7a5g_1_J |
3737 | Could not find nucleotides of chain J in annotation 7a5g.json. Either there is a problem with 7a5g mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 3950 | Could not find nucleotides of chain J in annotation 7a5g.json. Either there is a problem with 7a5g mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
3738 | 3951 | ||
3952 | +1m5k_1_B | ||
3953 | +Nucleotides not inserted ! | ||
3954 | + | ||
3955 | +1m5o_1_E | ||
3956 | +Nucleotides not inserted ! | ||
3957 | + | ||
3958 | +1m5v_1_B | ||
3959 | +Nucleotides not inserted ! | ||
3960 | + | ||
3739 | 6gx6_1_B | 3961 | 6gx6_1_B |
3740 | Sequence is too short. (< 5 resolved nts) | 3962 | Sequence is too short. (< 5 resolved nts) |
3741 | 3963 | ||
... | @@ -3754,9 +3976,6 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3754,9 +3976,6 @@ Sequence is too short. (< 5 resolved nts) |
3754 | 1zn1_1_C | 3976 | 1zn1_1_C |
3755 | DSSR warning 1zn1.json: no nucleotides found. Ignoring 1zn1_1_C. | 3977 | DSSR warning 1zn1.json: no nucleotides found. Ignoring 1zn1_1_C. |
3756 | 3978 | ||
3757 | -1zn0_1_C | ||
3758 | -DSSR warning 1zn0.json: no nucleotides found. Ignoring 1zn0_1_C. | ||
3759 | - | ||
3760 | 1xpu_1_G | 3979 | 1xpu_1_G |
3761 | Sequence is too short. (< 5 resolved nts) | 3980 | Sequence is too short. (< 5 resolved nts) |
3762 | 3981 | ||
... | @@ -3826,9 +4045,15 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -3826,9 +4045,15 @@ Sequence is too short. (< 5 resolved nts) |
3826 | 6gc5_1_G | 4045 | 6gc5_1_G |
3827 | Sequence is too short. (< 5 resolved nts) | 4046 | Sequence is too short. (< 5 resolved nts) |
3828 | 4047 | ||
4048 | +4rne_1_C | ||
4049 | +Nucleotides not inserted ! | ||
4050 | + | ||
3829 | 1n1h_1_B | 4051 | 1n1h_1_B |
3830 | Sequence is too short. (< 5 resolved nts) | 4052 | Sequence is too short. (< 5 resolved nts) |
3831 | 4053 | ||
4054 | +7n2v_1_PT | ||
4055 | +Could not find nucleotides of chain PT in annotation 7n2v.json. Either there is a problem with 7n2v mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
4056 | + | ||
3832 | 4ohz_1_B | 4057 | 4ohz_1_B |
3833 | Sequence is too short. (< 5 resolved nts) | 4058 | Sequence is too short. (< 5 resolved nts) |
3834 | 4059 | ||
... | @@ -3874,6 +4099,15 @@ Could not find nucleotides of chain X in annotation 5y88.json. Either there is a | ... | @@ -3874,6 +4099,15 @@ Could not find nucleotides of chain X in annotation 5y88.json. Either there is a |
3874 | 4v5z_1_BB | 4099 | 4v5z_1_BB |
3875 | DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BB. | 4100 | DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BB. |
3876 | 4101 | ||
4102 | +5y85_1_D | ||
4103 | +Nucleotides not inserted ! | ||
4104 | + | ||
4105 | +5y85_1_B | ||
4106 | +Nucleotides not inserted ! | ||
4107 | + | ||
4108 | +5y87_1_D | ||
4109 | +Nucleotides not inserted ! | ||
4110 | + | ||
3877 | 3j0o_1_H | 4111 | 3j0o_1_H |
3878 | Could not find nucleotides of chain H in annotation 3j0o.json. Either there is a problem with 3j0o mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4112 | Could not find nucleotides of chain H in annotation 3j0o.json. Either there is a problem with 3j0o mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
3879 | 4113 | ||
... | @@ -4057,9 +4291,6 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4057,9 +4291,6 @@ Sequence is too short. (< 5 resolved nts) |
4057 | 6a6l_1_D | 4291 | 6a6l_1_D |
4058 | Sequence is too short. (< 5 resolved nts) | 4292 | Sequence is too short. (< 5 resolved nts) |
4059 | 4293 | ||
4060 | -4v5z_1_BS | ||
4061 | -DSSR warning 4v5z.json: no nucleotides found. Ignoring 4v5z_1_BS. | ||
4062 | - | ||
4063 | 4v8t_1_1 | 4294 | 4v8t_1_1 |
4064 | Could not find nucleotides of chain 1 in annotation 4v8t.json. Either there is a problem with 4v8t mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4295 | Could not find nucleotides of chain 1 in annotation 4v8t.json. Either there is a problem with 4v8t mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4065 | 4296 | ||
... | @@ -4072,6 +4303,9 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4072,6 +4303,9 @@ Sequence is too short. (< 5 resolved nts) |
4072 | 1uvi_1_E | 4303 | 1uvi_1_E |
4073 | Sequence is too short. (< 5 resolved nts) | 4304 | Sequence is too short. (< 5 resolved nts) |
4074 | 4305 | ||
4306 | +3gs5_1_A | ||
4307 | +Nucleotides not inserted ! | ||
4308 | + | ||
4075 | 4m7d_1_P | 4309 | 4m7d_1_P |
4076 | Sequence is too short. (< 5 resolved nts) | 4310 | Sequence is too short. (< 5 resolved nts) |
4077 | 4311 | ||
... | @@ -4132,12 +4366,12 @@ Could not find nucleotides of chain 2M in annotation 6ip6.json. Either there is | ... | @@ -4132,12 +4366,12 @@ Could not find nucleotides of chain 2M in annotation 6ip6.json. Either there is |
4132 | 6qcs_1_M | 4366 | 6qcs_1_M |
4133 | Sequence is too short. (< 5 resolved nts) | 4367 | Sequence is too short. (< 5 resolved nts) |
4134 | 4368 | ||
4369 | +7b5k_1_Z | ||
4370 | +Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
4371 | + | ||
4135 | 486d_1_G | 4372 | 486d_1_G |
4136 | Could not find nucleotides of chain G in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4373 | Could not find nucleotides of chain G in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4137 | 4374 | ||
4138 | -2r1g_1_C | ||
4139 | -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_C. | ||
4140 | - | ||
4141 | 486d_1_F | 4375 | 486d_1_F |
4142 | Could not find nucleotides of chain F in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4376 | Could not find nucleotides of chain F in annotation 486d.json. Either there is a problem with 486d mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4143 | 4377 | ||
... | @@ -4177,6 +4411,9 @@ Could not find nucleotides of chain L in annotation 4oq9.json. Either there is a | ... | @@ -4177,6 +4411,9 @@ Could not find nucleotides of chain L in annotation 4oq9.json. Either there is a |
4177 | 6r9q_1_B | 4411 | 6r9q_1_B |
4178 | Sequence is too short. (< 5 resolved nts) | 4412 | Sequence is too short. (< 5 resolved nts) |
4179 | 4413 | ||
4414 | +7m4u_1_A | ||
4415 | +Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
4416 | + | ||
4180 | 6v3a_1_SN1 | 4417 | 6v3a_1_SN1 |
4181 | Could not find nucleotides of chain SN1 in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4418 | Could not find nucleotides of chain SN1 in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4182 | 4419 | ||
... | @@ -4189,9 +4426,6 @@ Could not find nucleotides of chain SN1 in annotation 6v39.json. Either there is | ... | @@ -4189,9 +4426,6 @@ Could not find nucleotides of chain SN1 in annotation 6v39.json. Either there is |
4189 | 6v3e_1_SN1 | 4426 | 6v3e_1_SN1 |
4190 | Could not find nucleotides of chain SN1 in annotation 6v3e.json. Either there is a problem with 6v3e mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4427 | Could not find nucleotides of chain SN1 in annotation 6v3e.json. Either there is a problem with 6v3e mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4191 | 4428 | ||
4192 | -1pn7_1_C | ||
4193 | -DSSR warning 1pn7.json: no nucleotides found. Ignoring 1pn7_1_C. | ||
4194 | - | ||
4195 | 1mj1_1_Q | 4429 | 1mj1_1_Q |
4196 | Could not find nucleotides of chain Q in annotation 1mj1.json. Either there is a problem with 1mj1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4430 | Could not find nucleotides of chain Q in annotation 1mj1.json. Either there is a problem with 1mj1 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4197 | 4431 | ||
... | @@ -4315,12 +4549,6 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4315,12 +4549,6 @@ Sequence is too short. (< 5 resolved nts) |
4315 | 6oy6_1_I | 4549 | 6oy6_1_I |
4316 | Sequence is too short. (< 5 resolved nts) | 4550 | Sequence is too short. (< 5 resolved nts) |
4317 | 4551 | ||
4318 | -4bbl_1_Y | ||
4319 | -DSSR warning 4bbl.json: no nucleotides found. Ignoring 4bbl_1_Y. | ||
4320 | - | ||
4321 | -4bbl_1_Z | ||
4322 | -DSSR warning 4bbl.json: no nucleotides found. Ignoring 4bbl_1_Z. | ||
4323 | - | ||
4324 | 4qvd_1_H | 4552 | 4qvd_1_H |
4325 | Sequence is too short. (< 5 resolved nts) | 4553 | Sequence is too short. (< 5 resolved nts) |
4326 | 4554 | ||
... | @@ -4330,15 +4558,54 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4330,15 +4558,54 @@ Sequence is too short. (< 5 resolved nts) |
4330 | 3iy8_1_A | 4558 | 3iy8_1_A |
4331 | DSSR warning 3iy8.json: no nucleotides found. Ignoring 3iy8_1_A. | 4559 | DSSR warning 3iy8.json: no nucleotides found. Ignoring 3iy8_1_A. |
4332 | 4560 | ||
4333 | -6tnu_1_M | 4561 | +7n06_1_G |
4334 | -Could not find nucleotides of chain M in annotation 6tnu.json. Either there is a problem with 6tnu mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4562 | +Sequence is too short. (< 5 resolved nts) |
4335 | 4563 | ||
4336 | -5mc6_1_M | 4564 | +7n06_1_H |
4337 | -Could not find nucleotides of chain M in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4565 | +Sequence is too short. (< 5 resolved nts) |
4566 | + | ||
4567 | +7n06_1_I | ||
4568 | +Sequence is too short. (< 5 resolved nts) | ||
4569 | + | ||
4570 | +7n06_1_J | ||
4571 | +Sequence is too short. (< 5 resolved nts) | ||
4572 | + | ||
4573 | +7n06_1_K | ||
4574 | +Sequence is too short. (< 5 resolved nts) | ||
4575 | + | ||
4576 | +7n06_1_L | ||
4577 | +Sequence is too short. (< 5 resolved nts) | ||
4578 | + | ||
4579 | +7n33_1_G | ||
4580 | +Sequence is too short. (< 5 resolved nts) | ||
4581 | + | ||
4582 | +7n33_1_H | ||
4583 | +Sequence is too short. (< 5 resolved nts) | ||
4584 | + | ||
4585 | +7n33_1_I | ||
4586 | +Sequence is too short. (< 5 resolved nts) | ||
4587 | + | ||
4588 | +7n33_1_J | ||
4589 | +Sequence is too short. (< 5 resolved nts) | ||
4590 | + | ||
4591 | +7n33_1_K | ||
4592 | +Sequence is too short. (< 5 resolved nts) | ||
4593 | + | ||
4594 | +7n33_1_L | ||
4595 | +Sequence is too short. (< 5 resolved nts) | ||
4338 | 4596 | ||
4339 | 5mc6_1_N | 4597 | 5mc6_1_N |
4340 | Could not find nucleotides of chain N in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4598 | Could not find nucleotides of chain N in annotation 5mc6.json. Either there is a problem with 5mc6 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4341 | 4599 | ||
4600 | +2qwy_1_C | ||
4601 | +Nucleotides not inserted ! | ||
4602 | + | ||
4603 | +2qwy_1_A | ||
4604 | +Nucleotides not inserted ! | ||
4605 | + | ||
4606 | +2qwy_1_B | ||
4607 | +Nucleotides not inserted ! | ||
4608 | + | ||
4342 | 4eya_1_O | 4609 | 4eya_1_O |
4343 | Could not find nucleotides of chain O in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4610 | Could not find nucleotides of chain O in annotation 4eya.json. Either there is a problem with 4eya mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4344 | 4611 | ||
... | @@ -4363,12 +4630,6 @@ Could not find nucleotides of chain U in annotation 6htq.json. Either there is a | ... | @@ -4363,12 +4630,6 @@ Could not find nucleotides of chain U in annotation 6htq.json. Either there is a |
4363 | 6uu6_1_333 | 4630 | 6uu6_1_333 |
4364 | Sequence is too short. (< 5 resolved nts) | 4631 | Sequence is too short. (< 5 resolved nts) |
4365 | 4632 | ||
4366 | -6v3a_1_V | ||
4367 | -Could not find nucleotides of chain V in annotation 6v3a.json. Either there is a problem with 6v3a mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
4368 | - | ||
4369 | -6v39_1_V | ||
4370 | -Could not find nucleotides of chain V in annotation 6v39.json. Either there is a problem with 6v39 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
4371 | - | ||
4372 | 5a0v_1_F | 4633 | 5a0v_1_F |
4373 | Sequence is too short. (< 5 resolved nts) | 4634 | Sequence is too short. (< 5 resolved nts) |
4374 | 4635 | ||
... | @@ -4495,6 +4756,9 @@ Could not find nucleotides of chain BV in annotation 6xa1.json. Either there is | ... | @@ -4495,6 +4756,9 @@ Could not find nucleotides of chain BV in annotation 6xa1.json. Either there is |
4495 | 6ha8_1_X | 4756 | 6ha8_1_X |
4496 | Could not find nucleotides of chain X in annotation 6ha8.json. Either there is a problem with 6ha8 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4757 | Could not find nucleotides of chain X in annotation 6ha8.json. Either there is a problem with 6ha8 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4497 | 4758 | ||
4759 | +3bnp_1_B | ||
4760 | +Nucleotides not inserted ! | ||
4761 | + | ||
4498 | 1m8w_1_E | 4762 | 1m8w_1_E |
4499 | Could not find nucleotides of chain E in annotation 1m8w.json. Either there is a problem with 1m8w mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4763 | Could not find nucleotides of chain E in annotation 1m8w.json. Either there is a problem with 1m8w mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4500 | 4764 | ||
... | @@ -4564,6 +4828,21 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4564,6 +4828,21 @@ Sequence is too short. (< 5 resolved nts) |
4564 | 4wti_1_P | 4828 | 4wti_1_P |
4565 | Sequence is too short. (< 5 resolved nts) | 4829 | Sequence is too short. (< 5 resolved nts) |
4566 | 4830 | ||
4831 | +6dlr_1_A | ||
4832 | +Nucleotides not inserted ! | ||
4833 | + | ||
4834 | +6dlt_1_A | ||
4835 | +Nucleotides not inserted ! | ||
4836 | + | ||
4837 | +6dls_1_A | ||
4838 | +Nucleotides not inserted ! | ||
4839 | + | ||
4840 | +6dlq_1_A | ||
4841 | +Nucleotides not inserted ! | ||
4842 | + | ||
4843 | +6dnr_1_A | ||
4844 | +Nucleotides not inserted ! | ||
4845 | + | ||
4567 | 5l3p_1_Y | 4846 | 5l3p_1_Y |
4568 | Could not find nucleotides of chain Y in annotation 5l3p.json. Either there is a problem with 5l3p mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4847 | Could not find nucleotides of chain Y in annotation 5l3p.json. Either there is a problem with 5l3p mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4569 | 4848 | ||
... | @@ -4573,12 +4852,36 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4573,12 +4852,36 @@ Sequence is too short. (< 5 resolved nts) |
4573 | 3rzo_1_R | 4852 | 3rzo_1_R |
4574 | Sequence is too short. (< 5 resolved nts) | 4853 | Sequence is too short. (< 5 resolved nts) |
4575 | 4854 | ||
4855 | +5wlh_1_B | ||
4856 | +Nucleotides not inserted ! | ||
4857 | + | ||
4576 | 2f4v_1_Z | 4858 | 2f4v_1_Z |
4577 | Sequence is too short. (< 5 resolved nts) | 4859 | Sequence is too short. (< 5 resolved nts) |
4578 | 4860 | ||
4861 | +5ml7_1_B | ||
4862 | +Nucleotides not inserted ! | ||
4863 | + | ||
4579 | 1qln_1_R | 4864 | 1qln_1_R |
4580 | Sequence is too short. (< 5 resolved nts) | 4865 | Sequence is too short. (< 5 resolved nts) |
4581 | 4866 | ||
4867 | +3pgw_1_R | ||
4868 | +DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_R. | ||
4869 | + | ||
4870 | +3pgw_1_N | ||
4871 | +DSSR warning 3pgw.json: no nucleotides found. Ignoring 3pgw_1_N. | ||
4872 | + | ||
4873 | +3cw1_1_X | ||
4874 | +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_X. | ||
4875 | + | ||
4876 | +3cw1_1_W | ||
4877 | +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_W. | ||
4878 | + | ||
4879 | +3cw1_1_V | ||
4880 | +DSSR warning 3cw1.json: no nucleotides found. Ignoring 3cw1_1_V. | ||
4881 | + | ||
4882 | +7b0y_1_A | ||
4883 | +Could not find nucleotides of chain A in annotation 7b0y.json. Either there is a problem with 7b0y mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
4884 | + | ||
4582 | 6ogy_1_M | 4885 | 6ogy_1_M |
4583 | Sequence is too short. (< 5 resolved nts) | 4886 | Sequence is too short. (< 5 resolved nts) |
4584 | 4887 | ||
... | @@ -4588,12 +4891,12 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4588,12 +4891,12 @@ Sequence is too short. (< 5 resolved nts) |
4588 | 6uej_1_B | 4891 | 6uej_1_B |
4589 | Sequence is too short. (< 5 resolved nts) | 4892 | Sequence is too short. (< 5 resolved nts) |
4590 | 4893 | ||
4894 | +7kga_1_A | ||
4895 | +Nucleotides not inserted ! | ||
4896 | + | ||
4591 | 6ywy_1_BB | 4897 | 6ywy_1_BB |
4592 | Could not find nucleotides of chain BB in annotation 6ywy.json. Either there is a problem with 6ywy mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 4898 | Could not find nucleotides of chain BB in annotation 6ywy.json. Either there is a problem with 6ywy mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4593 | 4899 | ||
4594 | -1x18_1_A | ||
4595 | -DSSR warning 1x18.json: no nucleotides found. Ignoring 1x18_1_A. | ||
4596 | - | ||
4597 | 5ytx_1_B | 4900 | 5ytx_1_B |
4598 | Sequence is too short. (< 5 resolved nts) | 4901 | Sequence is too short. (< 5 resolved nts) |
4599 | 4902 | ||
... | @@ -4720,24 +5023,12 @@ Could not find nucleotides of chain AA in annotation 5mrf.json. Either there is | ... | @@ -4720,24 +5023,12 @@ Could not find nucleotides of chain AA in annotation 5mrf.json. Either there is |
4720 | 7jhy_1_Z | 5023 | 7jhy_1_Z |
4721 | Could not find nucleotides of chain Z in annotation 7jhy.json. Either there is a problem with 7jhy mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 5024 | Could not find nucleotides of chain Z in annotation 7jhy.json. Either there is a problem with 7jhy mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4722 | 5025 | ||
4723 | -2r1g_1_A | ||
4724 | -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_A. | ||
4725 | - | ||
4726 | -2r1g_1_D | ||
4727 | -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_D. | ||
4728 | - | ||
4729 | -2r1g_1_F | ||
4730 | -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_F. | ||
4731 | - | ||
4732 | 3eq4_1_Y | 5026 | 3eq4_1_Y |
4733 | DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_Y. | 5027 | DSSR warning 3eq4.json: no nucleotides found. Ignoring 3eq4_1_Y. |
4734 | 5028 | ||
4735 | 4wkr_1_C | 5029 | 4wkr_1_C |
4736 | Sequence is too short. (< 5 resolved nts) | 5030 | Sequence is too short. (< 5 resolved nts) |
4737 | 5031 | ||
4738 | -2r1g_1_X | ||
4739 | -DSSR warning 2r1g.json: no nucleotides found. Ignoring 2r1g_1_X. | ||
4740 | - | ||
4741 | 4v99_1_EC | 5032 | 4v99_1_EC |
4742 | Could not find nucleotides of chain EC in annotation 4v99.json. Either there is a problem with 4v99 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 5033 | Could not find nucleotides of chain EC in annotation 4v99.json. Either there is a problem with 4v99 mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4743 | 5034 | ||
... | @@ -4927,120 +5218,51 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -4927,120 +5218,51 @@ Sequence is too short. (< 5 resolved nts) |
4927 | 4ejt_1_G | 5218 | 4ejt_1_G |
4928 | Sequence is too short. (< 5 resolved nts) | 5219 | Sequence is too short. (< 5 resolved nts) |
4929 | 5220 | ||
4930 | -6lkq_1_W | 5221 | +1et4_1_A |
4931 | -Could not find nucleotides of chain W in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 5222 | +Nucleotides not inserted ! |
4932 | - | ||
4933 | -3qsu_1_P | ||
4934 | -Sequence is too short. (< 5 resolved nts) | ||
4935 | - | ||
4936 | -3qsu_1_R | ||
4937 | -Sequence is too short. (< 5 resolved nts) | ||
4938 | - | ||
4939 | -2xs7_1_B | ||
4940 | -Sequence is too short. (< 5 resolved nts) | ||
4941 | - | ||
4942 | -1n38_1_B | ||
4943 | -Sequence is too short. (< 5 resolved nts) | ||
4944 | - | ||
4945 | -4qvc_1_G | ||
4946 | -Sequence is too short. (< 5 resolved nts) | ||
4947 | - | ||
4948 | -6q1h_1_D | ||
4949 | -Sequence is too short. (< 5 resolved nts) | ||
4950 | - | ||
4951 | -6q1h_1_H | ||
4952 | -Sequence is too short. (< 5 resolved nts) | ||
4953 | - | ||
4954 | -6p7p_1_F | ||
4955 | -Sequence is too short. (< 5 resolved nts) | ||
4956 | - | ||
4957 | -6p7p_1_E | ||
4958 | -Sequence is too short. (< 5 resolved nts) | ||
4959 | - | ||
4960 | -6p7p_1_D | ||
4961 | -Sequence is too short. (< 5 resolved nts) | ||
4962 | - | ||
4963 | -6vm6_1_J | ||
4964 | -Sequence is too short. (< 5 resolved nts) | ||
4965 | 5223 | ||
4966 | -6vm6_1_G | 5224 | +1et4_1_C |
4967 | -Sequence is too short. (< 5 resolved nts) | 5225 | +Nucleotides not inserted ! |
4968 | 5226 | ||
4969 | -6wan_1_K | 5227 | +1et4_1_B |
4970 | -Sequence is too short. (< 5 resolved nts) | 5228 | +Nucleotides not inserted ! |
4971 | 5229 | ||
4972 | -6wan_1_H | 5230 | +1et4_1_D |
4973 | -Sequence is too short. (< 5 resolved nts) | 5231 | +Nucleotides not inserted ! |
4974 | 5232 | ||
4975 | -6wan_1_G | 5233 | +1et4_1_E |
4976 | -Sequence is too short. (< 5 resolved nts) | 5234 | +Nucleotides not inserted ! |
4977 | 5235 | ||
4978 | -6wan_1_L | 5236 | +1ddy_1_C |
4979 | -Sequence is too short. (< 5 resolved nts) | 5237 | +Nucleotides not inserted ! |
4980 | 5238 | ||
4981 | -6wan_1_I | 5239 | +1ddy_1_A |
4982 | -Sequence is too short. (< 5 resolved nts) | 5240 | +Nucleotides not inserted ! |
4983 | 5241 | ||
4984 | -6ywo_1_F | 5242 | +1ddy_1_E |
4985 | -Sequence is too short. (< 5 resolved nts) | 5243 | +Nucleotides not inserted ! |
4986 | - | ||
4987 | -6wan_1_J | ||
4988 | -Sequence is too short. (< 5 resolved nts) | ||
4989 | - | ||
4990 | -4oau_1_A | ||
4991 | -Sequence is too short. (< 5 resolved nts) | ||
4992 | 5244 | ||
4993 | -6ywo_1_E | 5245 | +6lkq_1_W |
4994 | -Sequence is too short. (< 5 resolved nts) | 5246 | +Could not find nucleotides of chain W in annotation 6lkq.json. Either there is a problem with 6lkq mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
4995 | - | ||
4996 | -6ywo_1_K | ||
4997 | -Sequence is too short. (< 5 resolved nts) | ||
4998 | - | ||
4999 | -6vm6_1_I | ||
5000 | -Sequence is too short. (< 5 resolved nts) | ||
5001 | - | ||
5002 | -6vm6_1_H | ||
5003 | -Sequence is too short. (< 5 resolved nts) | ||
5004 | - | ||
5005 | -6ywo_1_I | ||
5006 | -Sequence is too short. (< 5 resolved nts) | ||
5007 | - | ||
5008 | -2a1r_1_C | ||
5009 | -Sequence is too short. (< 5 resolved nts) | ||
5010 | - | ||
5011 | -6m6v_1_F | ||
5012 | -Sequence is too short. (< 5 resolved nts) | ||
5013 | - | ||
5014 | -6m6v_1_E | ||
5015 | -Sequence is too short. (< 5 resolved nts) | ||
5016 | - | ||
5017 | -2a1r_1_D | ||
5018 | -Sequence is too short. (< 5 resolved nts) | ||
5019 | - | ||
5020 | -3gpq_1_E | ||
5021 | -Sequence is too short. (< 5 resolved nts) | ||
5022 | 5247 | ||
5023 | -3gpq_1_F | 5248 | +6r47_1_A |
5024 | -Sequence is too short. (< 5 resolved nts) | 5249 | +Nucleotides not inserted ! |
5025 | 5250 | ||
5026 | -6o79_1_C | 5251 | +3qsu_1_P |
5027 | Sequence is too short. (< 5 resolved nts) | 5252 | Sequence is too short. (< 5 resolved nts) |
5028 | 5253 | ||
5029 | -6vm6_1_K | 5254 | +3qsu_1_R |
5030 | Sequence is too short. (< 5 resolved nts) | 5255 | Sequence is too short. (< 5 resolved nts) |
5031 | 5256 | ||
5032 | -6m6v_1_G | 5257 | +2xs7_1_B |
5033 | Sequence is too short. (< 5 resolved nts) | 5258 | Sequence is too short. (< 5 resolved nts) |
5034 | 5259 | ||
5035 | -6hyu_1_D | 5260 | +1n38_1_B |
5036 | Sequence is too short. (< 5 resolved nts) | 5261 | Sequence is too short. (< 5 resolved nts) |
5037 | 5262 | ||
5038 | -1laj_1_R | 5263 | +4qvc_1_G |
5039 | Sequence is too short. (< 5 resolved nts) | 5264 | Sequence is too short. (< 5 resolved nts) |
5040 | 5265 | ||
5041 | -6ybv_1_K | ||
5042 | -Could not find nucleotides of chain K in annotation 6ybv.json. Either there is a problem with 6ybv mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5043 | - | ||
5044 | 6mpf_1_W | 5266 | 6mpf_1_W |
5045 | Sequence is too short. (< 5 resolved nts) | 5267 | Sequence is too short. (< 5 resolved nts) |
5046 | 5268 | ||
... | @@ -5065,6 +5287,9 @@ Could not find nucleotides of chain V in annotation 6ftj.json. Either there is a | ... | @@ -5065,6 +5287,9 @@ Could not find nucleotides of chain V in annotation 6ftj.json. Either there is a |
5065 | 6ftg_1_V | 5287 | 6ftg_1_V |
5066 | Could not find nucleotides of chain V in annotation 6ftg.json. Either there is a problem with 6ftg mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | 5288 | Could not find nucleotides of chain V in annotation 6ftg.json. Either there is a problem with 6ftg mmCIF download, or the bases are not resolved in the structure. Delete it and retry. |
5067 | 5289 | ||
5290 | +3npn_1_A | ||
5291 | +Nucleotides not inserted ! | ||
5292 | + | ||
5068 | 4g0a_1_G | 5293 | 4g0a_1_G |
5069 | Sequence is too short. (< 5 resolved nts) | 5294 | Sequence is too short. (< 5 resolved nts) |
5070 | 5295 | ||
... | @@ -5080,15 +5305,6 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -5080,15 +5305,6 @@ Sequence is too short. (< 5 resolved nts) |
5080 | 5hkc_1_C | 5305 | 5hkc_1_C |
5081 | Sequence is too short. (< 5 resolved nts) | 5306 | Sequence is too short. (< 5 resolved nts) |
5082 | 5307 | ||
5083 | -4kzy_1_I | ||
5084 | -Could not find nucleotides of chain I in annotation 4kzy.json. Either there is a problem with 4kzy mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5085 | - | ||
5086 | -4kzz_1_I | ||
5087 | -Could not find nucleotides of chain I in annotation 4kzz.json. Either there is a problem with 4kzz mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5088 | - | ||
5089 | -4kzx_1_I | ||
5090 | -Could not find nucleotides of chain I in annotation 4kzx.json. Either there is a problem with 4kzx mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5091 | - | ||
5092 | 1rmv_1_B | 5308 | 1rmv_1_B |
5093 | Sequence is too short. (< 5 resolved nts) | 5309 | Sequence is too short. (< 5 resolved nts) |
5094 | 5310 | ||
... | @@ -5134,69 +5350,3 @@ Sequence is too short. (< 5 resolved nts) | ... | @@ -5134,69 +5350,3 @@ Sequence is too short. (< 5 resolved nts) |
5134 | 5hjz_1_C | 5350 | 5hjz_1_C |
5135 | Sequence is too short. (< 5 resolved nts) | 5351 | Sequence is too short. (< 5 resolved nts) |
5136 | 5352 | ||
5137 | -7nrc_1_SM | ||
5138 | -Could not find nucleotides of chain SM in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5139 | - | ||
5140 | -7nrc_1_SN | ||
5141 | -Could not find nucleotides of chain SN in annotation 7nrc.json. Either there is a problem with 7nrc mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5142 | - | ||
5143 | -7am2_1_R1 | ||
5144 | -Sequence is too short. (< 5 resolved nts) | ||
5145 | - | ||
5146 | -7k5l_1_R | ||
5147 | -Sequence is too short. (< 5 resolved nts) | ||
5148 | - | ||
5149 | -7b5k_1_X | ||
5150 | -Could not find nucleotides of chain X in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5151 | - | ||
5152 | -7d8c_1_C | ||
5153 | -Sequence is too short. (< 5 resolved nts) | ||
5154 | - | ||
5155 | -7m4y_1_V | ||
5156 | -Could not find nucleotides of chain V in annotation 7m4y.json. Either there is a problem with 7m4y mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5157 | - | ||
5158 | -7m4x_1_V | ||
5159 | -Could not find nucleotides of chain V in annotation 7m4x.json. Either there is a problem with 7m4x mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5160 | - | ||
5161 | -7b5k_1_Z | ||
5162 | -Could not find nucleotides of chain Z in annotation 7b5k.json. Either there is a problem with 7b5k mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5163 | - | ||
5164 | -7m4u_1_A | ||
5165 | -Could not find nucleotides of chain A in annotation 7m4u.json. Either there is a problem with 7m4u mmCIF download, or the bases are not resolved in the structure. Delete it and retry. | ||
5166 | - | ||
5167 | -7n06_1_G | ||
5168 | -Sequence is too short. (< 5 resolved nts) | ||
5169 | - | ||
5170 | -7n06_1_H | ||
5171 | -Sequence is too short. (< 5 resolved nts) | ||
5172 | - | ||
5173 | -7n06_1_I | ||
5174 | -Sequence is too short. (< 5 resolved nts) | ||
5175 | - | ||
5176 | -7n06_1_J | ||
5177 | -Sequence is too short. (< 5 resolved nts) | ||
5178 | - | ||
5179 | -7n06_1_K | ||
5180 | -Sequence is too short. (< 5 resolved nts) | ||
5181 | - | ||
5182 | -7n06_1_L | ||
5183 | -Sequence is too short. (< 5 resolved nts) | ||
5184 | - | ||
5185 | -7n33_1_G | ||
5186 | -Sequence is too short. (< 5 resolved nts) | ||
5187 | - | ||
5188 | -7n33_1_H | ||
5189 | -Sequence is too short. (< 5 resolved nts) | ||
5190 | - | ||
5191 | -7n33_1_I | ||
5192 | -Sequence is too short. (< 5 resolved nts) | ||
5193 | - | ||
5194 | -7n33_1_J | ||
5195 | -Sequence is too short. (< 5 resolved nts) | ||
5196 | - | ||
5197 | -7n33_1_K | ||
5198 | -Sequence is too short. (< 5 resolved nts) | ||
5199 | - | ||
5200 | -7n33_1_L | ||
5201 | -Sequence is too short. (< 5 resolved nts) | ||
5202 | - | ... | ... |
... | @@ -7,38 +7,27 @@ | ... | @@ -7,38 +7,27 @@ |
7 | # Run this file if you want the base counts, pair-type counts, identity percents, etc | 7 | # Run this file if you want the base counts, pair-type counts, identity percents, etc |
8 | # in the database. | 8 | # in the database. |
9 | 9 | ||
10 | -import getopt, os, pickle, sqlite3, shlex, subprocess, sys, warnings | 10 | +import getopt, glob, json, os, sqlite3, shlex, subprocess, sys, warnings |
11 | import numpy as np | 11 | import numpy as np |
12 | import pandas as pd | 12 | import pandas as pd |
13 | -import threading as th | ||
14 | import scipy.stats as st | 13 | import scipy.stats as st |
15 | import matplotlib | 14 | import matplotlib |
16 | import matplotlib.pyplot as plt | 15 | import matplotlib.pyplot as plt |
17 | import matplotlib.cm as cm | 16 | import matplotlib.cm as cm |
18 | import matplotlib.patches as mpatches | 17 | import matplotlib.patches as mpatches |
19 | import scipy.cluster.hierarchy as sch | 18 | import scipy.cluster.hierarchy as sch |
20 | -import sklearn | ||
21 | -import json | ||
22 | -import glob | ||
23 | -import pickle | ||
24 | -import Bio | ||
25 | from scipy.spatial.distance import squareform | 19 | from scipy.spatial.distance import squareform |
26 | from mpl_toolkits.mplot3d import axes3d | 20 | from mpl_toolkits.mplot3d import axes3d |
27 | from Bio import AlignIO, SeqIO | 21 | from Bio import AlignIO, SeqIO |
28 | from Bio.PDB.MMCIFParser import MMCIFParser | 22 | from Bio.PDB.MMCIFParser import MMCIFParser |
29 | -from Bio.PDB.vectors import Vector, calc_angle, calc_dihedral | ||
30 | from functools import partial | 23 | from functools import partial |
31 | -from multiprocessing import Pool, Manager | 24 | +from multiprocessing import Pool, Manager, Value |
32 | from os import path | 25 | from os import path |
33 | from tqdm import tqdm | 26 | from tqdm import tqdm |
34 | from collections import Counter | 27 | from collections import Counter |
35 | from setproctitle import setproctitle | 28 | from setproctitle import setproctitle |
36 | from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker, trace_unhandled_exceptions | 29 | from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker, trace_unhandled_exceptions |
37 | -from sklearn.mixture import GaussianMixture | 30 | +from geometric_stats import * |
38 | -import warnings | ||
39 | -from pandas.core.common import SettingWithCopyWarning | ||
40 | -from joblib import Parallel, delayed | ||
41 | - | ||
42 | 31 | ||
43 | np.set_printoptions(threshold=sys.maxsize, linewidth=np.inf, precision=8) | 32 | np.set_printoptions(threshold=sys.maxsize, linewidth=np.inf, precision=8) |
44 | path_to_3D_data = "tobedefinedbyoptions" | 33 | path_to_3D_data = "tobedefinedbyoptions" |
... | @@ -928,6 +917,7 @@ def general_stats(): | ... | @@ -928,6 +917,7 @@ def general_stats(): |
928 | fig.savefig(runDir + "/results/figures/Nfamilies.png") | 917 | fig.savefig(runDir + "/results/figures/Nfamilies.png") |
929 | plt.close() | 918 | plt.close() |
930 | 919 | ||
920 | +@trace_unhandled_exceptions | ||
931 | def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s): | 921 | def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s): |
932 | 922 | ||
933 | # Identify the right 3D file | 923 | # Identify the right 3D file |
... | @@ -1135,11 +1125,6 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): | ... | @@ -1135,11 +1125,6 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): |
1135 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") | 1125 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") |
1136 | return 0 | 1126 | return 0 |
1137 | 1127 | ||
1138 | -def log_to_pbar(pbar): | ||
1139 | - def update(r): | ||
1140 | - pbar.update(1) | ||
1141 | - return update | ||
1142 | - | ||
1143 | def family_order(f): | 1128 | def family_order(f): |
1144 | # sort the RNA families so that the plots are readable | 1129 | # sort the RNA families so that the plots are readable |
1145 | 1130 | ||
... | @@ -1154,70 +1139,6 @@ def family_order(f): | ... | @@ -1154,70 +1139,6 @@ def family_order(f): |
1154 | else: | 1139 | else: |
1155 | return 2 | 1140 | return 2 |
1156 | 1141 | ||
1157 | -def conversion_angles(bdd): | ||
1158 | - """ | ||
1159 | - Convert database torsion angles to degrees | ||
1160 | - and put them in a list to reuse for statistics | ||
1161 | - """ | ||
1162 | - BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | ||
1163 | - db_path = os.path.join(BASE_DIR, bdd) | ||
1164 | - baseDeDonnees = sqlite3.connect(db_path) | ||
1165 | - curseur = baseDeDonnees.cursor() | ||
1166 | - curseur.execute("SELECT chain_id, nt_name, alpha, beta, gamma, delta, epsilon, zeta, chi FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U' ;") | ||
1167 | - liste=[] | ||
1168 | - for nt in curseur.fetchall(): # retrieve the angle measurements and put them in a list | ||
1169 | - liste.append(nt) | ||
1170 | - angles_torsion=[] | ||
1171 | - for nt in liste : | ||
1172 | - angles_deg=[] | ||
1173 | - angles_deg.append(nt[0]) #chain_id | ||
1174 | - angles_deg.append(nt[1]) #nt_name | ||
1175 | - for i in range (2,9): # on all angles | ||
1176 | - angle=0 | ||
1177 | - if nt[i] == None : | ||
1178 | - angle=None | ||
1179 | - elif nt[i]<=np.pi: #if angle value <pi, positive | ||
1180 | - angle=(180/np.pi)*nt[i] | ||
1181 | - elif np.pi < nt[i] <= 2*np.pi : #if value of the angle between pi and 2pi, negative | ||
1182 | - angle=((180/np.pi)*nt[i])-360 | ||
1183 | - else : | ||
1184 | - angle=nt[i] # in case some angles still in degrees | ||
1185 | - angles_deg.append(angle) | ||
1186 | - angles_torsion.append(angles_deg) | ||
1187 | - return angles_torsion | ||
1188 | - | ||
1189 | -def conversion_eta_theta(bdd): | ||
1190 | - """ | ||
1191 | - Convert database pseudotorsion angles to degrees | ||
1192 | - and put them in a list to reuse for statistics | ||
1193 | - """ | ||
1194 | - BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | ||
1195 | - db_path = os.path.join(BASE_DIR, bdd) | ||
1196 | - baseDeDonnees = sqlite3.connect(db_path) | ||
1197 | - curseur = baseDeDonnees.cursor() | ||
1198 | - curseur.execute("SELECT chain_id, nt_name, eta, theta, eta_prime, theta_prime, eta_base, theta_base FROM nucleotide WHERE nt_name='A' OR nt_name='C' OR nt_name='G' OR nt_name='U';") | ||
1199 | - liste=[] | ||
1200 | - for nt in curseur.fetchall(): | ||
1201 | - liste.append(nt) | ||
1202 | - angles_virtuels=[] | ||
1203 | - for nt in liste : | ||
1204 | - angles_deg=[] | ||
1205 | - angles_deg.append(nt[0]) #chain_id | ||
1206 | - angles_deg.append(nt[1]) #nt_name | ||
1207 | - for i in range (2,8): | ||
1208 | - angle=0 | ||
1209 | - if nt[i] == None : | ||
1210 | - angle=None | ||
1211 | - elif nt[i]<=np.pi: | ||
1212 | - angle=(180/np.pi)*nt[i] | ||
1213 | - elif np.pi < nt[i] <= 2*np.pi : | ||
1214 | - angle=((180/np.pi)*nt[i])-360 | ||
1215 | - else : | ||
1216 | - angle=nt[i] | ||
1217 | - angles_deg.append(angle) | ||
1218 | - angles_virtuels.append(angles_deg) | ||
1219 | - return angles_virtuels | ||
1220 | - | ||
1221 | def nt_3d_centers(cif_file, consider_all_atoms): | 1142 | def nt_3d_centers(cif_file, consider_all_atoms): |
1222 | """Return the nucleotides' coordinates, summarizing a nucleotide by only one point. | 1143 | """Return the nucleotides' coordinates, summarizing a nucleotide by only one point. |
1223 | If consider_all_atoms : barycentre is used | 1144 | If consider_all_atoms : barycentre is used |
... | @@ -1252,1674 +1173,30 @@ def nt_3d_centers(cif_file, consider_all_atoms): | ... | @@ -1252,1674 +1173,30 @@ def nt_3d_centers(cif_file, consider_all_atoms): |
1252 | result.append(res) | 1173 | result.append(res) |
1253 | return(result) | 1174 | return(result) |
1254 | 1175 | ||
1255 | -def liste_repres(fpath): | 1176 | +def representatives_from_nrlist(res): |
1256 | - repres=[] | 1177 | + nr_code = min([i for i in [1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 20.0] if i >= res]) |
1257 | - df=pd.read_csv(os.path.abspath(fpath)) | 1178 | + fpath = f"/home/data/RNA/3D/latest_nr_list_{nr_code}A.csv" |
1179 | + repres = [] | ||
1180 | + df = pd.read_csv(os.path.abspath(fpath)) | ||
1258 | for i in range(df.shape[0]): | 1181 | for i in range(df.shape[0]): |
1259 | - up_name=df["representative"][i] | 1182 | + up_name = df["representative"][i] |
1260 | if '+' in up_name: | 1183 | if '+' in up_name: |
1261 | - up_name=up_name.split('+') | 1184 | + up_name = up_name.split('+') |
1262 | for i in range(len(up_name)): | 1185 | for i in range(len(up_name)): |
1263 | - chain=up_name[i].split('|') | 1186 | + chain = up_name[i].split('|') |
1264 | - chain=chain[0].lower()+'_'+chain[1]+'_'+chain[2] | 1187 | + chain = chain[0].lower() + '_' + chain[1] + '_' + chain[2] |
1265 | - repres.append(chain+'.cif') | 1188 | + repres.append(chain + '.cif') |
1266 | else : | 1189 | else : |
1267 | - up_name=up_name.split('|') | 1190 | + up_name = up_name.split('|') |
1268 | - low_name=up_name[0].lower()+'_'+up_name[1]+'_'+up_name[2] | 1191 | + low_name = up_name[0].lower() + '_' + up_name[1] + '_' + up_name[2] |
1269 | - repres.append(low_name+'.cif') | 1192 | + repres.append(low_name + '.cif') |
1270 | 1193 | ||
1271 | return repres | 1194 | return repres |
1272 | 1195 | ||
1273 | - | 1196 | +def log_to_pbar(pbar): |
1274 | -def get_euclidian_distance(L1, L2): | 1197 | + def update(r): |
1275 | - """ | ||
1276 | - Returns the distance between two points (coordinates in lists) | ||
1277 | - """ | ||
1278 | - | ||
1279 | - if len(L1)*len(L2) == 0: | ||
1280 | - return np.nan | ||
1281 | - | ||
1282 | - if len(L1) == 1: | ||
1283 | - L1 = L1[0] | ||
1284 | - if len(L2) == 1: | ||
1285 | - L2 = L2[0] | ||
1286 | - | ||
1287 | - e = 0 | ||
1288 | - for i in range(len(L1)): | ||
1289 | - try: | ||
1290 | - e += float(L1[i] - L2[i])**2 | ||
1291 | - except TypeError: | ||
1292 | - print("Terms: ", L1, L2) | ||
1293 | - except IndexError: | ||
1294 | - print("Terms: ", L1, L2) | ||
1295 | - | ||
1296 | - return np.sqrt(e) | ||
1297 | - | ||
1298 | -def get_flat_angle(L1, L2, L3): | ||
1299 | - if len(L1)*len(L2)*len(L3) == 0: | ||
1300 | - return np.nan | ||
1301 | - | ||
1302 | - return calc_angle(Vector(L1[0]), Vector(L2[0]), Vector(L3[0]))*(180/np.pi) | ||
1303 | - | ||
1304 | -def get_torsion_angle(L1, L2, L3, L4): | ||
1305 | - if len(L1)*len(L2)*len(L3)*len(L4) == 0: | ||
1306 | - return np.nan | ||
1307 | - | ||
1308 | - return calc_dihedral(Vector(L1[0]), Vector(L2[0]), Vector(L3[0]), Vector(L4[0]))*(180/np.pi) | ||
1309 | - | ||
1310 | -def pos_b1(res): | ||
1311 | - """ | ||
1312 | - Returns the coordinates of virtual atom B1 (center of the first aromatic cycle) | ||
1313 | - """ | ||
1314 | - coordb1=[] | ||
1315 | - somme_x_b1=0 | ||
1316 | - somme_y_b1=0 | ||
1317 | - somme_z_b1=0 | ||
1318 | - moy_x_b1=0 | ||
1319 | - moy_y_b1=0 | ||
1320 | - moy_z_b1=0 | ||
1321 | - #different cases | ||
1322 | - #some residues have 2 aromatic cycles | ||
1323 | - if res.get_resname() in ['A', 'G', '2MG', '7MG', 'MA6', '6IA', 'OMG' , '2MA', 'B9B', 'A2M', '1MA', 'E7G', 'P7G', 'B8W', 'B8K', 'BGH', '6MZ', 'E6G', 'MHG', 'M7A', 'M2G', 'P5P', 'G7M', '1MG', 'T6A', 'MIA', 'YG', 'YYG', 'I', 'DG', 'N79', '574', 'DJF', 'AET', '12A', 'ANZ', 'UY4'] : | ||
1324 | - c=0 | ||
1325 | - names=[] | ||
1326 | - for atom in res : | ||
1327 | - if (atom.get_fullname() in ['N9', 'C8', 'N7', 'C4', 'C5']) : | ||
1328 | - c=c+1 | ||
1329 | - names.append(atom.get_name()) | ||
1330 | - coord=atom.get_vector() | ||
1331 | - somme_x_b1=somme_x_b1+coord[0] | ||
1332 | - somme_y_b1=somme_y_b1+coord[1] | ||
1333 | - somme_z_b1=somme_z_b1+coord[2] | ||
1334 | - else : | ||
1335 | - c=c | ||
1336 | - #calcul coord B1 | ||
1337 | - if c != 0 : | ||
1338 | - moy_x_b1=somme_x_b1/c | ||
1339 | - moy_y_b1=somme_y_b1/c | ||
1340 | - moy_z_b1=somme_z_b1/c | ||
1341 | - coordb1.append(moy_x_b1) | ||
1342 | - coordb1.append(moy_y_b1) | ||
1343 | - coordb1.append(moy_z_b1) | ||
1344 | - #others have only one cycle | ||
1345 | - if res.get_resname() in ['C', 'U', 'AG9', '70U', '1RN', 'RSP', '3AU', 'CM0', 'U8U', 'IU', 'E3C', '4SU', '5HM', 'LV2', 'LHH', '4AC', 'CH', 'Y5P', '2MU', '4OC', 'B8T', 'JMH', 'JMC', 'DC', 'B9H', 'UR3', 'I4U', 'B8Q', 'P4U', 'OMU', 'OMC', '5MU', 'H2U', 'CBV', 'M1Y', 'B8N', '3TD', 'B8H'] : | ||
1346 | - c=0 | ||
1347 | - for atom in res : | ||
1348 | - if (atom.get_fullname() in ['C6', 'N3', 'N1', 'C2', 'C4', 'C5']): | ||
1349 | - c=c+1 | ||
1350 | - coord=atom.get_vector() | ||
1351 | - somme_x_b1=somme_x_b1+coord[0] | ||
1352 | - somme_y_b1=somme_y_b1+coord[1] | ||
1353 | - somme_z_b1=somme_z_b1+coord[2] | ||
1354 | - #calcul coord B1 | ||
1355 | - if c != 0 : | ||
1356 | - moy_x_b1=somme_x_b1/c | ||
1357 | - moy_y_b1=somme_y_b1/c | ||
1358 | - moy_z_b1=somme_z_b1/c | ||
1359 | - coordb1.append(moy_x_b1) | ||
1360 | - coordb1.append(moy_y_b1) | ||
1361 | - coordb1.append(moy_z_b1) | ||
1362 | - | ||
1363 | - if len(coordb1): | ||
1364 | - return [coordb1] | ||
1365 | - else: | ||
1366 | - return [] | ||
1367 | - | ||
1368 | -def pos_b2(res): | ||
1369 | - """ | ||
1370 | - Returns the coordinates of virtual atom B2 (center of the second aromatic cycle, if exists) | ||
1371 | - """ | ||
1372 | - coordb2=[] | ||
1373 | - somme_x_b2=0 | ||
1374 | - somme_y_b2=0 | ||
1375 | - somme_z_b2=0 | ||
1376 | - moy_x_b2=0 | ||
1377 | - moy_y_b2=0 | ||
1378 | - moy_z_b2=0 | ||
1379 | - | ||
1380 | - if res.get_resname() in ['A', 'G', '2MG', '7MG', 'MA6', '6IA', 'OMG' , '2MA', 'B9B', 'A2M', '1MA', 'E7G', 'P7G', 'B8W', 'B8K', 'BGH', '6MZ', 'E6G', 'MHG', 'M7A', 'M2G', 'P5P', 'G7M', '1MG', 'T6A', 'MIA', 'YG', 'YYG', 'I', 'DG', 'N79', '574', 'DJF', 'AET', '12A', 'ANZ', 'UY4'] : #2 cycles aromatiques | ||
1381 | - c=0 | ||
1382 | - for atom in res : | ||
1383 | - if atom.get_fullname() in ['C6', 'N3', 'N1', 'C2', 'C4', 'C5'] : | ||
1384 | - c=c+1 | ||
1385 | - coord=atom.get_vector() | ||
1386 | - somme_x_b2=somme_x_b2+coord[0] | ||
1387 | - somme_y_b2=somme_y_b2+coord[1] | ||
1388 | - somme_z_b2=somme_z_b2+coord[2] | ||
1389 | - #calcul coord B2 | ||
1390 | - if c!=0 : | ||
1391 | - moy_x_b2=somme_x_b2/c | ||
1392 | - moy_y_b2=somme_y_b2/c | ||
1393 | - moy_z_b2=somme_z_b2/c | ||
1394 | - coordb2.append(moy_x_b2) | ||
1395 | - coordb2.append(moy_y_b2) | ||
1396 | - coordb2.append(moy_z_b2) | ||
1397 | - if len(coordb2): | ||
1398 | - return [coordb2] | ||
1399 | - else: | ||
1400 | - return [] | ||
1401 | - | ||
1402 | -@trace_unhandled_exceptions | ||
1403 | -def basepair_measures(res, pair): | ||
1404 | - """ | ||
1405 | - measurement of the flat angles describing a basepair in the HiRE-RNA model | ||
1406 | - """ | ||
1407 | - if res.get_resname()=='C' or res.get_resname()=='U' : | ||
1408 | - atom_c4_res = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] | ||
1409 | - atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | ||
1410 | - atom_b1_res = pos_b1(res) | ||
1411 | - if not len(atom_c4_res) or not len(atom_c1p_res) or not len(atom_b1_res): | ||
1412 | - return | ||
1413 | - a3_res = Vector(atom_c4_res[0]) | ||
1414 | - a2_res = Vector(atom_c1p_res[0]) | ||
1415 | - a1_res = Vector(atom_b1_res[0]) | ||
1416 | - if res.get_resname()=='A' or res.get_resname()=='G' : | ||
1417 | - atom_c1p_res = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | ||
1418 | - atom_b1_res = pos_b1(res) | ||
1419 | - atom_b2_res = pos_b2(res) | ||
1420 | - if not len(atom_c1p_res) or not len(atom_b1_res) or not len(atom_b2_res): | ||
1421 | - return | ||
1422 | - a3_res = Vector(atom_c1p_res[0]) | ||
1423 | - a2_res = Vector(atom_b1_res[0]) | ||
1424 | - a1_res = Vector(atom_b2_res[0]) | ||
1425 | - | ||
1426 | - if pair.get_resname()=='C' or pair.get_resname()=='U' : | ||
1427 | - atom_c4_pair = [ atom.get_coord() for atom in pair if "C4'" in atom.get_fullname() ] | ||
1428 | - atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] | ||
1429 | - atom_b1_pair = pos_b1(pair) | ||
1430 | - if not len(atom_c4_pair) or not len(atom_c1p_pair) or not len(atom_b1_pair): | ||
1431 | - return | ||
1432 | - a3_pair = Vector(atom_c4_pair[0]) | ||
1433 | - a2_pair = Vector(atom_c1p_pair[0]) | ||
1434 | - a1_pair = Vector(atom_b1_pair[0]) | ||
1435 | - if pair.get_resname()=='A' or pair.get_resname()=='G' : | ||
1436 | - atom_c1p_pair = [ atom.get_coord() for atom in pair if "C1'" in atom.get_fullname() ] | ||
1437 | - atom_b1_pair = pos_b1(pair) | ||
1438 | - atom_b2_pair = pos_b2(pair) | ||
1439 | - if not len(atom_c1p_pair) or not len(atom_b1_pair) or not len(atom_b2_pair): # No C1' atom in the paired nucleotide, skip measures. | ||
1440 | - return | ||
1441 | - a3_pair = Vector(atom_c1p_pair[0]) | ||
1442 | - a2_pair = Vector(atom_b1_pair[0]) | ||
1443 | - a1_pair = Vector(atom_b2_pair[0]) | ||
1444 | - | ||
1445 | - # Bond vectors | ||
1446 | - res_32 = a3_res - a2_res | ||
1447 | - res_12 = a1_res - a2_res | ||
1448 | - pair_32 = a3_pair - a2_pair | ||
1449 | - pair_12 = a1_pair - a2_pair | ||
1450 | - rho = a1_res - a1_pair # from pair to res | ||
1451 | - | ||
1452 | - # dist | ||
1453 | - dist = rho.norm() | ||
1454 | - | ||
1455 | - # we calculate the 2 plane angles | ||
1456 | - with warnings.catch_warnings(): | ||
1457 | - warnings.simplefilter('ignore', RuntimeWarning) | ||
1458 | - b = res_12.angle(rho)*(180/np.pi) # equal to the previous implementation | ||
1459 | - c = pair_12.angle(-rho)*(180/np.pi) # | ||
1460 | - # a = calc_angle(a1_res, a2_res, a3_res)*(180/np.pi) # not required | ||
1461 | - # b = calc_angle(a2_res, a1_res, a1_pair)*(180/np.pi) | ||
1462 | - # c = calc_angle(a1_res, a1_pair, a2_pair)*(180/np.pi) | ||
1463 | - # d = calc_angle(a3_pair, a2_pair, a1_pair)*(180/np.pi) # not required | ||
1464 | - | ||
1465 | - # Compute plane vectors | ||
1466 | - n1 = (res_32**res_12).normalized() # ** between vectors, is the cross product | ||
1467 | - n2 = (pair_32**pair_12).normalized() | ||
1468 | - | ||
1469 | - # Distances between base tip and the other base's plane (orthogonal projection) | ||
1470 | - # if angle(rho, n) > pi/2 the distance is negative (signed following n) | ||
1471 | - d1 = rho*n1 # projection of rho on axis n1 | ||
1472 | - d2 = rho*n2 | ||
1473 | - | ||
1474 | - # Now the projection of rho in the planes. It's just a sum of the triangles' two other edges. | ||
1475 | - p1 = (-rho+n1**d1).normalized() # between vector and scalar, ** is the multiplication by a scalar | ||
1476 | - p2 = (rho-n2**d2).normalized() | ||
1477 | - | ||
1478 | - # Measure tau, the dihedral | ||
1479 | - u = (res_12**rho).normalized() | ||
1480 | - v = (rho**pair_12).normalized() | ||
1481 | - cosTau1 = n1*u | ||
1482 | - cosTau2 = v*n2 | ||
1483 | - | ||
1484 | - # cosTau is enough to compute alpha, but we can't distinguish | ||
1485 | - # yet betwwen tau and -tau. If the full computation if required, then: | ||
1486 | - tau1 = np.arccos(cosTau1)*(180/np.pi) | ||
1487 | - tau2 = np.arccos(cosTau2)*(180/np.pi) | ||
1488 | - w1 = u**n1 | ||
1489 | - w2 = v**n2 | ||
1490 | - if res_12*w1 < 0: | ||
1491 | - tau1 = -tau1 | ||
1492 | - if pair_12*w2 < 0: | ||
1493 | - tau2 = -tau2 | ||
1494 | - | ||
1495 | - # And finally, the a1 and a2 angles between res_12 and p1 / pair_12 and p2 | ||
1496 | - with warnings.catch_warnings(): | ||
1497 | - warnings.simplefilter('ignore', RuntimeWarning) | ||
1498 | - a1 = (-res_12).angle(p1)*(180/np.pi) | ||
1499 | - a2 = (-pair_12).angle(p2)*(180/np.pi) | ||
1500 | - if cosTau1 > 0: | ||
1501 | - # CosTau > 0 (Tau < 90 or Tau > 270) implies that alpha > 180. | ||
1502 | - a1 = -a1 | ||
1503 | - if cosTau2 > 0: | ||
1504 | - a2 = -a2 | ||
1505 | - | ||
1506 | - return [dist, b, c, d1, d2, a1, a2, tau1, tau2] | ||
1507 | - | ||
1508 | -@trace_unhandled_exceptions | ||
1509 | -def measure_from_structure(f): | ||
1510 | - """ | ||
1511 | - Do geometric measures required on a given filename | ||
1512 | - """ | ||
1513 | - | ||
1514 | - name = f.split('.')[0] | ||
1515 | - | ||
1516 | - global idxQueue | ||
1517 | - thr_idx = idxQueue.get() | ||
1518 | - setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measure_from_structure({f})") | ||
1519 | - | ||
1520 | - # Open the structure | ||
1521 | - with warnings.catch_warnings(): | ||
1522 | - # Ignore the PDB problems. This mostly warns that some chain is discontinuous. | ||
1523 | - warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.PDBConstructionWarning) | ||
1524 | - warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.BiopythonWarning) | ||
1525 | - parser=MMCIFParser() | ||
1526 | - s = parser.get_structure(f, os.path.abspath(path_to_3D_data+ "rna_only/" + f)) | ||
1527 | - | ||
1528 | - #pyle_measures(name, s, thr_idx) | ||
1529 | - #measures_aa(name, s, thr_idx) | ||
1530 | - if DO_HIRE_RNA_MEASURES: | ||
1531 | - measures_hrna(name, s, thr_idx) | ||
1532 | - measures_hrna_basepairs(name, s, thr_idx) | ||
1533 | - if DO_WADLEY_ANALYSIS: | ||
1534 | - #measures_wadley(name, s, thr_idx) | ||
1535 | - pyle_measures(name, s, thr_idx) | ||
1536 | - | ||
1537 | - idxQueue.put(thr_idx) # replace the thread index in the queue | ||
1538 | - setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") | ||
1539 | - | ||
1540 | -@trace_unhandled_exceptions | ||
1541 | -def measures_wadley(name, s, thr_idx): | ||
1542 | - """ | ||
1543 | - Measures the distances and plane angles involving C1' and P atoms | ||
1544 | - Saves the results in a dataframe | ||
1545 | - """ | ||
1546 | - | ||
1547 | - # do not recompute something already computed | ||
1548 | - if (path.isfile(runDir + '/results/geometry/Pyle/angles/flat_angles_pyle_' + name + '.csv') and | ||
1549 | - path.isfile(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv")): | ||
1550 | - return | ||
1551 | - | ||
1552 | - liste_dist = [] | ||
1553 | - liste_angl = [] | ||
1554 | - last_p = [] | ||
1555 | - last_c1p = [] | ||
1556 | - last_c4p = [] | ||
1557 | - | ||
1558 | - setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_wadley({name})") | ||
1559 | - | ||
1560 | - chain = next(s[0].get_chains()) | ||
1561 | - for res in tqdm(chain, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_wadley", unit="res", leave=False): | ||
1562 | - p_c1p_psuiv = np.nan | ||
1563 | - c1p_psuiv_c1psuiv = np.nan | ||
1564 | - if res.get_resname() not in ['ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA"] : | ||
1565 | - atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"] | ||
1566 | - atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | ||
1567 | - atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] | ||
1568 | - if len(atom_c1p) > 1: | ||
1569 | - for atom in res: | ||
1570 | - if "C1'" in atom.get_fullname(): | ||
1571 | - print("\n", atom.get_fullname(), "-", res.get_resname(), "\n") | ||
1572 | - | ||
1573 | - p_c1p_psuiv = get_flat_angle(last_p, last_c1p, atom_p) | ||
1574 | - c1p_psuiv_c1psuiv = get_flat_angle(last_c1p, atom_p, atom_c1p) | ||
1575 | - c1p_psuiv = get_euclidian_distance(last_c1p, atom_p) | ||
1576 | - p_c1p = get_euclidian_distance(atom_p, atom_c1p) | ||
1577 | - c4p_psuiv = get_euclidian_distance(last_c4p, atom_p) | ||
1578 | - p_c4p = get_euclidian_distance(atom_p, atom_c4p) | ||
1579 | - | ||
1580 | - last_p = atom_p | ||
1581 | - last_c1p = atom_c1p | ||
1582 | - last_c4p = atom_c4p | ||
1583 | - | ||
1584 | - liste_dist.append([res.get_resname(), c1p_psuiv, p_c1p, c4p_psuiv, p_c4p]) | ||
1585 | - liste_angl.append([res.get_resname(), p_c1p_psuiv, c1p_psuiv_c1psuiv]) | ||
1586 | - | ||
1587 | - df = pd.DataFrame(liste_dist, columns=["Residu", "C1'-P", "P-C1'", "C4'-P", "P-C4'"]) | ||
1588 | - df.to_csv(runDir + "/results/geometry/Pyle/distances/distances_wadley_" + name + ".csv") | ||
1589 | - df = pd.DataFrame(liste_angl, columns=["Residu", "P-C1'-P°", "C1'-P°-C1'°"]) | ||
1590 | - df.to_csv(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle_"+name+".csv") | ||
1591 | - | ||
1592 | -@trace_unhandled_exceptions | ||
1593 | -def measures_aa(name, s, thr_idx): | ||
1594 | - """ | ||
1595 | - Measures the distance between atoms linked by covalent bonds | ||
1596 | - """ | ||
1597 | - | ||
1598 | - # do not recompute something already computed | ||
1599 | - if path.isfile(runDir+"/results/geometry/all-atoms/distances/dist_atoms_"+name+".csv"): | ||
1600 | - return | ||
1601 | - | ||
1602 | - last_o3p = [] # o3 'of the previous nucleotide linked to the P of the current nucleotide | ||
1603 | - liste_common = [] | ||
1604 | - liste_purines = [] | ||
1605 | - liste_pyrimidines = [] | ||
1606 | - setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measure_aa_dists({name})") | ||
1607 | - | ||
1608 | - chain = next(s[0].get_chains()) # 1 chain per file | ||
1609 | - residues = list(chain.get_residues()) | ||
1610 | - pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measure_aa_dists", unit="res", leave=False) | ||
1611 | - pbar.update(0) | ||
1612 | - for res in chain : | ||
1613 | - | ||
1614 | - # for residues A, G, C, U | ||
1615 | - op3_p=[] | ||
1616 | - p_op1=[] | ||
1617 | - p_op2=[] | ||
1618 | - p_o5p=[] | ||
1619 | - o5p_c5p=[] | ||
1620 | - c5p_c4p=[] | ||
1621 | - c4p_o4p=[] | ||
1622 | - o4p_c1p=[] | ||
1623 | - c1p_c2p=[] | ||
1624 | - c2p_o2p=[] | ||
1625 | - c2p_c3p=[] | ||
1626 | - c3p_o3p=[] | ||
1627 | - c4p_c3p=[] | ||
1628 | - | ||
1629 | - #if res = A or G | ||
1630 | - c1p_n9=None | ||
1631 | - n9_c8=None | ||
1632 | - c8_n7=None | ||
1633 | - n7_c5=None | ||
1634 | - c5_c6=None | ||
1635 | - c6_n1=None | ||
1636 | - n1_c2=None | ||
1637 | - c2_n3=None | ||
1638 | - n3_c4=None | ||
1639 | - c4_n9=None | ||
1640 | - c4_c5=None | ||
1641 | - #if res=G | ||
1642 | - c6_o6=None | ||
1643 | - c2_n2=None | ||
1644 | - #if res = A | ||
1645 | - c6_n6=None | ||
1646 | - | ||
1647 | - #if res = C or U | ||
1648 | - c1p_n1=None | ||
1649 | - n1_c6=None | ||
1650 | - c6_c5=None | ||
1651 | - c5_c4=None | ||
1652 | - c4_n3=None | ||
1653 | - n3_c2=None | ||
1654 | - c2_n1=None | ||
1655 | - c2_o2=None | ||
1656 | - #if res =C | ||
1657 | - c4_n4=None | ||
1658 | - #if res=U | ||
1659 | - c4_o4=None | ||
1660 | - last_o3p_p=None | ||
1661 | - | ||
1662 | - | ||
1663 | - if res.get_resname()=='A' or res.get_resname()=='G' or res.get_resname()=='C' or res.get_resname()=='U' : | ||
1664 | - #get the coordinates of the atoms | ||
1665 | - atom_p = [ atom.get_coord() for atom in res if atom.get_name() == "P"] | ||
1666 | - atom_op3 = [ atom.get_coord() for atom in res if "OP3" in atom.get_fullname() ] | ||
1667 | - atom_op1 = [ atom.get_coord() for atom in res if "OP1" in atom.get_fullname() ] | ||
1668 | - atom_op2 = [ atom.get_coord() for atom in res if "OP2" in atom.get_fullname() ] | ||
1669 | - atom_o5p= [ atom.get_coord() for atom in res if "O5'" in atom.get_fullname() ] | ||
1670 | - atom_c5p = [ atom.get_coord() for atom in res if "C5'" in atom.get_fullname() ] | ||
1671 | - atom_c4p = [ atom.get_coord() for atom in res if "C4'" in atom.get_fullname() ] | ||
1672 | - atom_o4p = [ atom.get_coord() for atom in res if "O4'" in atom.get_fullname() ] | ||
1673 | - atom_c3p = [ atom.get_coord() for atom in res if "C3'" in atom.get_fullname() ] | ||
1674 | - atom_o3p = [ atom.get_coord() for atom in res if "O3'" in atom.get_fullname() ] | ||
1675 | - atom_c2p = [ atom.get_coord() for atom in res if "C2'" in atom.get_fullname() ] | ||
1676 | - atom_o2p = [ atom.get_coord() for atom in res if "O2'" in atom.get_fullname() ] | ||
1677 | - atom_c1p = [ atom.get_coord() for atom in res if "C1'" in atom.get_fullname() ] | ||
1678 | - atom_n9 = [ atom.get_coord() for atom in res if "N9" in atom.get_fullname() ] | ||
1679 | - atom_c8 = [ atom.get_coord() for atom in res if "C8" in atom.get_fullname() ] | ||
1680 | - atom_n7 = [ atom.get_coord() for atom in res if "N7" in atom.get_fullname() ] | ||
1681 | - atom_c5 = [ atom.get_coord() for atom in res if atom.get_name() == "C5"] | ||
1682 | - atom_c6 = [ atom.get_coord() for atom in res if "C6" in atom.get_fullname() ] | ||
1683 | - atom_o6 = [ atom.get_coord() for atom in res if "O6" in atom.get_fullname() ] | ||
1684 | - atom_n6 = [ atom.get_coord() for atom in res if "N6" in atom.get_fullname() ] | ||
1685 | - atom_n1 = [ atom.get_coord() for atom in res if "N1" in atom.get_fullname() ] | ||
1686 | - atom_c2 = [ atom.get_coord() for atom in res if atom.get_name() == "C2"] | ||
1687 | - atom_n2 = [ atom.get_coord() for atom in res if "N2" in atom.get_fullname() ] | ||
1688 | - atom_o2 = [ atom.get_coord() for atom in res if atom.get_name() == "O2"] | ||
1689 | - atom_n3 = [ atom.get_coord() for atom in res if "N3" in atom.get_fullname() ] | ||
1690 | - atom_c4 = [ atom.get_coord() for atom in res if atom.get_name() == "C4" ] | ||
1691 | - atom_n4 = [ atom.get_coord() for atom in res if "N4" in atom.get_fullname() ] | ||
1692 | - atom_o4 = [ atom.get_coord() for atom in res if atom.get_name() == "O4"] | ||
1693 | - | ||
1694 | - op3_p = get_euclidian_distance(atom_op3, atom_p) | ||
1695 | - last_o3p_p = get_euclidian_distance(last_o3p, atom_p) # link with the previous nucleotide | ||
1696 | - p_op1 = get_euclidian_distance(atom_op1, atom_p) | ||
1697 | - p_op2 = get_euclidian_distance(atom_op2, atom_p) | ||
1698 | - p_o5p = get_euclidian_distance(atom_o5p, atom_p) | ||
1699 | - o5p_c5p = get_euclidian_distance(atom_o5p, atom_c5p) | ||
1700 | - c5p_c4p = get_euclidian_distance(atom_c5p, atom_c4p) | ||
1701 | - c4p_o4p = get_euclidian_distance(atom_c4p, atom_o4p) | ||
1702 | - c4p_c3p = get_euclidian_distance(atom_c4p, atom_c3p) | ||
1703 | - o4p_c1p = get_euclidian_distance(atom_o4p, atom_c1p) | ||
1704 | - c1p_c2p = get_euclidian_distance(atom_c1p, atom_c2p) | ||
1705 | - c2p_o2p = get_euclidian_distance(atom_c2p, atom_o2p) | ||
1706 | - c2p_c3p = get_euclidian_distance(atom_c2p, atom_c3p) | ||
1707 | - c3p_o3p = get_euclidian_distance(atom_c3p, atom_o3p) | ||
1708 | - | ||
1709 | - last_o3p=atom_o3p # o3' of this residue becomes the previous o3' of the following | ||
1710 | - | ||
1711 | - #different cases for the aromatic cycles | ||
1712 | - if res.get_resname()=='A' or res.get_resname()=='G': | ||
1713 | - # computes the distances between atoms of aromatic cycles | ||
1714 | - c1p_n9 = get_euclidian_distance(atom_c1p, atom_n9) | ||
1715 | - n9_c8 = get_euclidian_distance(atom_n9, atom_c8) | ||
1716 | - c8_n7 = get_euclidian_distance(atom_c8, atom_n7) | ||
1717 | - n7_c5 = get_euclidian_distance(atom_n7, atom_c5) | ||
1718 | - c5_c6 = get_euclidian_distance(atom_c5, atom_c6) | ||
1719 | - c6_o6 = get_euclidian_distance(atom_c6, atom_o6) | ||
1720 | - c6_n6 = get_euclidian_distance(atom_c6, atom_n6) | ||
1721 | - c6_n1 = get_euclidian_distance(atom_c6, atom_n1) | ||
1722 | - n1_c2 = get_euclidian_distance(atom_n1, atom_c2) | ||
1723 | - c2_n2 = get_euclidian_distance(atom_c2, atom_n2) | ||
1724 | - c2_n3 = get_euclidian_distance(atom_c2, atom_n3) | ||
1725 | - n3_c4 = get_euclidian_distance(atom_n3, atom_c4) | ||
1726 | - c4_n9 = get_euclidian_distance(atom_c4, atom_n9) | ||
1727 | - c4_c5 = get_euclidian_distance(atom_c4, atom_c5) | ||
1728 | - if res.get_resname()=='C' or res.get_resname()=='U' : | ||
1729 | - c1p_n1 = get_euclidian_distance(atom_c1p, atom_n1) | ||
1730 | - n1_c6 = get_euclidian_distance(atom_n1, atom_c6) | ||
1731 | - c6_c5 = get_euclidian_distance(atom_c6, atom_c5) | ||
1732 | - c5_c4 = get_euclidian_distance(atom_c5, atom_c4) | ||
1733 | - c4_n3 = get_euclidian_distance(atom_c4, atom_n3) | ||
1734 | - n3_c2 = get_euclidian_distance(atom_n3, atom_c2) | ||
1735 | - c2_o2 = get_euclidian_distance(atom_c2, atom_o2) | ||
1736 | - c2_n1 = get_euclidian_distance(atom_c2, atom_n1) | ||
1737 | - c4_n4 = get_euclidian_distance(atom_c4, atom_n4) | ||
1738 | - c4_o4 = get_euclidian_distance(atom_c4, atom_o4) | ||
1739 | - | ||
1740 | - liste_common.append([res.get_resname(), last_o3p_p, op3_p, p_op1, p_op2, p_o5p, o5p_c5p, c5p_c4p, c4p_o4p, c4p_c3p, o4p_c1p, c1p_c2p, c2p_o2p, c2p_c3p, c3p_o3p] ) | ||
1741 | - liste_purines.append([c1p_n9, n9_c8, c8_n7, n7_c5, c5_c6, c6_o6, c6_n6, c6_n1, n1_c2, c2_n2, c2_n3, n3_c4, c4_n9, c4_c5]) | ||
1742 | - liste_pyrimidines.append([c1p_n1, n1_c6, c6_c5, c5_c4, c4_n3, n3_c2, c2_o2, c2_n1, c4_n4, c4_o4]) | ||
1743 | - pbar.update(1) | ||
1744 | - | ||
1745 | - df_comm=pd.DataFrame(liste_common, columns=["Residu", "O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'"]) | ||
1746 | - df_pur=pd.DataFrame(liste_purines, columns=["C1'-N9", "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5" ]) | ||
1747 | - df_pyr=pd.DataFrame(liste_pyrimidines, columns=["C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"]) | ||
1748 | - df=pd.concat([df_comm, df_pur, df_pyr], axis = 1) | ||
1749 | - pbar.close() | ||
1750 | - | ||
1751 | - df.to_csv(runDir + "/results/geometry/all-atoms/distances/dist_atoms_" + name + ".csv") | ||
1752 | - | ||
@trace_unhandled_exceptions
def measures_hrna(name, s, thr_idx):
    """
    Measures the distances, flat angles and torsion angles between the pseudo-atoms
    of the HiRE-RNA model linked by covalent bonds.

    Only the first chain of the first model of `s` is processed. Three CSV files
    (distances, angles, torsions) are written under results/geometry/HiRE-RNA/.
    Nothing is recomputed if the three files already exist.

    name: identifier used to build the output file names.
    s: structure object; s[0].get_chains() must yield the chain to measure
       (presumably a Biopython Structure -- confirm against the caller).
    thr_idx: worker index, used for the process title and the progress bar position.
    """

    # Build every output path once, so the "already computed" test and the
    # final writes cannot get out of sync. The space before the name is
    # part of the historical file names and is kept on purpose.
    out_root = runDir + '/results/geometry/HiRE-RNA/'
    dist_file = out_root + 'distances/dist_atoms_hire_RNA ' + name + '.csv'
    angl_file = out_root + 'angles/angles_hire_RNA ' + name + '.csv'
    tors_file = out_root + 'torsions/angles_torsion_hire_RNA ' + name + '.csv'

    # do not recompute something already computed
    if path.isfile(dist_file) and path.isfile(angl_file) and path.isfile(tors_file):
        return

    # residues carrying several phosphate groups, ignored by the measures
    multi_phosphate = ('ATP', 'CCC', 'A3P', 'A23', 'GDP', 'RIA', "2BA")

    liste_dist = []
    liste_angl = []
    liste_tors = []

    # coordinates of the previous measured residue (empty for the first one)
    last_c4p = []
    last_c5p = []
    last_c1p = []
    last_o5p = []

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_hrna({name})")

    chain = next(s[0].get_chains())
    for res in tqdm(chain, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna", unit="res", leave=False):
        resname = res.get_resname()

        # Default values, kept as they were historically (None / NaN mix):
        # they are what gets written for an ignored residue.
        # distances: C4'(prev)-P, P-O5', O5'-C5', C5'-C4', C4'-C1', C1'-B1, B1-B2
        dists = [np.nan, None, None, None, None, None, None]
        # angles, in the column order of the angles CSV
        angles = [None] * 8
        # torsions, in the column order of the torsions CSV
        torsions = [np.nan] * 8

        if resname not in multi_phosphate:
            atom_p = [atom.get_coord() for atom in res if atom.get_name() == "P"]
            atom_o5p = [atom.get_coord() for atom in res if "O5'" in atom.get_fullname()]
            atom_c5p = [atom.get_coord() for atom in res if "C5'" in atom.get_fullname()]
            atom_c4p = [atom.get_coord() for atom in res if "C4'" in atom.get_fullname()]
            atom_c1p = [atom.get_coord() for atom in res if "C1'" in atom.get_fullname()]
            atom_b1 = pos_b1(res)  # position b1 to be calculated, depending on the case
            atom_b2 = pos_b2(res)  # position b2 to be calculated only for those with 2 cycles

            # Distances. If one of the atoms is empty, the euclidian distance returns NaN.
            dists = [
                get_euclidian_distance(last_c4p, atom_p),   # link with the previous residue
                get_euclidian_distance(atom_p, atom_o5p),
                get_euclidian_distance(atom_o5p, atom_c5p),
                get_euclidian_distance(atom_c5p, atom_c4p),
                get_euclidian_distance(atom_c4p, atom_c1p),
                get_euclidian_distance(atom_c1p, atom_b1),
                get_euclidian_distance(atom_b1, atom_b2),
            ]

            # Flat angles. Same.
            angles = [
                get_flat_angle(last_c4p, atom_p, atom_o5p),
                get_flat_angle(last_c1p, last_c4p, atom_p),
                get_flat_angle(last_c5p, last_c4p, atom_p),
                get_flat_angle(atom_p, atom_o5p, atom_c5p),
                get_flat_angle(atom_o5p, atom_c5p, atom_c4p),
                get_flat_angle(atom_c5p, atom_c4p, atom_c1p),
                get_flat_angle(atom_c4p, atom_c1p, atom_b1),
                get_flat_angle(atom_c1p, atom_b1, atom_b2),
            ]

            # Torsions. Same.
            torsions = [
                get_torsion_angle(atom_p, atom_o5p, atom_c5p, atom_c4p),
                get_torsion_angle(atom_o5p, atom_c5p, atom_c4p, atom_c1p),
                get_torsion_angle(atom_c5p, atom_c4p, atom_c1p, atom_b1),
                get_torsion_angle(atom_c4p, atom_c1p, atom_b1, atom_b2),
                get_torsion_angle(last_o5p, last_c5p, last_c4p, atom_p),
                get_torsion_angle(last_c5p, last_c4p, atom_p, atom_o5p),
                get_torsion_angle(last_c4p, atom_p, atom_o5p, atom_c5p),
                get_torsion_angle(last_c1p, last_c4p, atom_p, atom_o5p),
            ]

            # NOTE(review): nesting reconstructed from a listing without indentation;
            # the "previous residue" atoms are assumed to be updated only for
            # residues that were actually measured -- confirm.
            last_c4p = atom_c4p
            last_c5p = atom_c5p
            last_c1p = atom_c1p
            last_o5p = atom_o5p

        # one row per residue, ignored residues included (with default values)
        liste_dist.append([resname] + dists)
        liste_angl.append([resname] + angles)
        liste_tors.append([resname] + torsions)

    df = pd.DataFrame(liste_dist, columns=["Residu", "C4'-P", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2"])
    df.to_csv(dist_file)
    df = pd.DataFrame(liste_angl, columns=["Residu", "C4'-P-O5'", "C1'-C4'-P", "C5'-C4'-P", "P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2"])
    df.to_csv(angl_file)
    df = pd.DataFrame(liste_tors, columns=["Residu", "P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "O5'-C5'-C4'-P°", "C5'-C4'-P°-O5'°", "C4'-P°-O5'°-C5'°", "C1'-C4'-P°-O5'°"])
    df.to_csv(tors_file)
1858 | - | ||
@trace_unhandled_exceptions
def measures_hrna_basepairs(name, s, thr_idx):
    """
    Runs measures_hrna_basepairs_chain() on the first chain of a structure and
    saves the basepair measures to results/geometry/HiRE-RNA/basepairs/.

    name: file name of the chain, both in the datapoints/ folder and in the output name.
    s: structure object; s[0].get_chains() must yield the chain to measure.
    thr_idx: worker index, used for the process title and the progress bar position.
    """

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_hrna_basepairs({name})")

    # single definition of the output path, shared by the check and the write
    out_file = runDir + "/results/geometry/HiRE-RNA/basepairs/" + 'basepairs_' + name + '.csv'

    # do not recompute something already computed
    # (tested first, so that nothing is extracted from the structure for nothing)
    if path.isfile(out_file):
        return

    chain = next(s[0].get_chains())
    df = pd.read_csv(os.path.abspath(path_to_3D_data + "datapoints/" + name))

    # ignore files with numbering errors : TODO : remove when we get DSSR Pro, there should not be numbering errors anymore
    # NOTE(review): nesting reconstructed from a listing without indentation; the
    # computation and the CSV output are assumed to be both conditional -- confirm.
    if df['index_chain'][0] == 1:
        l = measures_hrna_basepairs_chain(name, chain, df, thr_idx)
        df_calc = pd.DataFrame(l, columns=["type_LW", "nt1_idx", "nt1_res", "nt2_idx", "nt2_res", "Distance",
                                           "211_angle", "112_angle", "dB1", "dB2", "alpha1", "alpha2", "3211_torsion", "1123_torsion"])
        df_calc.to_csv(out_file, float_format="%.3f")
1881 | - | ||
@trace_unhandled_exceptions
def measures_hrna_basepairs_chain(name, chain, df, thr_idx):
    """
    Cleanup of the dataset, then
    measurements of distances and angles between paired nucleotides in the chain.

    name: chain label, only used in the progress bar description.
    chain: chain object indexable by residue id (' ', index, ' ').
    df: DataFrame with at least the columns 'index_chain', 'paired' and 'pair_type_LW'.
        'paired' holds a number or a comma-separated string of partner indexes,
        'pair_type_LW' the matching comma-separated pairing types.
    thr_idx: worker index, used for the progress bar position.

    Returns a list of rows [type_LW, nt1_idx, nt1_res, nt2_idx, nt2_res] + measures,
    one per basepair for which basepair_measures() returned something.
    """

    canonical = ('A', 'C', 'G', 'U')
    results = []

    # Kept for backward compatibility: historically this function silenced this
    # warning for the whole process, later code may rely on it being silent.
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

    # Keep the lines with a pairing only (NaN in paired = unpaired), in one pass,
    # without mutating a slice of the caller's DataFrame.
    pairs = df.loc[df['paired'].notnull(), ['index_chain', 'paired', 'pair_type_LW']]

    rows = []
    for index, paired, pair_type_LW in zip(pairs['index_chain'], pairs['paired'], pairs['pair_type_LW']):
        # Convert values from paired to a list of integers. The comma decides whether
        # there are several pairings, not the length of the string: a partner index
        # with 3 digits or more is still a single pairing.
        if isinstance(paired, str):
            partners = [int(p) for p in paired.split(',')]
        else:
            partners = [int(paired)]

        # a lone 0 means a pairing with another RNA chain: nothing to measure
        if partners == [0]:
            continue

        rows.append((int(index), partners, pair_type_LW.split(',')))

    for index, partners, types_LW in tqdm(rows, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_hrna_basepairs_chain", unit="res", leave=False):
        # calculations for each row of the pairs dataset
        nt1 = chain[(' ', index, ' ')]
        res1 = nt1.get_resname()
        if res1 not in canonical:
            continue

        # if several pairings, process them one by one, each with its own type
        for num_paired, type_LW in zip(partners, types_LW):
            if num_paired == 0:  # partner located in another chain
                continue
            nt2 = chain[(' ', num_paired, ' ')]
            res2 = nt2.get_resname()
            if res2 not in canonical:
                continue
            measures = basepair_measures(nt1, nt2)
            if measures is not None:
                results.append([type_LW, index, res1, num_paired, res2] + measures)

    return results
1954 | - | ||
@trace_unhandled_exceptions
def pyle_measures(name, s, thr_idx):
    """
    Measures the P-P, P-C4', P-C1', C4'-C4' and C1'-C1' distances between every pair
    of canonical nucleotides (A, C, G, U) of the first chain whose residue numbers
    differ by at least 4 (second residue after the first one).

    The result is saved to results/geometry/Pyle/distances/distances_pyle_<name>.csv.
    Nothing is recomputed if that file already exists.

    name: identifier used to build the output file name.
    s: structure object; s[0].get_chains() must yield the chain to measure.
    thr_idx: worker index, used for the process title and the progress bar position.
    """

    out_file = runDir + "/results/geometry/Pyle/distances/distances_pyle_" + name + ".csv"

    # do not recompute something already computed
    if path.isfile(out_file):
        return

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} pyle_measures({name})")

    canonical = ("A", "C", "G", "U")
    chain = next(s[0].get_chains())

    # Extract the atoms of interest once per residue, instead of once per pair of residues.
    # NOTE(review): nesting reconstructed from a listing without indentation; pairs whose
    # second residue is not A/C/G/U are assumed to produce no row -- confirm.
    nucleotides = []
    for res in chain:
        resname = res.get_resname()
        if resname not in canonical:
            continue
        atom_p = [atom.get_coord() for atom in res if atom.get_name() == "P"]
        atom_c1p = [atom.get_coord() for atom in res if "C1'" in atom.get_fullname()]
        atom_c4p = [atom.get_coord() for atom in res if "C4'" in atom.get_fullname()]
        nucleotides.append((resname, res.get_id()[1], atom_p, atom_c1p, atom_c4p))

    liste_dist = []
    for res1, resnum1, atom_p_1, atom_c1p_1, atom_c4p_1 in tqdm(nucleotides, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} pyle_measures", unit="res", leave=False):
        for res2, resnum2, atom_p_2, atom_c1p_2, atom_c4p_2 in nucleotides:
            # only residues at least 4 positions further in the numbering
            if resnum2 - resnum1 < 4:
                continue

            # If one of the atoms is missing, the euclidian distance returns NaN.
            p_p = get_euclidian_distance(atom_p_1, atom_p_2)
            p_c4p = get_euclidian_distance(atom_p_1, atom_c4p_2)
            p_c1p = get_euclidian_distance(atom_p_1, atom_c1p_2)
            c4p_c4p = get_euclidian_distance(atom_c4p_1, atom_c4p_2)
            c1p_c1p = get_euclidian_distance(atom_c1p_1, atom_c1p_2)

            liste_dist.append([res1, int(resnum1), res2, int(resnum2), p_p, p_c4p, p_c1p, c4p_c4p, c1p_c1p])

    df = pd.DataFrame(liste_dist, columns=["res1", "resnum1", "res2", "resnum2", "P-P", "P-C4'", "P-C1'", "C4'-C4'", "C1'-C1'"])
    df.to_csv(out_file)
2041 | - | ||
@trace_unhandled_exceptions
def count_occur_pyle_dist(fpath):
    """
    Sums, line by line, the columns P-P, P-C1', P-C4', C1'-C1' and C4'-C4' of every
    CSV file found in the folder `fpath`, and saves the totals (one row per column,
    one column per value of "classe") to
    results/geometry/Pyle/classes_dist/occurences_dist.csv.

    fpath: folder containing the CSV files to sum. Each file is expected to have
           the 5 columns above plus a "classe" column.

    A worker index is taken from the global idxQueue and always put back at the end,
    even when an error occurs.
    """

    global idxQueue
    thr_idx = idxQueue.get()
    try:
        setproctitle(f"Worker {thr_idx+1} : Extract occurences of {fpath}")

        columns = ["P-P", "P-C1'", "P-C4'", "C1'-C1'", "C4'-C4'"]
        liste = os.listdir(fpath)
        if not liste:
            # nothing to sum: do not fail on an empty folder
            warnings.warn(f"count_occur_pyle_dist: no file found in {fpath}, nothing computed.")
            return

        totals = None
        pbar = tqdm(total=len(liste), position=thr_idx, desc="Preparing ", leave=False)
        try:
            # files are consumed from the end of the listing, as before
            for fname in reversed(liste):
                df = pd.read_csv(os.path.abspath(os.path.join(fpath, fname)))
                if totals is None:
                    # the first file read initializes the counters
                    totals = [df[c].tolist() for c in columns]
                else:
                    for n, c in enumerate(columns):
                        values = df[c].tolist()
                        if len(values) > len(totals[n]):
                            raise IndexError(f"{fname} has more lines than the first file read")
                        # add on the first len(values) lines, leave the others untouched
                        summed = [a + b for a, b in zip(totals[n], values)]
                        totals[n] = summed + totals[n][len(summed):]
                pbar.update(1)
        finally:
            pbar.close()

        # the column labels come from the last file read
        df = pd.DataFrame(totals, columns=list(df["classe"]))
        df.to_csv(runDir + "/results/geometry/Pyle/classes_dist/occurences_dist.csv")
    finally:
        idxQueue.put(thr_idx)  # replace the thread index in the queue
        setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
2075 | - | ||
2076 | - | ||
@trace_unhandled_exceptions
def GMM_histo(data_ori, name_data, toric=False, hist=True, col=None, save=True) :
    """
    Plot Gaussian-Mixture-Model (with or without histograms)

    data_ori: the values to model (any sequence of numbers).
    name_data: name of the measure, used in titles, legends and file names.
    toric: True for angles in degrees. The data is then replicated at -360 and +360
           before fitting, and only the components centered in ]-180, 180] are kept.
    hist: if True, plot the histogram and one curve per component. If False, plot
          only the sum of the components on the current figure, with color `col`.
    col: color of the summed curve (only used when hist is False).
    save: if True (and hist is True), save the figure as
          Histogram_<name_data>_<n>_comps.png in the current directory and close it.

    The weights, means and standard deviations of the components are saved in
    results/geometry/json/<name_data>.json.
    """
    data_ori = np.array(data_ori)

    if data_ori.size == 0:
        # a GMM cannot be fitted on nothing
        warnings.warn(f"GMM_histo: no data for {name_data}, nothing plotted.")
        return

    if toric:
        # Extend the data on the right and on the left (for angles)
        data = np.concatenate([data_ori, data_ori-360.0, data_ori+360.0])
    else:
        data = data_ori

    obs = np.array(data).reshape(-1,1)

    # chooses the number of components based on the maximum likelihood value (maxlogv)
    # A mixture cannot have more components than samples, so the scan stops there.
    n_components_range = range(1, min(8, obs.shape[0]) + 1)
    maxlogv = []
    nb_components = 1
    nb_log_max = n_components_range[0]
    log_max = 0
    for n_comp in n_components_range:
        gmm = GaussianMixture(n_components=n_comp).fit(obs)
        maxlogv.append(gmm.lower_bound_)
        if gmm.lower_bound_ == max(maxlogv) : # takes the maximum
            nb_components = n_comp
            # if there is convergence, keep the first maximum found
            if abs(gmm.lower_bound_-log_max) < 0.02 : #threshold=0.02
                nb_components = nb_log_max
                break
        log_max = max(maxlogv)
        nb_log_max = n_comp

    # Now compute the final GMM
    g = GaussianMixture(n_components=nb_components)
    g.fit(obs) # still on extended data

    if toric:
        # Now decide which to keep: only the components centered in ]-180, 180]
        keep = [ -180 < float(np.ravel(m)[0]) <= 180 for m in g.means_ ]
        sum_weights = 0.0
        for i, w in enumerate(g.weights_):
            if keep[i]:
                sum_weights += w
        weights = []
        means = []
        covariances = []
        for i in range(nb_components):
            if not keep[i]:
                continue
            means.append(g.means_[i])
            covariances.append(g.covariances_[i])
            weights.append(g.weights_[i]/sum_weights) # renormalized on the kept components
        nb_components = len(means)
    else:
        weights = g.weights_
        means = g.means_
        covariances = g.covariances_

    # plot histograms if asked, with the appropriate number of components
    if hist:
        plt.hist(data_ori, color="green", edgecolor='black', linewidth=1.2, bins=50, density=True)
    # NOTE(review): nesting reconstructed from a listing without indentation; the axis
    # labels are assumed to be set whether or not the histogram is drawn -- confirm.
    if toric:
        plt.xlabel("Angle (Degrees)")
    else:
        plt.xlabel("Distance (Angströms)")
    plt.ylabel("Density")

    # Prepare the GMM curve with some absciss points
    if toric:
        x = np.linspace(-360.0,360.0,721)
    else:
        D = obs.ravel()
        x = np.linspace(D.min(),D.max(),1000)
    colors=['red', 'blue', 'gold', 'cyan', 'magenta', 'white', 'black', 'green']

    # prepare the dictionary to save the parameters
    summary_data = {}
    summary_data["measure"] = name_data
    summary_data["weights"] = []
    summary_data["means"] = []
    summary_data["std"] = []

    # plot
    curves = []
    for i in range(nb_components):

        # store the parameters
        mean = means[i]
        sigma = np.sqrt(covariances[i])
        weight = weights[i]
        # each parameter holds a single value, possibly wrapped in a 1-element array
        summary_data["means"].append("{:.2f}".format(float(np.ravel(mean)[0])))
        summary_data["std"].append("{:.2f}".format(float(np.ravel(sigma)[0])))
        summary_data["weights"].append("{:.2f}".format(float(np.ravel(weight)[0])))

        # compute the right x and y data to plot
        # (y is indexed with [0] below: it is expected to have one row holding the curve)
        y = weight*st.norm.pdf(x, mean, sigma)
        if toric:
            y_mod = (((y[0]+180.0)%360.0)-180.0)
            x_mod = (((x+180.0)%360.0)-180.0)
            s = sorted(zip(x_mod,y_mod))
            # The sorted points are consumed two by two and their y values added.
            # Index 360 takes three points at once, which shifts the pairing by one
            # for the indexes above it.
            newx = []
            newy = []
            for k in range(0, len(s), 2):
                if k == 362:
                    continue # this value is dealt with when k = 360
                newx.append(s[k-int(k>360)][0])
                if k == 360:
                    newy.append(s[k][1]+s[k+1][1]+s[k+2][1])
                else:
                    newy.append(s[k-int(k>360)][1]+s[k+1-int(k>360)][1])
        else:
            newx = x
            newy = y[0]

        if hist:
            # plot on top of the histograms
            plt.plot(newx, newy, c=colors[i])
        else:
            # store for later summation
            curves.append(np.array(newy))

    if hist:
        plt.title(f"Histogram of {name_data} with GMM of {nb_components} components (" + str(len(data_ori))+" values)")
        # NOTE(review): nesting reconstructed from a listing without indentation; the
        # figure is assumed to be closed only when it has been saved -- confirm.
        if save:
            plt.savefig(f"Histogram_{name_data}_{nb_components}_comps.png")
            plt.close()
    else:
        # Plot their sum, do not save figure yet
        try:
            plt.plot(newx, sum(curves), c=col, label=name_data)
        except TypeError:
            print("N curves:", len(curves))
            for c in curves:
                print(c)
        plt.legend()

    # Save the json
    with open(runDir + "/results/geometry/json/" +name_data + ".json", 'w', encoding='utf-8') as f:
        json.dump(summary_data, f, indent=4)
2229 | - | ||
2230 | -@trace_unhandled_exceptions | ||
2231 | -def gmm_aa_dists(): | ||
2232 | - """ | ||
2233 | - Draw the figures representing the data on the measurements of distances between atoms | ||
2234 | - """ | ||
2235 | - | ||
2236 | - setproctitle("GMM (all atoms, distances)") | ||
2237 | - | ||
2238 | - df=pd.read_csv(os.path.abspath(runDir + "/results/geometry/all-atoms/distances/dist_atoms.csv")) | ||
2239 | - | ||
2240 | - last_o3p_p=list(df["O3'-P"][~ np.isnan(df["O3'-P"])]) | ||
2241 | - #print(last_o3p_p) | ||
2242 | - op3_p=list(df["OP3-P"][~ np.isnan(df["OP3-P"])]) | ||
2243 | - p_op1=list(df["P-OP1"][~ np.isnan(df["P-OP1"])]) | ||
2244 | - p_op2=list(df["P-OP2"][~ np.isnan(df["P-OP2"])]) | ||
2245 | - p_o5p=list(df["P-O5'"][~ np.isnan(df["P-O5'"])]) | ||
2246 | - o5p_c5p=list(df["O5'-C5'"][~ np.isnan(df["O5'-C5'"])]) | ||
2247 | - c5p_c4p=list(df["C5'-C4'"][~ np.isnan(df["C5'-C4'"])]) | ||
2248 | - c4p_o4p=list(df["C4'-O4'"][~ np.isnan(df["C4'-O4'"])]) | ||
2249 | - o4p_c1p=list(df["O4'-C1'"][~ np.isnan(df["O4'-C1'"])]) | ||
2250 | - c1p_c2p=list(df["C1'-C2'"][~ np.isnan(df["C1'-C2'"])]) | ||
2251 | - c2p_o2p=list(df["C2'-O2'"][~ np.isnan(df["C2'-O2'"])]) | ||
2252 | - c2p_c3p=list(df["C2'-C3'"][~ np.isnan(df["C2'-C3'"])]) | ||
2253 | - c3p_o3p=list(df["C3'-O3'"][~ np.isnan(df["C3'-O3'"])]) | ||
2254 | - c4p_c3p=list(df["C4'-C3'"][~ np.isnan(df["C4'-C3'"])]) | ||
2255 | - | ||
2256 | - #if res = A ou G | ||
2257 | - c1p_n9=list(df["C1'-N9"][~ np.isnan(df["C1'-N9"])]) | ||
2258 | - n9_c8=list(df["N9-C8"][~ np.isnan(df["N9-C8"])]) | ||
2259 | - c8_n7=list(df["C8-N7"][~ np.isnan(df["C8-N7"])]) | ||
2260 | - n7_c5=list(df["N7-C5"][~ np.isnan(df["N7-C5"])]) | ||
2261 | - c5_c6=list(df["C5-C6"][~ np.isnan(df["C5-C6"])]) | ||
2262 | - c6_n1=list(df["C6-N1"][~ np.isnan(df["C6-N1"])]) | ||
2263 | - n1_c2=list(df["N1-C2"][~ np.isnan(df["N1-C2"])]) | ||
2264 | - c2_n3=list(df["C2-N3"][~ np.isnan(df["C2-N3"])]) | ||
2265 | - n3_c4=list(df["N3-C4"][~ np.isnan(df["N3-C4"])]) | ||
2266 | - c4_n9=list(df["C4-N9"][~ np.isnan(df["C4-N9"])]) | ||
2267 | - c4_c5=list(df["C4-C5"][~ np.isnan(df["C4-C5"])]) | ||
2268 | - #if res=G | ||
2269 | - c6_o6=list(df["C6-O6"][~ np.isnan(df["C6-O6"])]) | ||
2270 | - c2_n2=list(df["C2-N2"][~ np.isnan(df["C2-N2"])]) | ||
2271 | - #if res = A | ||
2272 | - c6_n6=list(df["C6-N6"][~ np.isnan(df["C6-N6"])]) | ||
2273 | - | ||
2274 | - #if res = C ou U | ||
2275 | - c1p_n1=list(df["C1'-N1"][~ np.isnan(df["C1'-N1"])]) | ||
2276 | - n1_c6=list(df["N1-C6"][~ np.isnan(df["N1-C6"])]) | ||
2277 | - c6_c5=list(df["C6-C5"][~ np.isnan(df["C6-C5"])]) | ||
2278 | - c5_c4=list(df["C5-C4"][~ np.isnan(df["C5-C4"])]) | ||
2279 | - c4_n3=list(df["C4-N3"][~ np.isnan(df["C4-N3"])]) | ||
2280 | - n3_c2=list(df["N3-C2"][~ np.isnan(df["N3-C2"])]) | ||
2281 | - c2_n1=list(df["C2-N1"][~ np.isnan(df["C2-N1"])]) | ||
2282 | - c2_o2=list(df["C2-O2"][~ np.isnan(df["C2-O2"])]) | ||
2283 | - #if res =C | ||
2284 | - c4_n4=list(df["C4-N4"][~ np.isnan(df["C4-N4"])]) | ||
2285 | - #if res=U | ||
2286 | - c4_o4=list(df["C4-O4"][~ np.isnan(df["C4-O4"])]) | ||
2287 | - | ||
2288 | - os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/commun/", exist_ok=True) | ||
2289 | - os.chdir(runDir+"/results/figures/GMM/all-atoms/distances/commun/") | ||
2290 | - # draw figures for atoms common to all nucleotides | ||
2291 | - GMM_histo(last_o3p_p, "O3'-P") | ||
2292 | - if len(op3_p) > 0 : | ||
2293 | - GMM_histo(op3_p, "OP3-P") | ||
2294 | - GMM_histo(p_op1, "P-OP1") | ||
2295 | - GMM_histo(p_op2, "P-OP2") | ||
2296 | - | ||
2297 | - GMM_histo(p_o5p, "P-O5'") | ||
2298 | - GMM_histo(o5p_c5p, "O5'-C5'") | ||
2299 | - GMM_histo(c5p_c4p, "C5'-C4'") | ||
2300 | - GMM_histo(c4p_o4p, "C4'-O4'") | ||
2301 | - GMM_histo(c4p_c3p, "C4'-C3'") | ||
2302 | - GMM_histo(c3p_o3p, "C3'-O3'") | ||
2303 | - GMM_histo(o4p_c1p, "O4'-C1'") | ||
2304 | - GMM_histo(c1p_c2p, "C1'-C2'") | ||
2305 | - GMM_histo(c2p_c3p, "C2'-C3'") | ||
2306 | - GMM_histo(c2p_o2p, "C2'-O2'") | ||
2307 | - | ||
2308 | - if len(op3_p) > 0 : | ||
2309 | - GMM_histo(op3_p, "OP3-P", toric=False, hist=False, col= 'lightcoral') | ||
2310 | - GMM_histo(p_op1, "P-OP1", toric=False, hist=False, col='gold') | ||
2311 | - GMM_histo(p_op2, "P-OP2", toric=False, hist=False, col='lightseagreen') | ||
2312 | - GMM_histo(last_o3p_p, "O3'-P", toric=False, hist=False, col='saddlebrown') | ||
2313 | - GMM_histo(p_o5p, "P-O5'", toric=False, hist=False, col='darkturquoise') | ||
2314 | - GMM_histo(o5p_c5p, "O5'-C5'", toric=False, hist=False, col='darkkhaki') | ||
2315 | - GMM_histo(c5p_c4p, "C5'-C4'", toric=False, hist=False, col='indigo') | ||
2316 | - GMM_histo(c4p_o4p, "C4'-O4'", toric=False, hist=False, col='maroon') | ||
2317 | - GMM_histo(c4p_c3p, "C4'-C3'", toric=False, hist=False, col='burlywood') | ||
2318 | - GMM_histo(c3p_o3p, "C3'-O3'", toric=False, hist=False, col='steelblue') | ||
2319 | - GMM_histo(o4p_c1p, "O4'-C1'", toric=False, hist=False, col='tomato') | ||
2320 | - GMM_histo(c1p_c2p, "C1'-C2'", toric=False, hist=False, col='darkolivegreen') | ||
2321 | - GMM_histo(c2p_c3p, "C2'-C3'", toric=False, hist=False, col='orchid') | ||
2322 | - GMM_histo(c2p_o2p, "C2'-O2'", toric=False, hist=False, col='deeppink') | ||
2323 | - axes=plt.gca() | ||
2324 | - axes.set_ylim(0, 100) | ||
2325 | - plt.xlabel("Distance (Angströms)") | ||
2326 | - plt.title("GMM of distances between common atoms ") | ||
2327 | - plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/commun/" + "GMM_distances_common_atoms.png") | ||
2328 | - plt.close() | ||
2329 | - | ||
2330 | - os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/purines/", exist_ok=True) | ||
2331 | - os.chdir(runDir+"/results/figures/GMM/all-atoms/distances/purines/") | ||
2332 | - # purines | ||
2333 | - GMM_histo(c1p_n9, "C1'-N9") | ||
2334 | - GMM_histo(n9_c8, "N9-C8") | ||
2335 | - GMM_histo(c8_n7, "C8-N7") | ||
2336 | - GMM_histo(n7_c5, "N7-C5") | ||
2337 | - GMM_histo(c5_c6, "C5-C6") | ||
2338 | - GMM_histo(c6_o6, "C6-O6") | ||
2339 | - GMM_histo(c6_n6, "C6-N6") | ||
2340 | - GMM_histo(c6_n1, "C6-N1") | ||
2341 | - GMM_histo(n1_c2, "N1-C2") | ||
2342 | - GMM_histo(c2_n2, "C2-N2") | ||
2343 | - GMM_histo(c2_n3, "C2-N3") | ||
2344 | - GMM_histo(n3_c4, "N3-C4") | ||
2345 | - GMM_histo(c4_n9, "C4-N9") | ||
2346 | - GMM_histo(c4_c5, "C4-C5") | ||
2347 | - | ||
2348 | - GMM_histo(c1p_n9, "C1'-N9", hist=False, col='lightcoral') | ||
2349 | - GMM_histo(n9_c8, "N9-C8", hist=False, col='gold') | ||
2350 | - GMM_histo(c8_n7, "C8-N7", hist=False, col='lightseagreen') | ||
2351 | - GMM_histo(n7_c5, "N7-C5", hist=False, col='saddlebrown') | ||
2352 | - GMM_histo(c5_c6, "C5-C6", hist=False, col='darkturquoise') | ||
2353 | - GMM_histo(c6_o6, "C6-O6", hist=False, col='darkkhaki') | ||
2354 | - GMM_histo(c6_n6, "C6-N6", hist=False, col='indigo') | ||
2355 | - GMM_histo(c6_n1, "C6-N1", hist=False, col='maroon') | ||
2356 | - GMM_histo(n1_c2, "N1-C2", hist=False, col='burlywood') | ||
2357 | - GMM_histo(c2_n2, "C2-N2", hist=False, col='steelblue') | ||
2358 | - GMM_histo(c2_n3, "C2-N3", hist=False, col='tomato') | ||
2359 | - GMM_histo(n3_c4, "N3-C4", hist=False, col='darkolivegreen') | ||
2360 | - GMM_histo(c4_n9, "C4-N9", hist=False, col='orchid') | ||
2361 | - GMM_histo(c4_c5, "C4-C5", hist=False, col='deeppink') | ||
2362 | - axes=plt.gca() | ||
2363 | - axes.set_ylim(0, 100) | ||
2364 | - plt.xlabel("Distance (Angströms)") | ||
2365 | - plt.title("GMM of distances between atoms of the purine cycles", fontsize=10) | ||
2366 | - plt.savefig(runDir+ "/results/figures/GMM/all-atoms/distances/purines/" + "GMM_distances_purine_cycles.png") | ||
2367 | - plt.close() | ||
2368 | - | ||
2369 | - os.makedirs(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/", exist_ok=True) | ||
2370 | - os.chdir(runDir+"/results/figures/GMM/all-atoms/distances/pyrimidines/") | ||
2371 | - # pyrimidines | ||
2372 | - | ||
2373 | - GMM_histo(c1p_n1, "C1'-N1") | ||
2374 | - GMM_histo(n1_c6, "N1-C6") | ||
2375 | - GMM_histo(c6_c5, "C6-C5") | ||
2376 | - GMM_histo(c5_c4, "C5-C4") | ||
2377 | - GMM_histo(c4_n3, "C4-N3") | ||
2378 | - GMM_histo(n3_c2, "N3-C2") | ||
2379 | - GMM_histo(c2_o2, "C2-O2") | ||
2380 | - GMM_histo(c2_n1, "C2-N1") | ||
2381 | - GMM_histo(c4_n4, "C4-N4") | ||
2382 | - GMM_histo(c4_o4, "C4-O4") | ||
2383 | - | ||
2384 | - GMM_histo(c1p_n1, "C1'-N1", hist=False, col='lightcoral') | ||
2385 | - GMM_histo(n1_c6, "N1-C6", hist=False, col='gold') | ||
2386 | - GMM_histo(c6_c5, "C6-C5", hist=False, col='lightseagreen') | ||
2387 | - GMM_histo(c5_c4, "C5-C4", hist=False, col='deeppink') | ||
2388 | - GMM_histo(c4_n3, "C4-N3", hist=False, col='red') | ||
2389 | - GMM_histo(n3_c2, "N3-C2", hist=False, col='lime') | ||
2390 | - GMM_histo(c2_o2, "C2-O2", hist=False, col='indigo') | ||
2391 | - GMM_histo(c2_n1, "C2-N1", hist=False, col='maroon') | ||
2392 | - GMM_histo(c4_n4, "C4-N4", hist=False, col='burlywood') | ||
2393 | - GMM_histo(c4_o4, "C4-O4", hist=False, col='steelblue') | ||
2394 | - axes=plt.gca() | ||
2395 | - #axes.set_xlim(1, 2) | ||
2396 | - axes.set_ylim(0, 100) | ||
2397 | - plt.xlabel("Distance (Angströms") | ||
2398 | - plt.title("GMM of distances between atoms of the pyrimidine cycles", fontsize=10) | ||
2399 | - plt.savefig(runDir + "/results/figures/GMM/all-atoms/distances/pyrimidines/" + "GMM_distances_pyrimidine_cycles.png") | ||
2400 | - plt.close() | ||
2401 | - | ||
2402 | - os.chdir(runDir) | ||
2403 | - setproctitle("GMM (all atoms, distances) finished") | ||
2404 | - | ||
@trace_unhandled_exceptions
def gmm_aa_torsions():
    """
    Separates the torsion angle measurements by angle type and plots the figures representing the data.

    The rows yielded by conversion_angles() on results/RNANet.db are split into
    one series per torsion (items 2 to 8 of each row) and missing values (None)
    are dropped. GMM_histo() is then called once per angle with its histogram,
    and once more without histogram so that all the curves end up on a single
    summary figure, saved as GMM_torsions.png in
    results/figures/GMM/all-atoms/torsions/.
    """
    setproctitle("GMM (all atoms, torsions)")

    # (label, position in the rows of conversion_angles(), colour on the summary figure).
    # The "Xhi" spelling is kept on purpose: merge_jsons() expects a file named Xhi.json.
    torsions = [
        ("Alpha",   2, 'red'),
        ("Beta",    3, 'firebrick'),
        ("Gamma",   4, 'limegreen'),
        ("Delta",   5, 'darkslateblue'),
        ("Epsilon", 6, 'goldenrod'),
        ("Zeta",    7, 'teal'),
        ("Xhi",     8, 'hotpink'),
    ]

    # One list of values per angle, without the null values
    values = {label: [] for label, _, _ in torsions}
    for angles_deg in conversion_angles(runDir + "/results/RNANet.db"):
        for label, position, _ in torsions:
            angle = angles_deg[position]
            if angle is not None:
                values[label].append(angle)

    outdir = runDir + "/results/figures/GMM/all-atoms/torsions/"
    os.makedirs(outdir, exist_ok=True)
    os.chdir(outdir)
    try:
        # First pass: one GMM with histogram per angle
        for label, _, _ in torsions:
            GMM_histo(values[label], label, toric=True)

        # Second pass: all GMMs without histogram, to compare them with each other
        for label, _, colour in torsions:
            GMM_histo(values[label], label, toric=True, hist=False, col=colour)
        plt.xlabel("Angle (Degrees)")
        plt.title("GMM of torsion angles")
        plt.savefig("GMM_torsions.png")
        plt.close()
    finally:
        # Always go back to the run directory, even if a plot failed,
        # so that later jobs do not inherit a wrong working directory.
        os.chdir(runDir)

    setproctitle("GMM (all atoms, torsions) finished")
2469 | - | ||
@trace_unhandled_exceptions
def gmm_wadley():
    """
    Plot the GMM figures of the Pyle model: distances, flat angles and pseudo-torsions.

    Distances come from results/geometry/Pyle/distances/distances_wadley.csv,
    flat angles from results/geometry/Pyle/angles/flat_angles_pyle.csv, and the
    pseudo-torsions from conversion_eta_theta() on results/RNANet.db (items 2
    to 7 of each row). For each family, GMM_histo() is called once per measure
    with its histogram, then once more without histogram to overlay all curves
    on a summary figure saved under results/figures/GMM/Pyle/.
    """
    setproctitle("GMM (Pyle model)")

    def non_nan(frame, column):
        """Return the values of frame[column] as a list, NaN entries removed."""
        series = frame[column]
        return list(series[~np.isnan(series)])

    try:
        # Distances
        df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/distances/distances_wadley.csv"))

        # NOTE(review): each series is drawn under the label of the *other* column
        # of its pair (column "C1'-P" is titled "P-C1'", and so on). Kept unchanged
        # here; confirm that this is intended.
        p_c1p = non_nan(df, "C1'-P")
        c1p_p = non_nan(df, "P-C1'")
        p_c4p = non_nan(df, "C4'-P")
        c4p_p = non_nan(df, "P-C4'")

        os.makedirs(runDir + "/results/figures/GMM/Pyle/distances/", exist_ok=True)
        os.chdir(runDir + "/results/figures/GMM/Pyle/distances/")

        for values, label in [(p_c1p, "P-C1'"), (c1p_p, "C1'-P"), (p_c4p, "P-C4'"), (c4p_p, "C4'-P")]:
            GMM_histo(values, label)

        for values, label, colour in [(p_c4p, "P-C4'", 'gold'),
                                      (c4p_p, "C4'-P", 'indigo'),
                                      (p_c1p, "P-C1'", 'firebrick'),
                                      (c1p_p, "C1'-P", 'seagreen')]:
            GMM_histo(values, label, toric=False, hist=False, col=colour)
        plt.xlabel("Distance (Angströms)")
        plt.title("GMM of distances (Pyle model)")
        plt.savefig("GMM_distances_pyle_model.png")
        plt.close()

        # Flat Angles
        df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/angles/flat_angles_pyle.csv"))

        flat_angles = [
            ("P-C1'-P°",    non_nan(df, "P-C1'-P°"),    'firebrick'),
            ("C1'-P°-C1'°", non_nan(df, "C1'-P°-C1'°"), 'seagreen'),
        ]

        os.makedirs(runDir + "/results/figures/GMM/Pyle/angles/", exist_ok=True)
        os.chdir(runDir + "/results/figures/GMM/Pyle/angles/")

        for label, values, _ in flat_angles:
            GMM_histo(values, label, toric=True)

        for label, values, colour in flat_angles:
            GMM_histo(values, label, toric=True, hist=False, col=colour)
        plt.xlabel("Angle (Degrees)")
        plt.title("GMM of flat angles (Pyle model)")
        plt.savefig("GMM_flat_angles_pyle_model.png")
        plt.close()

        # Torsion angles: (label, position in the rows of conversion_eta_theta(), colour)
        pseudotorsions = [
            ("Eta",     2, 'mediumaquamarine'),
            ("Theta",   3, 'darkorchid'),
            ("Eta'",    4, 'cyan'),
            ("Theta'",  5, 'crimson'),
            ("Eta''",   6, 'royalblue'),
            ("Theta''", 7, 'palevioletred'),
        ]
        values_of = {label: [] for label, _, _ in pseudotorsions}
        for angles_deg in conversion_eta_theta(runDir + "/results/RNANet.db"):
            for label, position, _ in pseudotorsions:
                angle = angles_deg[position]
                if angle is not None:  # drop the missing values
                    values_of[label].append(angle)

        os.makedirs(runDir + "/results/figures/GMM/Pyle/pseudotorsions/", exist_ok=True)
        os.chdir(runDir + "/results/figures/GMM/Pyle/pseudotorsions/")

        for label, _, _ in pseudotorsions:
            GMM_histo(values_of[label], label, toric=True)

        for label, _, colour in pseudotorsions:
            GMM_histo(values_of[label], label, toric=True, hist=False, col=colour)
        plt.xlabel("Angle (Degrees)")
        plt.title("GMM of pseudo-torsion angles (Pyle Model)")
        plt.savefig("GMM_pseudotorsion_angles_pyle_model.png")
        plt.close()
    finally:
        # Always go back to the run directory, even if one of the steps failed
        os.chdir(runDir)

    setproctitle("GMM (Pyle model) finished")
2567 | - | ||
def gmm_pyle_type(ntpair, data):
    """
    Plot the GMMs of the Pyle-model distances measured for one pair of nucleotides.

    ntpair: two-letter label of the pair (e.g. "AU"), used in titles and in the file name.
    data: dataframe with the columns "P-P", "P-C4'", "P-C1'", "C4'-C4'" and "C1'-C1'".

    The five curves are drawn without histogram on the same figure, saved as
    Distances_Pyle_<ntpair>.png in results/figures/GMM/Pyle/distances/. The
    working directory is set back to runDir before returning.
    """
    setproctitle(f"GMM (Pyle {ntpair} )")

    def non_nan(column):
        """Return the values of data[column] as a list, NaN entries removed."""
        series = data[column]
        return list(series[~np.isnan(series)])

    outdir = runDir + "/results/figures/GMM/Pyle/distances/"
    os.makedirs(outdir, exist_ok=True)
    os.chdir(outdir)
    try:
        p_p = non_nan("P-P")
        p_c4p = non_nan("P-C4'")
        p_c1p = non_nan("P-C1'")
        c4p_c4p = non_nan("C4'-C4'")
        c1p_c1p = non_nan("C1'-C1'")

        # The number of P-P values is part of the first label, no need to print it separately
        curves = [
            (p_p,     f"Distance P-P between {ntpair} tips for {len(p_p)} values", "cyan"),
            (p_c4p,   f"Distance P-C4' between {ntpair} tips", "tomato"),
            (p_c1p,   f"Distance P-C1' between {ntpair} tips", "goldenrod"),
            (c4p_c4p, f"Distance C4'-C4' between {ntpair} tips", "magenta"),
            (c1p_c1p, f"Distance C1'-C1' between {ntpair} tips", "black"),
        ]
        for values, label, colour in curves:
            GMM_histo(values, label, toric=False, hist=False, col=colour)

        plt.xlabel("Distance (Angströms)")
        # NOTE(review): the y axis gets the same label as the x axis; kept unchanged,
        # confirm whether a different y label was intended.
        plt.ylabel("Distance (Angströms)")
        plt.title(f"GMM of distances for {ntpair} ", fontsize=10)
        plt.savefig(f"Distances_Pyle_{ntpair}.png" )
        plt.close()
    finally:
        # Like the other GMM jobs, leave the process in the run directory
        os.chdir(runDir)

    setproctitle(f"GMM (Pyle {ntpair} distances) finished")
2604 | - | ||
def gmm_pyle():
    """
    Plot the Pyle-model distance GMMs separately for every ordered pair of nucleotides.

    Loads results/geometry/Pyle/distances/distances.csv and, for each (res1, res2)
    combination of A, C, G and U that has at least one row, hands these rows to
    gmm_pyle_type().
    """
    setproctitle("GMM (Pyle model)")

    measures = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/distances/distances.csv"))
    if not len(measures):
        return

    nucleotides = ('A', 'C', 'G', 'U')
    for first in nucleotides:
        for second in nucleotides:
            subset = measures[(measures.res1 == first) & (measures.res2 == second)]
            if len(subset):
                gmm_pyle_type(first + second, subset)
2619 | - | ||
@trace_unhandled_exceptions
def gmm_hrna():
    """
    Draw the figures representing the data on the measurements between atoms of the HiRE-RNA model.

    Three families are processed in turn, each read from its CSV file under
    results/geometry/HiRE-RNA/: distances, angles and torsions. For each
    family, GMM_histo() is called once per column with its histogram, then once
    more without histogram to overlay all curves on a summary figure saved
    under results/figures/GMM/HiRE-RNA/.
    """
    setproctitle("GMM (HiRE-RNA)")

    def non_nan(frame, column):
        """Return the values of frame[column] as a list, NaN entries removed."""
        series = frame[column]
        return list(series[~np.isnan(series)])

    geometry = runDir + "/results/geometry/HiRE-RNA/"
    figures = runDir + "/results/figures/GMM/HiRE-RNA/"

    try:
        # Distances: (column, colour on the summary figure), in plotting order
        df = pd.read_csv(os.path.abspath(geometry + "distances/dist_atoms_hire_RNA.csv"))
        curves = [
            ("O5'-C5'", 'lightcoral'),
            ("B1-B2",   'limegreen'),
            ("C1'-B1",  'tomato'),
            ("C5'-C4'", 'aquamarine'),
            ("C4'-C1'", 'goldenrod'),
            ("P-O5'",   'darkcyan'),
            ("C4'-P",   'deeppink'),
        ]
        series = {name: non_nan(df, name) for name, _ in curves}

        os.makedirs(figures + "distances/", exist_ok=True)
        os.chdir(figures + "distances/")

        for name, _ in curves:
            GMM_histo(series[name], name)

        for name, colour in curves:
            GMM_histo(series[name], name, toric=False, hist=False, col=colour)
        axes = plt.gca()
        axes.set_ylim(0, 100)
        plt.xlabel("Distance (Angströms)")
        plt.title("GMM of distances between HiRE-RNA beads")
        plt.savefig(figures + "distances/GMM_distances_HiRE_RNA.png")
        plt.close()

        # Angles
        df = pd.read_csv(os.path.abspath(geometry + "angles/angles_hire_RNA.csv"))
        curves = [
            ("C4'-P-O5'",   'lightcoral'),
            ("C1'-C4'-P",   'limegreen'),
            ("C5'-C4'-P",   'tomato'),
            ("P-O5'-C5'",   'aquamarine'),
            ("O5'-C5'-C4'", 'goldenrod'),
            ("C5'-C4'-C1'", 'darkcyan'),
            ("C4'-C1'-B1",  'deeppink'),
            ("C1'-B1-B2",   'indigo'),
        ]
        series = {name: non_nan(df, name) for name, _ in curves}

        os.makedirs(figures + "angles/", exist_ok=True)
        os.chdir(figures + "angles/")

        for name, _ in curves:
            GMM_histo(series[name], name, toric=True)

        for name, colour in curves:
            GMM_histo(series[name], name, toric=True, hist=False, col=colour)
        axes = plt.gca()
        axes.set_ylim(0, 100)
        plt.xlabel("Angle (Degrees)")
        plt.title("GMM of angles between HiRE-RNA beads")
        plt.savefig(figures + "angles/GMM_angles_HiRE_RNA.png")
        plt.close()

        # Torsions
        df = pd.read_csv(os.path.abspath(geometry + "torsions/angles_torsion_hire_RNA.csv"))
        curves = [
            ("P-O5'-C5'-C4'",    'darkred'),
            ("O5'-C5'-C4'-C1'",  'chocolate'),
            ("C5'-C4'-C1'-B1",   'mediumvioletred'),
            ("C4'-C1'-B1-B2",    'cadetblue'),
            ("O5'-C5'-C4'-P°",   'darkkhaki'),
            ("C5'-C4'-P°-O5'°",  'springgreen'),
            ("C4'-P°-O5'°-C5'°", 'indigo'),
            ("C1'-C4'-P°-O5'°",  'gold'),
        ]
        series = {name: non_nan(df, name) for name, _ in curves}

        os.makedirs(figures + "torsions/", exist_ok=True)
        os.chdir(figures + "torsions/")

        for name, _ in curves:
            GMM_histo(series[name], name, toric=True)

        for name, colour in curves:
            GMM_histo(series[name], name, toric=True, hist=False, col=colour)
        plt.xlabel("Angle (Degrees)")
        plt.title("GMM of torsion angles between HiRE-RNA beads")
        plt.savefig("GMM_torsions_HiRE_RNA.png")
        plt.close()
    finally:
        # Always go back to the run directory, even if one of the steps failed
        os.chdir(runDir)

    setproctitle("GMM (HiRE-RNA) finished")
2742 | - | ||
@trace_unhandled_exceptions
def gmm_hrna_basepair_type(type_LW, ntpair, data):
    """
    Plot the statistical figures of one type of HiRE-RNA basepair in a single window.

    type_LW: basepair type label (e.g. "cWW"), used in titles and file name.
    ntpair: two-letter label of the paired nucleotides (e.g. "GC").
    data: dataframe restricted to this basepair type and nucleotide pair.

    Top panel: superposition of the GMMs of the plane angles and torsions.
    Bottom panel: superposition of the GMMs of the distances.
    The figure is saved as <type_LW>_<ntpair>_basepairs.png in the current
    working directory, and is closed even if one of the plots fails.
    """
    setproctitle(f"GMM (HiRE-RNA {type_LW} basepairs)")

    # (column of data, label given to GMM_histo, colour)
    angle_curves = [
        ("211_angle",    f"{type_LW}_{ntpair}_C1'-B1-B1pair",             'cyan'),
        ("112_angle",    f"{type_LW}_{ntpair}_B1-B1pair-C1'pair",         'magenta'),
        ("3211_torsion", f"{type_LW}_{ntpair}_C4'-C1'-B1-B1pair",         'black'),
        ("1123_torsion", f"{type_LW}_{ntpair}_B1-B1pair-C1'pair-C4'pair", 'maroon'),
        ("alpha1",       f"{type_LW}_{ntpair}_alpha_1",                   "yellow"),
        ("alpha2",       f"{type_LW}_{ntpair}_alpha_2",                   'olive'),
    ]
    distance_curves = [
        ("Distance", f"Distance between {type_LW} {ntpair} tips", "cyan"),
        ("dB1",      f"{type_LW} {ntpair} dB1",                   "tomato"),
        ("dB2",      f"{type_LW} {ntpair} dB2",                   "goldenrod"),
    ]

    plt.figure(figsize = (10, 10))
    try:
        plt.gcf().subplots_adjust(left = 0.1, bottom = 0.1, right = 0.9, top = 0.9, wspace = 0, hspace = 0.5)

        plt.subplot(2, 1, 1)
        for column, label, colour in angle_curves:
            GMM_histo(data[column], label, toric=True, hist=False, col=colour)
        plt.xlabel("Angle (degree)")
        plt.title(f"GMM of plane angles for {type_LW} {ntpair} basepairs", fontsize=10)

        plt.subplot(2, 1, 2)
        for column, label, colour in distance_curves:
            GMM_histo(data[column], label, toric=False, hist=False, col=colour)
        plt.xlabel("Distance (Angströms)")
        plt.title(f"GMM of distances for {type_LW} {ntpair} basepairs", fontsize=10)

        plt.savefig(f"{type_LW}_{ntpair}_basepairs.png" )
    finally:
        # This function is called once per basepair type and nucleotide pair:
        # do not leave a figure open behind each failed call.
        plt.close()

    setproctitle(f"GMM (HiRE-RNA {type_LW} {ntpair} basepairs) finished")
2778 | - | ||
@trace_unhandled_exceptions
def gmm_hrna_basepairs():
    """
    Plot the HiRE-RNA basepair figures for every basepair type and nucleotide pair.

    Loads results/geometry/HiRE-RNA/basepairs/basepairs.csv, then for each of
    the 18 basepair types listed below and each ordered pair of nucleotides
    (A, C, G, U) having at least one row, calls gmm_hrna_basepair_type() from
    within results/figures/GMM/HiRE-RNA/basepairs/, where the figures are saved.
    """
    setproctitle("GMM (HiRE-RNA basepairs)")

    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/HiRE-RNA/basepairs/basepairs.csv"))

    lw = ["cWW", "tWW", "cWH", "tWH", "cHW", "tHW", "cWS", "tWS", "cSW", "tSW", "cHH", "tHH", "cSH", "tSH", "cHS", "tHS", "cSS", "tSS"]
    nucleotides = ['A', 'C', 'G', 'U']

    os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True)
    os.chdir(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/")
    try:
        for lw_type in lw:
            data = df[df['type_LW'] == lw_type]
            if not len(data):
                continue
            for b1 in nucleotides:
                for b2 in nucleotides:
                    thisbases = data[(data.nt1_res == b1) & (data.nt2_res == b2)]
                    if len(thisbases):
                        gmm_hrna_basepair_type(lw_type, b1 + b2, thisbases)
    finally:
        # Always go back to the run directory, even if something failed
        os.chdir(runDir)

    setproctitle("GMM (HiRE-RNA basepairs) finished")
2812 | - | ||
def merge_jsons():
    """
    Merge the per-measure JSON files of results/geometry/json/ into a few summary files.

    Creates all_atom_distances.json, all_atom_torsions.json, hirerna_distances.json,
    hirerna_angles.json, hirerna_torsions.json, and one hirerna_<nt1><nt2>_basepairs.json
    per ordered pair of nucleotides, using concat_jsons(). The per-measure files
    are deleted afterwards, together with every t*.json, c*.json and
    Distance*.json file of that folder.
    """
    json_dir = runDir + "/results/geometry/json/"

    def paths(names):
        """Turn measure names into the paths of their JSON files."""
        return [json_dir + x + ".json" for x in names]

    # All atom distances
    bonds = paths(["O3'-P", "OP3-P", "P-OP1", "P-OP2", "P-O5'", "O5'-C5'", "C5'-C4'", "C4'-O4'", "C4'-C3'", "O4'-C1'", "C1'-C2'", "C2'-O2'", "C2'-C3'", "C3'-O3'", "C1'-N9",
                   "N9-C8", "C8-N7", "N7-C5", "C5-C6", "C6-O6", "C6-N6", "C6-N1", "N1-C2", "C2-N2", "C2-N3", "N3-C4", "C4-N9", "C4-C5",
                   "C1'-N1", "N1-C6", "C6-C5", "C5-C4", "C4-N3", "N3-C2", "C2-O2", "C2-N1", "C4-N4", "C4-O4"])
    concat_jsons(bonds, json_dir + "all_atom_distances.json")

    # All atom torsions
    torsions = paths(["Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Xhi", "Zeta"])
    concat_jsons(torsions, json_dir + "all_atom_torsions.json")

    # HiRE-RNA distances
    hrnabonds = paths(["P-O5'", "O5'-C5'", "C5'-C4'", "C4'-C1'", "C1'-B1", "B1-B2", "C4'-P"])
    concat_jsons(hrnabonds, json_dir + "hirerna_distances.json")

    # HiRE-RNA angles
    hrnaangles = paths(["P-O5'-C5'", "O5'-C5'-C4'", "C5'-C4'-C1'", "C4'-C1'-B1", "C1'-B1-B2", "C4'-P-O5'", "C5'-C4'-P", "C1'-C4'-P"])
    concat_jsons(hrnaangles, json_dir + "hirerna_angles.json")

    # HiRE-RNA torsions
    hrnators = paths(["P-O5'-C5'-C4'", "O5'-C5'-C4'-C1'", "C5'-C4'-C1'-B1", "C4'-C1'-B1-B2", "C4'-P°-O5'°-C5'°", "C5'-C4'-P°-O5'°", "C1'-C4'-P°-O5'°", "O5'-C5'-C4'-P°"])
    concat_jsons(hrnators, json_dir + "hirerna_torsions.json")

    # HiRE-RNA basepairs
    for nt1 in ['A', 'C', 'G', 'U']:
        for nt2 in ['A', 'C', 'G', 'U']:
            merged_name = f"hirerna_{nt1}{nt2}_basepairs.json"
            # The pattern also matches the merged file left by a previous run:
            # leave it out, otherwise it would be nested into the new one.
            bps = [f for f in glob.glob(json_dir + f"*{nt1}{nt2}*.json")
                   if os.path.basename(f) != merged_name]
            concat_jsons(bps, json_dir + merged_name)

    def remove_all(files):
        """Delete the given files, ignoring those which do not exist."""
        for f in files:
            try:
                os.remove(f)
            except FileNotFoundError:
                pass

    # Delete previous files
    remove_all(bonds + torsions + hrnabonds + hrnaangles + hrnators)
    for pattern in ("t*.json", "c*.json", "Distance*.json"):
        remove_all(glob.glob(json_dir + pattern))
2869 | - | ||
@trace_unhandled_exceptions
def loop(f):
    """
    Read the CSV file f and return its content as a dataframe.
    """
    frame = pd.read_csv(f)
    return frame
2873 | - | ||
@trace_unhandled_exceptions
def concat_dataframes(fpath, outfilename):
    """
    Concatenates the dataframes containing measures
    and creates a new dataframe gathering all.

    fpath: folder containing the CSV files to gather (used as a prefix, so it
           is expected to end with a path separator).
    outfilename: name of the CSV file to write, inside fpath.

    Every file of fpath except outfilename itself is read and the rows are
    written to fpath + outfilename with a fresh index. If the folder contains
    no file to read, a warning is issued and nothing is written. The worker
    index taken from idxQueue is always put back, even on error.
    """
    global idxQueue
    thr_idx = idxQueue.get()
    try:
        setproctitle(f"Worker {thr_idx+1} : Concatenation of {fpath}")

        # The output is written in the same folder as the inputs: do not read
        # the file left by a previous run back into the new one.
        liste = [name for name in os.listdir(fpath) if name != outfilename]

        if liste:
            pbar = tqdm(total=len(liste), position=thr_idx, desc="Preparing "+outfilename, leave=False)
            try:
                frames = []
                # Read in reverse listing order, to keep the row order produced
                # by earlier versions of this function.
                for name in reversed(liste):
                    frames.append(pd.read_csv(os.path.abspath(fpath + name), engine="python"))
                    pbar.update(1)
            finally:
                pbar.close()

            # A single concatenation, instead of copying the growing dataframe for each file
            df_tot = pd.concat(frames, ignore_index=True)
            df_tot.to_csv(fpath + outfilename)
        else:
            warnings.warn(f"No file to concatenate in {fpath}, {outfilename} was not written.")
    finally:
        idxQueue.put(thr_idx) # replace the thread index in the queue, even after a failure
    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
2906 | - | ||
def concat_jsons(flist, outfilename):
    """
    Reads JSON files computed by the geometry jobs and merge them into a smaller
    number of files.

    flist: paths of the JSON files to merge.
    outfilename: path of the JSON file to write.

    The output is a JSON list holding the content of each input file, in the
    order of flist. Input files which do not exist are skipped with a warning
    (some measures are only computed when data is available), so the output
    may be an empty list.
    """

    result = []
    for path in flist:
        try:
            with open(path, "rb") as infile:
                result.append(json.load(infile))
        except FileNotFoundError:
            warnings.warn(f"concat_jsons: {path} not found, skipped.")

    # write the files
    with open(outfilename, 'w', encoding='utf-8') as outfile:
        json.dump(result, outfile, indent=4)
2923 | 1200 | ||
2924 | def process_jobs(joblist): | 1201 | def process_jobs(joblist): |
2925 | """ | 1202 | """ |
... | @@ -2952,13 +1229,16 @@ if __name__ == "__main__": | ... | @@ -2952,13 +1229,16 @@ if __name__ == "__main__": |
2952 | DO_WADLEY_ANALYSIS = False | 1229 | DO_WADLEY_ANALYSIS = False |
2953 | DO_AVG_DISTANCE_MATRIX = False | 1230 | DO_AVG_DISTANCE_MATRIX = False |
2954 | DO_HIRE_RNA_MEASURES = False | 1231 | DO_HIRE_RNA_MEASURES = False |
1232 | + RESCAN_GMM_COMP_NUM = False | ||
2955 | try: | 1233 | try: |
2956 | - opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", "3d-folder=", "seq-folder=", "hire-rna" ]) | 1234 | + opts, _ = getopt.getopt( sys.argv[1:], "r:h", |
1235 | + [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", | ||
1236 | + "3d-folder=", "seq-folder=", "hire-rna", "rescan-nmodes", "version" ]) | |
2957 | except getopt.GetoptError as err: | 1237 | except getopt.GetoptError as err: |
2958 | print(err) | 1238 | print(err) |
2959 | sys.exit(2) | 1239 | sys.exit(2) |
2960 | - for opt, arg in opts: | ||
2961 | 1240 | ||
1241 | + for opt, arg in opts: | ||
2962 | if opt == "-h" or opt == "--help": | 1242 | if opt == "-h" or opt == "--help": |
2963 | print( "RNANet statistics, a script to build a multiscale RNA dataset from public data\n" | 1243 | print( "RNANet statistics, a script to build a multiscale RNA dataset from public data\n" |
2964 | "Developed by Louis Becquey, Khodor Hannoush, and Aglaé Tabot 2019/2021") | 1244 | "Developed by Louis Becquey, Khodor Hannoush, and Aglaé Tabot 2019/2021") |
... | @@ -2975,28 +1255,28 @@ if __name__ == "__main__": | ... | @@ -2975,28 +1255,28 @@ if __name__ == "__main__": |
2975 | print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.") | 1255 | print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.") |
2976 | print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.") | 1256 | print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.") |
2977 | print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data.") | 1257 | print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data.") |
2978 | - | 1258 | + print("--rescan-nmodes\t\tDo not assume the number of modes in distances and angles distributions, measure it.") |
2979 | sys.exit() | 1259 | sys.exit() |
2980 | - elif opt == '--version': | 1260 | + elif opt == "--version": |
2981 | print("RNANet statistics 1.6 beta") | 1261 | print("RNANet statistics 1.6 beta") |
2982 | sys.exit() | 1262 | sys.exit() |
2983 | elif opt == "-r" or opt == "--resolution": | 1263 | elif opt == "-r" or opt == "--resolution": |
2984 | assert float(arg) > 0.0 and float(arg) <= 20.0 | 1264 | assert float(arg) > 0.0 and float(arg) <= 20.0 |
2985 | res_thr = float(arg) | 1265 | res_thr = float(arg) |
2986 | - elif opt=='--3d-folder': | 1266 | + elif opt == "--3d-folder": |
2987 | path_to_3D_data = path.abspath(arg) | 1267 | path_to_3D_data = path.abspath(arg) |
2988 | if path_to_3D_data[-1] != '/': | 1268 | if path_to_3D_data[-1] != '/': |
2989 | path_to_3D_data += '/' | 1269 | path_to_3D_data += '/' |
2990 | - elif opt=='--seq-folder': | 1270 | + elif opt == "--seq-folder": |
2991 | path_to_seq_data = path.abspath(arg) | 1271 | path_to_seq_data = path.abspath(arg) |
2992 | if path_to_seq_data[-1] != '/': | 1272 | if path_to_seq_data[-1] != '/': |
2993 | path_to_seq_data += '/' | 1273 | path_to_seq_data += '/' |
2994 | - elif opt=='--from-scratch': | 1274 | + elif opt == "--from-scratch": |
2995 | DELETE_OLD_DATA = True | 1275 | DELETE_OLD_DATA = True |
2996 | DO_WADLEY_ANALYSIS = True | 1276 | DO_WADLEY_ANALYSIS = True |
2997 | - elif opt=="--distance-matrices": | 1277 | + elif opt == "--distance-matrices": |
2998 | DO_AVG_DISTANCE_MATRIX = True | 1278 | DO_AVG_DISTANCE_MATRIX = True |
2999 | - elif opt=='--wadley': | 1279 | + elif opt == "--wadley": |
3000 | DO_WADLEY_ANALYSIS = True | 1280 | DO_WADLEY_ANALYSIS = True |
3001 | os.makedirs(runDir+"/results/geometry/Pyle/distances/", exist_ok=True) | 1281 | os.makedirs(runDir+"/results/geometry/Pyle/distances/", exist_ok=True) |
3002 | os.makedirs(runDir+"/results/geometry/Pyle/classes_dist/", exist_ok=True) | 1282 | os.makedirs(runDir+"/results/geometry/Pyle/classes_dist/", exist_ok=True) |
... | @@ -3005,7 +1285,7 @@ if __name__ == "__main__": | ... | @@ -3005,7 +1285,7 @@ if __name__ == "__main__": |
3005 | os.makedirs(runDir+"/results/figures/GMM/Pyle/distances/", exist_ok=True) | 1285 | os.makedirs(runDir+"/results/figures/GMM/Pyle/distances/", exist_ok=True) |
3006 | os.makedirs(runDir+"/results/figures/GMM/Pyle/angles/", exist_ok=True) | 1286 | os.makedirs(runDir+"/results/figures/GMM/Pyle/angles/", exist_ok=True) |
3007 | os.makedirs(runDir+"/results/figures/GMM/Pyle/pseudotorsions/", exist_ok=True) | 1287 | os.makedirs(runDir+"/results/figures/GMM/Pyle/pseudotorsions/", exist_ok=True) |
3008 | - elif opt=='--hire-rna': | 1288 | + elif opt == "--hire-rna": |
3009 | DO_HIRE_RNA_MEASURES = True | 1289 | DO_HIRE_RNA_MEASURES = True |
3010 | os.makedirs(runDir + "/results/geometry/HiRE-RNA/distances/", exist_ok=True) | 1290 | os.makedirs(runDir + "/results/geometry/HiRE-RNA/distances/", exist_ok=True) |
3011 | os.makedirs(runDir + "/results/geometry/HiRE-RNA/angles/", exist_ok=True) | 1291 | os.makedirs(runDir + "/results/geometry/HiRE-RNA/angles/", exist_ok=True) |
... | @@ -3015,7 +1295,8 @@ if __name__ == "__main__": | ... | @@ -3015,7 +1295,8 @@ if __name__ == "__main__": |
3015 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/angles/", exist_ok=True) | 1295 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/angles/", exist_ok=True) |
3016 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True) | 1296 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True) |
3017 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) | 1297 | os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) |
3018 | - | 1298 | + elif opt == "--rescan-nmodes": |
1299 | + RESCAN_GMM_COMP_NUM = True | ||
3019 | 1300 | ||
3020 | # Load mappings. famlist will contain only families with structures at this resolution threshold. | 1301 | # Load mappings. famlist will contain only families with structures at this resolution threshold. |
3021 | 1302 | ||
... | @@ -3053,7 +1334,8 @@ if __name__ == "__main__": | ... | @@ -3053,7 +1334,8 @@ if __name__ == "__main__": |
3053 | print("Old data deleted.") | 1334 | print("Old data deleted.") |
3054 | 1335 | ||
3055 | # Prepare the multiprocessing execution environment | 1336 | # Prepare the multiprocessing execution environment |
3056 | - nworkers = min(read_cpu_number()-1, 50) | 1337 | + global nworkers |
1338 | + nworkers = read_cpu_number()-1 | ||
3057 | print("Using", nworkers, "threads...") | 1339 | print("Using", nworkers, "threads...") |
3058 | thr_idx_mgr = Manager() | 1340 | thr_idx_mgr = Manager() |
3059 | idxQueue = thr_idx_mgr.Queue() | 1341 | idxQueue = thr_idx_mgr.Queue() |
... | @@ -3063,26 +1345,25 @@ if __name__ == "__main__": | ... | @@ -3063,26 +1345,25 @@ if __name__ == "__main__": |
3063 | # Define the tasks | 1345 | # Define the tasks |
3064 | joblist = [] | 1346 | joblist = [] |
3065 | 1347 | ||
3066 | - # Do eta/theta plots | 1348 | + # # Do eta/theta plots |
3067 | - #if n_unmapped_chains and DO_WADLEY_ANALYSIS: | 1349 | + # if n_unmapped_chains and DO_WADLEY_ANALYSIS: |
3068 | # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) | 1350 | # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) |
3069 | # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) | 1351 | # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) |
3070 | 1352 | ||
3071 | - # Do distance matrices for each family excl. LSU/SSU (will be processed later) | 1353 | + # # Do distance matrices for each family excl. LSU/SSU (will be processed later) |
3072 | - if DO_AVG_DISTANCE_MATRIX: | 1354 | + # if DO_AVG_DISTANCE_MATRIX: |
3073 | - extracted_chains = [] | 1355 | + # extracted_chains = [] |
3074 | - for file in os.listdir(path_to_3D_data + "rna_mapped_to_Rfam"): | 1356 | + # for file in os.listdir(path_to_3D_data + "rna_mapped_to_Rfam"): |
3075 | - if os.path.isfile(os.path.join(path_to_3D_data + "rna_mapped_to_Rfam", file)): | 1357 | + # if os.path.isfile(os.path.join(path_to_3D_data + "rna_mapped_to_Rfam", file)): |
3076 | - e1 = file.split('_')[0] | 1358 | + # e1 = file.split('_')[0] |
3077 | - e2 = file.split('_')[1] | 1359 | + # e2 = file.split('_')[1] |
3078 | - e3 = file.split('_')[2] | 1360 | + # e3 = file.split('_')[2] |
3079 | - extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3) | 1361 | + # extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3) |
3080 | - for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]: # Process the rRNAs later only 3 by 3 | 1362 | + # for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]: # Process the rRNAs later only 3 by 3 |
3081 | - joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) | 1363 | + # joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) |
3082 | - joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) | 1364 | + # joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) |
3083 | - | 1365 | + |
3084 | - # Do general family statistics | 1366 | + # # Do general family statistics |
3085 | - | ||
3086 | # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths | 1367 | # joblist.append(Job(function=stats_len)) # Computes figures about chain lengths |
3087 | # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) | 1368 | # joblist.append(Job(function=stats_freq)) # updates the database (nucleotide frequencies in families) |
3088 | # for f in famlist: | 1369 | # for f in famlist: |
... | @@ -3091,25 +1372,18 @@ if __name__ == "__main__": | ... | @@ -3091,25 +1372,18 @@ if __name__ == "__main__": |
3091 | # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) | 1372 | # joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database (identity matrices of families) |
3092 | 1373 | ||
3093 | 1374 | ||
3094 | - # Do geometric measures on all chains | 1375 | + # Do geometric measures |
3095 | - #print(liste_repres('/home/data/RNA/3D/latest_nr_list_4.0A.csv')) | ||
3096 | - | ||
3097 | - # print(measure_from_structure(os.listdir(path_to_3D_data + "rna_only")[0])) | ||
3098 | if n_unmapped_chains: | 1376 | if n_unmapped_chains: |
3099 | - # os.makedirs(runDir+"/results/geometry/all-atoms/distances/", exist_ok=True) | 1377 | + os.makedirs(runDir + "/results/geometry/all-atoms/distances/", exist_ok=True) |
3100 | - # liste_struct = os.listdir(path_to_3D_data + "rna_only") | 1378 | + # structure_list = os.listdir(path_to_3D_data + "rna_only") |
3101 | - liste_struct=liste_repres('/home/data/RNA/3D/latest_nr_list_4.0A.csv') | 1379 | + structure_list = representatives_from_nrlist(res_thr) |
3102 | - # if '4zdo_1_E.cif' in liste_struct: | 1380 | + for f in structure_list: |
3103 | - # liste_struct.remove('4zdo_1_E.cif') # weird cases to remove for now | ||
3104 | - # if '4zdp_1_E.cif' in liste_struct: | ||
3105 | - # liste_struct.remove('4zdp_1_E.cif') | ||
3106 | - for f in liste_struct: | ||
3107 | if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]): | 1381 | if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]): |
3108 | joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances | 1382 | joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances |
3109 | 1383 | ||
3110 | 1384 | ||
3111 | - process_jobs(joblist) | 1385 | + # process_jobs(joblist) |
3112 | - #count_occur_pyle_dist(runDir + '/results/geometry/Pyle/classes_dist/') | 1386 | + |
3113 | # Now process the memory-heavy tasks family by family | 1387 | # Now process the memory-heavy tasks family by family |
3114 | if DO_AVG_DISTANCE_MATRIX: | 1388 | if DO_AVG_DISTANCE_MATRIX: |
3115 | for f in LSU_set: | 1389 | for f in LSU_set: |
... | @@ -3124,36 +1398,31 @@ if __name__ == "__main__": | ... | @@ -3124,36 +1398,31 @@ if __name__ == "__main__": |
3124 | 1398 | ||
3125 | # finish the work after the parallel portions | 1399 | # finish the work after the parallel portions |
3126 | 1400 | ||
3127 | - # per_chain_stats() # per chain base frequencies en basepair types | 1401 | + # per_chain_stats() # per chain base frequencies and basepair types |
3128 | # seq_idty() # identity matrices from pre-computed .npy matrices | 1402 | # seq_idty() # identity matrices from pre-computed .npy matrices |
3129 | # stats_pairs() | 1403 | # stats_pairs() |
3130 | - concat_dataframes(runDir + '/results/geometry/Pyle/distances/', 'distances.csv') | ||
3131 | if n_unmapped_chains: | 1404 | if n_unmapped_chains: |
3132 | # general_stats() | 1405 | # general_stats() |
3133 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) | 1406 | os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True) |
3134 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) | 1407 | os.makedirs(runDir+"/results/geometry/json/", exist_ok=True) |
1408 | + concat_dataframes(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv') | ||
1409 | + if DO_HIRE_RNA_MEASURES: | ||
1410 | + concat_dataframes(runDir + '/results/geometry/HiRE-RNA/distances/', 'distances_HiRERNA.csv') | ||
1411 | + concat_dataframes(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_HiRERNA.csv') | ||
1412 | + concat_dataframes(runDir + '/results/geometry/HiRE-RNA/torsions/', 'torsions_HiRERNA.csv') | ||
1413 | + concat_dataframes(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs_HiRERNA.csv') | ||
1414 | + if DO_WADLEY_ANALYSIS: | ||
1415 | + concat_dataframes(runDir + '/results/geometry/Pyle/distances/', 'distances_pyle.csv') | ||
1416 | + concat_dataframes(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv') | ||
3135 | joblist = [] | 1417 | joblist = [] |
3136 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances.csv'))) | 1418 | + joblist.append(Job(function=gmm_aa_dists, args=(RESCAN_GMM_COMP_NUM,))) |
3137 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv'))) | 1419 | + joblist.append(Job(function=gmm_aa_torsions, args=(RESCAN_GMM_COMP_NUM,))) |
3138 | - # if DO_HIRE_RNA_MEASURES: | ||
3139 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv'))) | ||
3140 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_hire_RNA.csv'))) | ||
3141 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/torsions/', 'angles_torsion_hire_RNA.csv'))) | ||
3142 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs.csv'))) | ||
3143 | - # if DO_WADLEY_ANALYSIS: | ||
3144 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/distances/', 'distances_wadley.csv'))) | ||
3145 | - # joblist.append(Job(function=concat_dataframes, args=(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv'))) | ||
3146 | - # process_jobs(joblist) | ||
3147 | - joblist = [] | ||
3148 | - # joblist.append(Job(function=gmm_aa_dists, args=())) | ||
3149 | - # joblist.append(Job(function=gmm_aa_torsions, args=())) | ||
3150 | if DO_HIRE_RNA_MEASURES: | 1420 | if DO_HIRE_RNA_MEASURES: |
3151 | - # joblist.append(Job(function=gmm_hrna, args=())) | 1421 | + joblist.append(Job(function=gmm_hrna, args=(RESCAN_GMM_COMP_NUM,))) |
3152 | - joblist.append(Job(function=gmm_hrna_basepairs, args=())) | 1422 | + joblist.append(Job(function=gmm_hrna_basepairs, args=(RESCAN_GMM_COMP_NUM,))) |
3153 | - # if DO_WADLEY_ANALYSIS: | 1423 | + if DO_WADLEY_ANALYSIS: |
3154 | - # joblist.append(Job(function=gmm_wadley, args=())) | 1424 | + joblist.append(Job(function=gmm_pyle, args=(RESCAN_GMM_COMP_NUM,))) |
3155 | - # joblist.append(Job(function=gmm_pyle, args=())) | ||
3156 | if len(joblist): | 1425 | if len(joblist): |
3157 | process_jobs(joblist) | 1426 | process_jobs(joblist) |
3158 | - #merge_jsons() | 1427 | + merge_jsons() |
3159 | 1428 | ... | ... |
-
mentioned in commit 8d00ac09
-
Please register or login to post a comment