Louis BECQUEY

NR class representatives only for rRNA distance matrices

...@@ -23,3 +23,5 @@ scripts/*.sh ...@@ -23,3 +23,5 @@ scripts/*.sh
23 scripts/*.tar 23 scripts/*.tar
24 scripts/measure.py 24 scripts/measure.py
25 scripts/recompute_some_chains.py 25 scripts/recompute_some_chains.py
26 +scripts/convert_rna_jsons.py
27 +scripts/recompute_family.py
......
...@@ -8,6 +8,9 @@ FEATURE CHANGES ...@@ -8,6 +8,9 @@ FEATURE CHANGES
8 The LSU and SSU are now aligned with Infernal options '--cpu 10 --mxsize 8192 --mxtau 0.1', which is slow, 8 The LSU and SSU are now aligned with Infernal options '--cpu 10 --mxsize 8192 --mxtau 0.1', which is slow,
9 requires up to 100 GB of RAM, and yields a suboptimal alignment (tau=0.1 is quite bad), but is homogenous with the other families. 9 requires up to 100 GB of RAM, and yields a suboptimal alignment (tau=0.1 is quite bad), but is homogenous with the other families.
10 - The LSU and SSU therefore have defined cm_coords fields, and therefore distance matrices can be computed. 10 - The LSU and SSU therefore have defined cm_coords fields, and therefore distance matrices can be computed.
11 + - Distance matrices are computed on all available molecules of the family by default, but you can use statistics.py --non-redundant to only
12 + take the equivalence class representatives at a given resolution into account (new option). For storage reasons, rRNAs are always run in
13 + this mode (but this might change in the future: space required is 'only' ~300 GB).
11 - We now provide for download the renumbered (standardised) 3D MMCIF files, the nucleotides being numbered by their "index_chain" in the database. 14 - We now provide for download the renumbered (standardised) 3D MMCIF files, the nucleotides being numbered by their "index_chain" in the database.
12 - We now provide for download the sequences of the 3D chains aligned by Rfam family (without Rfam sequences, which have been removed). 15 - We now provide for download the sequences of the 3D chains aligned by Rfam family (without Rfam sequences, which have been removed).
13 - statistics.py now computes histograms and a density estimation with Gaussian mixture models for a large set of geometric parameters, 16 - statistics.py now computes histograms and a density estimation with Gaussian mixture models for a large set of geometric parameters,
...@@ -23,7 +26,7 @@ FEATURE CHANGES ...@@ -23,7 +26,7 @@ FEATURE CHANGES
23 BUG CORRECTIONS 26 BUG CORRECTIONS
24 - New code file geometric_stats.py 27 - New code file geometric_stats.py
25 - New automation script that starts from scratch 28 - New automation script that starts from scratch
26 - - Many small fixes 29 + - Many small fixes, leading to the support of many previously "known issues"
27 - Performance tweaks 30 - Performance tweaks
28 31
29 TECHNICAL CHANGES 32 TECHNICAL CHANGES
......
1 -6cfj_1_1X
2 -6cfj_1_2X
3 -5hcq_1_1X
4 -6cae_1_1X
5 -5hcq_1_2X
6 -5hcr_1_1X
7 -4z8c_1_1X
8 -5j4b_1_1X
9 -6xhy_1_1X
10 -6xhy_1_2X
11 -5j4b_1_2X
12 -4z8c_1_2X
13 -6cae_1_2X
14 -5j4c_1_1X
15 -5w4k_1_1X
16 -6of1_1_1X
17 -6xhw_1_1X
18 -5hcr_1_2X
19 -5hd1_1_1X
20 -5hcp_1_1X
21 -6of1_1_2X
22 -5hau_1_1W
23 -5j4c_1_2X
24 -5wis_1_1X
25 -6xhv_1_1X
26 -6xqd_1_1X
27 -6nd5_1_1X
28 -5w4k_1_2X
29 -6xhw_1_2X
30 -5hau_1_2W
31 -6xqd_1_2X
32 -6xhv_1_2X
33 -4y4p_1_1X
34 -6o97_1_1X
35 -5hcp_1_2X
36 -5doy_1_1X
37 -4zer_1_1X
38 -5wit_1_1X
39 -5hd1_1_2X
40 -6nd5_1_2X
41 -4z3s_1_1X
42 -7jql_1_1X
43 -7jqm_1_1X
44 -7jql_1_2X
45 -5wis_1_2X
46 -6nd6_1_1X
47 -6o97_1_2X
48 -4y4p_1_2X
49 -7jqm_1_2X
50 -4z3s_1_2X
51 -4zer_1_2X
52 -6uo1_1_2X
53 -6uo1_1_1X
54 -5doy_1_2X
55 -5wit_1_2X
56 -5f8k_1_1X
57 -6nd6_1_2X
58 -6xqe_1_1X
59 -6xqe_1_2X
60 -6n9e_1_1X
61 -6n9e_1_2X
62 -6n9f_1_1X
63 -5f8k_1_2X
64 -6n9f_1_2X
65 -6xz7_1_F
66 -6xzb_1_F2
67 -6xza_1_F2
68 -6y69_1_W
69 -5afi_1_V
70 -5afi_1_W
71 -6h4n_1_W
72 -5wdt_1_V
73 -5wfs_1_V
74 -5wdt_1_W
75 -5wfs_1_W
76 -5we4_1_V
77 -5we4_1_W
78 -5uq8_1_Y
79 -6c4i_1_Y
80 -6c4i_1_X
81 -6yef_1_X
82 -5zeb_1_V
83 -5zep_1_W
84 -5lzd_1_V
85 -5we6_1_V
86 -5wfk_1_V
87 -5wfk_1_W
88 -5we6_1_W
89 -5u4i_1_Y
90 -5uq7_1_Y
91 -5u4i_1_X
92 -5lza_1_V
93 -5wf0_1_V
94 -5wf0_1_W
95 -5zeu_1_V
96 -5l3p_1_X
97 -3jcj_1_V
98 -6gxm_1_X
99 -6gwt_1_X
100 -6gxn_1_X
101 -6gxo_1_X
102 -3j9y_1_V
103 -6o9k_1_Y
104 -6o7k_1_V
105 -5lzf_1_V
106 -3jcn_1_V
107 -5lzc_1_V
108 -5u4j_1_X
109 -5u4j_1_Z
110 -5lzb_1_V
111 -6h58_1_W
112 -6h58_1_WW
113 -5j8b_1_X
114 -4v7j_1_AV
115 -4v7j_1_BV
116 -4v7k_1_BV
117 -4v7k_1_AV
118 -4v7k_1_BW
119 -4v7k_1_AW
120 -4v7j_1_AW
121 -4v7j_1_BW
122 -4v4j_1_Z
123 -6i0v_1_B
124 -5k77_1_X
125 -5k77_1_V
126 -5k77_1_Y
127 -5k77_1_W
128 -5k77_1_Z
129 -4pei_1_X
130 -4pei_1_V
131 -4pei_1_W
132 -4pei_1_Z
133 -4pei_1_Y
134 -4a3c_1_P
135 -4a3e_1_P
136 -6lkq_1_U
137 -7k00_1_B
138 -6ys3_1_A
139 -6qdw_1_A
140 -6hcj_1_Q3
141 -6hcq_1_Q3
142 -6o8w_1_U
143 -5mmm_1_Z
144 -4w2e_1_W
145 -5j4b_1_1Y
146 -6cfj_1_1W
147 -5w4k_1_1Y
148 -6xhy_1_1W
149 -5wit_1_1W
150 -6cfj_1_1Y
151 -6cfj_1_2W
152 -5j4c_1_1W
153 -5wis_1_1Y
154 -5j4c_1_1Y
155 -6xhw_1_1W
156 -6cfj_1_2Y
157 -5wis_1_1W
158 -5j4b_1_1W
159 -6xhv_1_1W
160 -6xhy_1_2W
161 -5j4c_1_2W
162 -5j4b_1_2W
163 -5j4b_1_2Y
164 -5j4c_1_2Y
165 -5w4k_1_1W
166 -6nd5_1_1Y
167 -6xhw_1_2W
168 -5wis_1_2Y
169 -5wit_1_2W
170 -6xhv_1_2W
171 -5doy_1_1Y
172 -5w4k_1_2Y
173 -4y4p_1_1Y
174 -4z3s_1_1Y
175 -5doy_1_1W
176 -5doy_1_2Y
177 -6nd5_1_1W
178 -4z3s_1_2Y
179 -4z3s_1_1W
180 -5w4k_1_2W
181 -6nd5_1_2Y
182 -4y4p_1_2Y
183 -6uo1_1_2Y
184 -6uo1_1_2W
185 -4y4p_1_1W
186 -4z3s_1_2W
187 -6uo1_1_1Y
188 -6xhy_1_1Y
189 -6uo1_1_1W
190 -5wis_1_2W
191 -5wit_1_1Y
192 -6nd5_1_2W
193 -4y4p_1_2W
194 -5doy_1_2W
195 -5wit_1_2Y
196 -6xhv_1_1Y
197 -6xhy_1_2Y
198 -6xhw_1_1Y
199 -6xhw_1_2Y
200 -6ucq_1_1Y
201 -6xhv_1_2Y
202 -4v4i_1_Z
203 -6ucq_1_1X
204 -6ucq_1_2Y
205 -4w2e_1_X
206 -6ucq_1_2X
207 -7n1p_1_DT
208 -7n2u_1_DT
209 -6yss_1_W
210 -7n30_1_DT
211 -7n31_1_DT
212 -7n2c_1_DT
213 -5afi_1_Y
214 -5uq8_1_Z
215 -5wdt_1_Y
216 -5wfs_1_Y
217 -6ysr_1_W
218 -5we4_1_Y
219 -6yst_1_W
220 -5uq7_1_Z
221 -5we6_1_Y
222 -5wfk_1_Y
223 -5wf0_1_Y
224 -6o9j_1_V
225 -6ysu_1_W
226 -3j46_1_A
227 -5j8b_1_Y
228 -5j8b_1_W
229 -3bbv_1_Z
230 -5aj0_1_BV
231 -5aj0_1_BW
232 -4wt8_1_AB
233 -4wt8_1_BB
234 -4v4j_1_Y
235 -4v4i_1_Y
236 -5uq8_1_X
237 -5uq7_1_X
238 -4v4j_1_W
239 -4v4i_1_W
240 -4wt8_1_CS
241 -4wt8_1_DS
242 -4v4j_1_X
243 -4v4i_1_X
244 -6lkq_1_S
245 -5h5u_1_H
246 -7d6z_1_F
247 -5lze_1_Y
248 -5lze_1_V
249 -5lze_1_X
250 -3jcj_1_G
251 -6o7k_1_G
252 -6d30_1_C
253 -6j7z_1_C
254 -3er9_1_D
255 -5kal_1_Y
256 -4nia_1_3
257 -5kal_1_Z
258 -4nia_1_7
259 -4nia_1_4
260 -5new_1_C
261 -4nia_1_U
262 -4nia_1_6
263 -4oq9_1_7
264 -4nia_1_1
265 -4oq9_1_4
266 -4nia_1_8
267 -4oq9_1_8
268 -4nia_1_5
269 -2vrt_1_E
270 -4nia_1_W
271 -4oq9_1_6
272 -4oq8_1_D
273 -4nia_1_Z
274 -4oq9_1_W
275 -4oq9_1_5
276 -4nia_1_2
277 -2vrt_1_F
278 -4oq9_1_U
279 -4oq9_1_Z
280 -4oq9_1_2
281 -4oq9_1_3
282 -1ddl_1_E
283 -4oq9_1_1
284 -6rt5_1_A
285 -6rt5_1_E
286 -6lkq_1_T
287 -6ys3_1_B
288 -6qdw_1_B
289 -3jbv_1_B
290 -3jbu_1_B
291 -6do8_1_B
292 -6dpi_1_B
293 -6dp9_1_B
294 -6dpb_1_B
295 -6dmn_1_B
296 -6dpp_1_B
297 -6dpk_1_B
298 -6dpd_1_B
299 -6dot_1_B
300 -6dok_1_B
301 -6dp8_1_B
302 -6dpl_1_B
303 -6dpg_1_B
304 -6dou_1_B
305 -6dpc_1_B
306 -6do9_1_B
307 -6dmv_1_B
308 -6dp4_1_B
309 -6dpn_1_B
310 -6doj_1_B
311 -6dph_1_B
312 -6dos_1_B
313 -6doo_1_B
314 -6dp6_1_B
315 -6dox_1_B
316 -6dp5_1_B
317 -6dol_1_B
318 -6dp1_1_B
319 -6doz_1_B
320 -6dp7_1_B
321 -6doq_1_B
322 -6dpa_1_B
323 -6dom_1_B
324 -6dog_1_B
325 -6dop_1_B
326 -6doh_1_B
327 -6doa_1_B
328 -6don_1_B
329 -6dov_1_B
330 -6dpo_1_B
331 -6dod_1_B
332 -6dob_1_B
333 -6dow_1_B
334 -6dpm_1_B
335 -6dpf_1_B
336 -6dp3_1_B
337 -6dp2_1_B
338 -6dpe_1_B
339 -6dpj_1_B
340 -6dor_1_B
341 -6dof_1_B
342 -6dp0_1_B
343 -6doi_1_B
344 -6doc_1_B
345 -6doe_1_B
346 -6n6g_1_D
347 -4b3r_1_W
348 -4b3t_1_W
349 -4b3s_1_W
350 -7b5k_1_X
351 -5o2r_1_X
352 -5kcs_1_1X
353 -7n1p_1_PT
354 -7n2u_1_PT
355 -7n30_1_PT
356 -7n31_1_PT
357 -7n2c_1_PT
358 -6zvk_1_E2
359 -6zvk_1_H2
360 -7a01_1_E2
361 -7a01_1_H2
362 -6fti_1_U
363 -6fti_1_W
364 -6ftj_1_U
365 -6ftj_1_W
366 -6ftg_1_U
367 -6ftg_1_W
368 -6x1b_1_D
369 -6x1b_1_F
370 -5f6c_1_C
371 -6i0t_1_B
372 -1b2m_1_C
373 -1b2m_1_D
374 -1b2m_1_E
375 -2uxc_1_Y
376 -4a3g_1_P
377 -4a3j_1_P
378 -7k00_1_5
379 -5mmi_1_Z
380 -3j9m_1_U
381 -7a5k_1_U3
382 -6nu2_1_U
383 -7a5g_1_U3
384 -6nu3_1_U
385 -5c0y_1_C
386 -6n6f_1_D
387 -4ohy_1_B
388 -4oi1_1_B
389 -4oi0_1_B
390 -5ipl_1_3
391 -6utw_1_333
392 -5ipm_1_3
393 -5ipn_1_3
394 -4ylo_1_3
395 -4yln_1_6
396 -4ylo_1_6
397 -4yln_1_3
398 -4yln_1_9
399 -5lzf_1_Y
400 -1n32_1_Z
401 -5zsl_1_D
402 -5zsd_1_C
403 -5zsd_1_D
404 -5zsl_1_E
405 -4nku_1_D
406 -4nku_1_H
407 -1cwp_1_E
408 -6thn_1_A
409 -6qik_1_Y
410 -6rzz_1_Y
411 -6ri5_1_Y
412 -6qt0_1_Y
413 -6qtz_1_Y
414 -6t83_1_1B
415 -6t83_1_3B
416 -6t83_1_AA
417 -6t83_1_CA
418 -6s05_1_Y
419 -5jcs_1_X
420 -5fl8_1_X
421 -6ole_1_V
422 -6om0_1_V
423 -6oli_1_V
424 -6om7_1_V
425 -6w6l_1_V
426 -6olf_1_V
427 -3erc_1_G
428 -6of1_1_1W
429 -6cae_1_1Y
430 -6o97_1_1W
431 -6of1_1_1Y
432 -6of1_1_2W
433 -6o97_1_1Y
434 -6nd6_1_1Y
435 -6cae_1_1W
436 -6of1_1_2Y
437 -6cae_1_2Y
438 -6nd6_1_1W
439 -6cae_1_2W
440 -6o97_1_2Y
441 -6nd6_1_2Y
442 -6o97_1_2W
443 -6nd6_1_2W
444 -4wtm_1_T
445 -4wtm_1_P
446 -6gz4_1_BW
447 -6xz7_1_G
448 -6xzb_1_G2
449 -6gz5_1_BW
450 -6gz3_1_BW
451 -4hot_1_X
452 -6d2z_1_C
453 -7eh0_1_I
454 -4tu0_1_F
455 -4tu0_1_G
456 -6r9o_1_B
457 -6is0_1_C
458 -5lzc_1_X
459 -5lzb_1_X
460 -5lzd_1_Y
461 -5lzc_1_Y
462 -5lzb_1_Y
463 -6zvi_1_E
464 -6sv4_1_MC
465 -6sv4_1_MB
466 -7nrd_1_SM
467 -6i7o_1_MB
468 -6zvi_1_D
469 -6sv4_1_NB
470 -6sv4_1_NC
471 -6i7o_1_NB
472 -7nsq_1_V
473 -6swa_1_Q
474 -6swa_1_R
475 -6ole_1_T
476 -6om0_1_T
477 -6oli_1_T
478 -6om7_1_T
479 -6olf_1_T
480 -6w6l_1_T
481 -6tnu_1_M
482 -5mc6_1_M
483 -7nrc_1_SM
484 -6tb3_1_N
485 -7b7d_1_SM
486 -7b7d_1_SN
487 -6tnu_1_N
488 -7nrc_1_SN
489 -7nrd_1_SN
490 -6zot_1_C
491 -4qu6_1_B
492 -2uxb_1_X
493 -2x1f_1_B
494 -2x1a_1_B
495 -5o1y_1_B
496 -4kzy_1_I
497 -4kzz_1_I
498 -4kzx_1_I
499 -6dzi_1_H
500 -5zeu_1_A
501 -6evj_1_N
502 -6evj_1_M
503 -6wub_1_A
504 -6wua_1_A
505 -6mpi_1_W
506 -5mfx_1_B
507 -5w0m_1_J
508 -5bud_1_E
509 -5w0m_1_I
510 -5w0m_1_H
511 -4j7m_1_B
512 -5bud_1_D
513 -6a4e_1_B
514 -6a4e_1_D
515 -6hxx_1_AA
516 -6hxx_1_AB
517 -6hxx_1_AC
518 -6hxx_1_AD
519 -6hxx_1_AE
520 -6hxx_1_AF
521 -6hxx_1_AG
522 -6hxx_1_AH
523 -6hxx_1_AI
524 -6hxx_1_AJ
525 -6hxx_1_AK
526 -6hxx_1_AL
527 -6hxx_1_AM
528 -6hxx_1_AN
529 -6hxx_1_AO
530 -6hxx_1_AP
531 -6hxx_1_AQ
532 -6hxx_1_AR
533 -6hxx_1_AS
534 -6hxx_1_AT
535 -6hxx_1_AU
536 -6hxx_1_AV
537 -6hxx_1_AW
538 -6hxx_1_AX
539 -6hxx_1_AY
540 -6hxx_1_AZ
541 -6hxx_1_BA
542 -6hxx_1_BB
543 -6hxx_1_BC
544 -6hxx_1_BD
545 -6hxx_1_BE
546 -6hxx_1_BF
547 -6hxx_1_BG
548 -6hxx_1_BH
549 -6hxx_1_BI
550 -5odv_1_A
551 -5odv_1_B
552 -5odv_1_C
553 -5odv_1_D
554 -5odv_1_E
555 -5odv_1_F
556 -5odv_1_G
557 -5odv_1_H
558 -5odv_1_I
559 -5odv_1_J
560 -5odv_1_K
561 -5odv_1_L
562 -5odv_1_M
563 -5odv_1_N
564 -5odv_1_O
565 -5odv_1_P
566 -5odv_1_Q
567 -5odv_1_R
568 -5odv_1_S
569 -5odv_1_T
570 -5odv_1_U
571 -5odv_1_V
572 -5odv_1_W
573 -5odv_1_X
574 -6t34_1_A
575 -6t34_1_B
576 -6t34_1_C
577 -6t34_1_D
578 -6t34_1_E
579 -6t34_1_F
580 -6t34_1_G
581 -6t34_1_H
582 -6t34_1_I
583 -6t34_1_J
584 -6t34_1_K
585 -6t34_1_L
586 -6t34_1_M
587 -6t34_1_N
588 -6t34_1_O
589 -6t34_1_P
590 -6t34_1_Q
591 -6t34_1_R
592 -6t34_1_S
593 -6ip8_1_ZY
594 -6ip5_1_ZY
595 -6ip5_1_ZU
596 -6ip6_1_ZY
597 -6ip8_1_ZZ
598 -6ip6_1_ZZ
599 -6uu3_1_333
600 -6uu1_1_333
601 -3er8_1_H
602 -3er8_1_G
603 -3er8_1_F
604 -5o3j_1_B
605 -4dr7_1_B
606 -1i5l_1_Y
607 -1i5l_1_U
608 -4dr6_1_B
609 -6i2n_1_U
610 -4v68_1_A0
611 -6vyu_1_Y
612 -6vyw_1_Y
613 -6vz7_1_Y
614 -6vz5_1_Y
615 -6vz3_1_Y
616 -6vyy_1_Y
617 -6vyx_1_Y
618 -6vyz_1_Y
619 -6vz2_1_Y
620 -1mvr_1_1
621 -6vyt_1_Y
622 -1cgm_1_I
623 -3jb7_1_T
624 -3jb7_1_M
625 -3j0o_1_D
626 -3j0l_1_D
627 -3j0q_1_D
628 -3j0p_1_D
629 -2tmv_1_R
630 -5a79_1_R
631 -5a7a_1_R
632 -2om3_1_R
633 -2xea_1_R
634 -4v7e_1_AA
635 -4v7e_1_AC
636 -4wtl_1_T
637 -4wtl_1_P
638 -1xnq_1_W
639 -7n2v_1_DT
640 -4peh_1_Z
641 -1vq6_1_4
642 -4am3_1_D
643 -4am3_1_H
644 -4am3_1_I
645 -4lj0_1_C
646 -4lj0_1_D
647 -4lj0_1_E
648 -5lzy_1_HH
649 -4wtj_1_T
650 -4wtj_1_P
651 -4xbf_1_D
652 -6n6d_1_D
653 -6n6k_1_C
654 -6n6k_1_D
655 -3rtj_1_D
656 -6ty9_1_M
657 -6tz1_1_N
658 -6q1h_1_D
659 -6q1h_1_H
660 -6p7p_1_F
661 -6p7p_1_E
662 -6p7p_1_D
663 -6vm6_1_J
664 -6vm6_1_G
665 -6wan_1_K
666 -6wan_1_H
667 -6wan_1_G
668 -6wan_1_L
669 -6wan_1_I
670 -6ywo_1_F
671 -6wan_1_J
672 -4oau_1_A
673 -6ywo_1_E
674 -6ywo_1_K
675 -6vm6_1_I
676 -6vm6_1_H
677 -6ywo_1_I
678 -2a1r_1_C
679 -6m6v_1_F
680 -6m6v_1_E
681 -2a1r_1_D
682 -3gpq_1_E
683 -3gpq_1_F
684 -6o79_1_C
685 -6vm6_1_K
686 -6m6v_1_G
687 -6hyu_1_D
688 -1laj_1_R
689 -6ybv_1_K
690 -6sce_1_B
691 -6xl1_1_C
692 -6scf_1_I
693 -6scf_1_K
694 -6yud_1_K
695 -6yud_1_O
696 -6scf_1_M
697 -6yud_1_P
698 -6scf_1_L
699 -6yud_1_M
700 -6yud_1_Q
701 -6w11_1_C
702 -6o6x_1_D
703 -4ba2_1_R
704 -7bdv_1_F
705 -7bdv_1_H
706 -6o6x_1_C
707 -7did_1_C
708 -6o7b_1_C
709 -6o6v_1_C
710 -6wxx_1_Y
711 -6wxx_1_X
712 -6r7b_1_D
713 -6r9r_1_D
714 -6ov0_1_E
715 -6ov0_1_H
716 -6ov0_1_G
717 -6o6v_1_D
718 -6ov0_1_F
719 -6o7b_1_D
720 -5e02_1_C
721 -6r9r_1_E
722 -6r7b_1_E
723 -6o7i_1_I
724 -6o7h_1_K
725 -7l6t_1_C
726 -7jyy_1_F
727 -7jyy_1_E
728 -7jz0_1_F
729 -7jz0_1_E
730 -6rt6_1_A
731 -6rt6_1_E
732 -1y1y_1_P
733 -5zuu_1_I
734 -5zuu_1_G
735 -7am2_1_R1
736 -4peh_1_W
737 -4peh_1_V
738 -4peh_1_X
739 -4peh_1_Y
740 -7d8c_1_C
741 -6mkn_1_W
742 -7kl3_1_B
743 -4cxg_1_C
744 -4cxh_1_C
745 -4eya_1_E
746 -4eya_1_F
747 -4eya_1_Q
748 -4eya_1_R
749 -4ht9_1_E
750 -6z1p_1_AB
751 -6z1p_1_AA
752 -4ii9_1_C
753 -5mq0_1_3
754 -5uk4_1_X
755 -5uk4_1_V
756 -5uk4_1_W
757 -5uk4_1_U
758 -5f6c_1_E
759 -7nwh_1_HH
760 -4rcj_1_B
761 -1xnr_1_W
762 -6e0o_1_C
763 -6o75_1_D
764 -6o75_1_C
765 -6e0o_1_B
766 -3j06_1_R
767 -4eya_1_G
768 -4eya_1_H
769 -4eya_1_S
770 -4eya_1_T
771 -4dr4_1_V
772 -1ibl_1_Z
773 -1ibm_1_Z
774 -4dr5_1_V
775 -4d61_1_J
776 -7nwg_1_Q3
777 -5tbw_1_SR
778 -6hhq_1_SR
779 -6zvi_1_H
780 -6sv4_1_2B
781 -6sv4_1_2C
782 -6t83_1_2B
783 -6t83_1_A
784 -6i7o_1_2B
785 -6q8y_1_N
786 -6sv4_1_N
787 -6i7o_1_N
788 -6swa_1_S
789 -5k8h_1_A
790 -5z4a_1_B
791 -3jbu_1_V
792 -1h2c_1_R
793 -1h2d_1_S
794 -1h2d_1_R
795 -6szs_1_X
796 -5mgp_1_X
797 -6enu_1_X
798 -6enf_1_X
799 -6enj_1_X
800 -1pvo_1_L
801 -1pvo_1_G
802 -1pvo_1_H
803 -1pvo_1_J
804 -1pvo_1_K
805 -2ht1_1_K
806 -2ht1_1_J
807 -5sze_1_C
808 -6wre_1_D
809 -6i0u_1_B
810 -5zsa_1_C
811 -5zsa_1_D
812 -1n34_1_Z
813 -3pf5_1_S
814 -6ppn_1_A
815 -6ppn_1_I
816 -5flx_1_Z
817 -6eri_1_AX
818 -7k5l_1_R
819 -7d80_1_Y
820 -7du2_1_R
821 -4v8z_1_CX
822 -6kqe_1_I
823 -5uh8_1_I
824 -5vi5_1_Q
825 -4xln_1_T
826 -4xlr_1_T
827 -4xln_1_Q
828 -5i2d_1_K
829 -5i2d_1_V
830 -4xlr_1_Q
831 -6sty_1_C
832 -6sty_1_F
833 -2xs5_1_D
834 -3ok4_1_N
835 -3ok4_1_L
836 -3ok4_1_Z
837 -3ok4_1_4
838 -3ok4_1_V
839 -3ok4_1_X
840 -3ok4_1_P
841 -3ok4_1_H
842 -3ok4_1_J
843 -3ok4_1_R
844 -3ok4_1_T
845 -3ok4_1_2
846 -6n6h_1_D
847 -5wnt_1_B
848 -3b0u_1_B
849 -3b0u_1_A
850 -4x9e_1_G
851 -4x9e_1_H
852 -6z1p_1_BB
853 -6z1p_1_BA
854 -2uxd_1_X
855 -6ywe_1_BB
856 -3ol9_1_D
857 -3ol9_1_H
858 -3ol9_1_L
859 -3ol9_1_P
860 -3olb_1_L
861 -3olb_1_P
862 -3olb_1_D
863 -3olb_1_H
864 -3ol6_1_D
865 -3ol6_1_H
866 -3ol6_1_L
867 -3ol6_1_P
868 -3ol8_1_D
869 -3ol8_1_H
870 -3ol7_1_L
871 -3ol7_1_P
872 -3ol7_1_D
873 -3ol7_1_H
874 -3ol8_1_L
875 -3ol8_1_P
876 -6yrq_1_E
877 -6yrq_1_H
878 -6yrq_1_G
879 -6yrq_1_F
880 -6yrb_1_C
881 -6yrb_1_D
882 -6gz5_1_BV
883 -6gz4_1_BV
884 -6gz3_1_BV
885 -6fti_1_Q
886 -7njc_1_B
887 -4v7e_1_AB
888 -4v7e_1_AE
889 -4v7e_1_AD
890 -4x62_1_B
891 -4x64_1_B
892 -4x65_1_B
893 -1xmq_1_W
894 -4x66_1_B
895 -3t1h_1_W
896 -3t1y_1_W
897 -1xmo_1_W
898 -6kr6_1_B
899 -6z8k_1_X
900 -4csf_1_U
901 -4csf_1_Q
902 -4csf_1_G
903 -4csf_1_M
904 -4csf_1_K
905 -4csf_1_A
906 -4csf_1_I
907 -4csf_1_S
908 -4csf_1_C
909 -4csf_1_W
910 -4csf_1_O
911 -4csf_1_E
912 -6ywx_1_BB
913 -6th6_1_AA
914 -6skg_1_AA
915 -6skf_1_AA
916 -6q8y_1_M
917 -6i7o_1_M
918 -6zmw_1_W
919 -6ybv_1_W
920 -2fz2_1_D
921 -2xpj_1_D
922 -2vrt_1_H
923 -2vrt_1_G
924 -6r9m_1_B
925 -4nia_1_C
926 -4nia_1_A
927 -4nia_1_H
928 -4nia_1_N
929 -4nia_1_G
930 -4nia_1_D
931 -4nia_1_B
932 -4nia_1_I
933 -4nia_1_E
934 -4nia_1_M
935 -4oq9_1_I
936 -4oq9_1_G
937 -4oq9_1_C
938 -4oq9_1_H
939 -4oq9_1_N
940 -4oq9_1_A
941 -4oq9_1_D
942 -4oq9_1_E
943 -4oq9_1_M
944 -4oq9_1_B
945 -5uhc_1_I
946 -1uvn_1_F
947 -1uvn_1_B
948 -1uvn_1_D
949 -4wtk_1_T
950 -4wtk_1_P
951 -1vqn_1_4
952 -4oav_1_C
953 -4oav_1_A
954 -4i67_1_B
955 -6k32_1_T
956 -6k32_1_P
957 -5mmj_1_A
958 -5x8r_1_A
959 -6yw5_1_AA
960 -6ywe_1_AA
961 -6ywy_1_AA
962 -6ywx_1_AA
963 -3nvk_1_G
964 -3nvk_1_S
965 -1cwp_1_D
966 -1cwp_1_F
967 -5z4j_1_B
968 -5gmf_1_E
969 -5gmf_1_H
970 -6e4p_1_J
971 -5gmf_1_F
972 -5gmf_1_G
973 -5gmg_1_D
974 -5gmg_1_C
975 -6e4p_1_K
976 -3ie1_1_E
977 -3ie1_1_H
978 -3ie1_1_F
979 -4dr7_1_V
980 -3ie1_1_G
981 -3s4g_1_C
982 -3s4g_1_B
983 -2qqp_1_R
984 -1nb7_1_E
985 -1nb7_1_F
986 -4hos_1_X
987 -3p6y_1_T
988 -3p6y_1_V
989 -3p6y_1_U
990 -3p6y_1_Q
991 -3p6y_1_W
992 -5dto_1_B
993 -4cxh_1_X
994 -1uvj_1_F
995 -1uvj_1_D
996 -1uvj_1_E
997 -6kqd_1_I
998 -6kqd_1_S
999 -5uh5_1_I
1000 -1ytu_1_F
1001 -1ytu_1_D
1002 -4kzz_1_J
1003 -7a09_1_F
1004 -5t2c_1_AN
1005 -3j6b_1_E
1006 -4v4f_1_B6
1007 -4v4f_1_A5
1008 -4v4f_1_A3
1009 -4v4f_1_B0
1010 -4v4f_1_B9
1011 -4v4f_1_A2
1012 -4v4f_1_A8
1013 -4v4f_1_A1
1014 -4v4f_1_A9
1015 -4v4f_1_BZ
1016 -4v4f_1_B8
1017 -4v4f_1_B7
1018 -4v4f_1_B5
1019 -4v4f_1_A0
1020 -4v4f_1_A7
1021 -4v4f_1_A4
1022 -4v4f_1_AZ
1023 -4v4f_1_B3
1024 -4v4f_1_B1
1025 -4v4f_1_B4
1026 -4v4f_1_A6
1027 -4v4f_1_B2
1028 -7m4y_1_V
1029 -7m4x_1_V
1030 -6v3a_1_V
1031 -6v39_1_V
1032 -5it9_1_I
1033 -7jqc_1_I
1034 -5zsb_1_C
1035 -5zsb_1_D
1036 -5zsn_1_D
1037 -5zsn_1_E
1038 -6gfw_1_R
1039 -6zm6_1_X
1040 -6zm5_1_X
1041 -6zm6_1_W
1042 -6zm5_1_W
1043 -6n6e_1_D
1044 -4g7o_1_I
1045 -4g7o_1_S
1046 -5x22_1_S
1047 -5x22_1_I
1048 -5x21_1_I
1049 -5uh6_1_I
1050 -6l74_1_I
1051 -5uh9_1_I
1052 -7a5j_1_X
1053 -6sag_1_R
1054 -4udv_1_R
1055 -5zsc_1_D
1056 -5zsc_1_C
1057 -6woy_1_I
1058 -6wox_1_I
1059 -4gkk_1_W
1060 -4v9e_1_AG
1061 -4v9e_1_BM
1062 -4v9e_1_AM
1063 -4v9e_1_AA
1064 -4v9e_1_BA
1065 -4v9e_1_BG
1066 -5lzs_1_II
1067 -6fqr_1_C
1068 -6ha1_1_X
1069 -5kcr_1_1X
1070 -6uu4_1_333
1071 -6uu0_1_333
1072 -6uuc_1_333
1073 -6uu2_1_333
1074 -6xl9_1_R
1075 -6b6h_1_3
1076 -6xh8_1_3
1077 -6pb4_1_3
1078 -3m7n_1_Z
1079 -3m85_1_X
1080 -3m85_1_Z
1081 -3m85_1_Y
1082 -5wnp_1_B
1083 -5wnv_1_B
1084 -5yts_1_B
1085 -1utd_1_6
1086 -1utd_1_Z
1087 -1utd_1_4
1088 -1utd_1_7
1089 -1utd_1_9
1090 -1utd_1_5
1091 -1utd_1_3
1092 -1utd_1_2
1093 -1utd_1_8
1094 -1utd_1_1
1095 -6n6i_1_C
1096 -6n6i_1_D
1097 -6n6a_1_D
1098 -6ij2_1_F
1099 -6ij2_1_G
1100 -6ij2_1_H
1101 -6ij2_1_E
1102 -3u2e_1_D
1103 -3u2e_1_C
1104 -7eh1_1_I
1105 -5uef_1_C
1106 -5uef_1_D
1107 -7eh2_1_R
1108 -7eh2_1_I
1109 -4x4u_1_H
1110 -4afy_1_D
1111 -6oy5_1_I
1112 -6owl_1_B
1113 -6owl_1_C
1114 -4afy_1_C
1115 -4lq3_1_R
1116 -6s0m_1_C
1117 -6ymw_1_C
1118 -7a5g_1_J
1119 -6gx6_1_B
1120 -4k4s_1_D
1121 -4k4s_1_H
1122 -4k4t_1_H
1123 -4k4t_1_D
1124 -1xpu_1_G
1125 -1xpu_1_L
1126 -1xpr_1_L
1127 -1xpu_1_H
1128 -1xpo_1_K
1129 -1xpo_1_J
1130 -1xpu_1_J
1131 -1xpo_1_H
1132 -1xpr_1_J
1133 -1xpu_1_K
1134 -1xpr_1_K
1135 -1xpo_1_M
1136 -1xpo_1_L
1137 -1xpu_1_M
1138 -1xpr_1_M
1139 -1xpo_1_G
1140 -1xpr_1_H
1141 -1xpr_1_G
1142 -5x70_1_E
1143 -5x70_1_G
1144 -6gc5_1_F
1145 -6gc5_1_H
1146 -6gc5_1_G
1147 -1n1h_1_B
1148 -7n2v_1_PT
1149 -4ohz_1_B
1150 -6t83_1_6B
1151 -4gv6_1_C
1152 -4gv6_1_B
1153 -4gv3_1_C
1154 -4gv3_1_B
1155 -4gv9_1_E
1156 -6i7o_1_L
1157 -2a8v_1_D
1158 -6qx3_1_G
1159 -2xnr_1_C
1160 -4gkj_1_W
1161 -5y88_1_X
1162 -3j0o_1_H
1163 -3j0l_1_H
1164 -3j0p_1_H
1165 -3j0q_1_H
1166 -3j0o_1_F
1167 -3j0l_1_F
1168 -3j0p_1_F
1169 -3j0q_1_F
1170 -3j0o_1_B
1171 -3j0l_1_B
1172 -3j0o_1_C
1173 -3j0l_1_C
1174 -3j0q_1_C
1175 -3j0p_1_C
1176 -3j0o_1_A
1177 -3j0l_1_A
1178 -3j0q_1_A
1179 -3j0p_1_A
1180 -6ys3_1_V
1181 -6qdw_1_V
1182 -5hk0_1_F
1183 -4qm6_1_D
1184 -4qm6_1_C
1185 -4jzu_1_C
1186 -4jzv_1_C
1187 -5ytv_1_B
1188 -4k4z_1_P
1189 -4k4z_1_D
1190 -4k4x_1_L
1191 -4k4z_1_L
1192 -4k4x_1_D
1193 -4k4z_1_H
1194 -4k4x_1_H
1195 -4k4x_1_P
1196 -4a3b_1_P
1197 -4a3m_1_P
1198 -6u6y_1_E
1199 -6u6y_1_G
1200 -6u6y_1_F
1201 -6u6y_1_H
1202 -6qik_1_X
1203 -6rzz_1_X
1204 -6ri5_1_X
1205 -6qt0_1_X
1206 -6qtz_1_X
1207 -6s05_1_X
1208 -6t83_1_BB
1209 -6t83_1_4B
1210 -5fl8_1_Z
1211 -5jcs_1_Z
1212 -5mrc_1_BB
1213 -5mre_1_BB
1214 -5mrf_1_BB
1215 -3j46_1_P
1216 -4e6b_1_A
1217 -4e6b_1_B
1218 -6a6l_1_D
1219 -1uvi_1_D
1220 -1uvi_1_F
1221 -1uvi_1_E
1222 -4m7d_1_P
1223 -4k4u_1_D
1224 -4k4u_1_H
1225 -6rt7_1_E
1226 -6rt7_1_A
1227 -2voo_1_C
1228 -2voo_1_D
1229 -5k78_1_X
1230 -5k78_1_Y
1231 -4ylo_1_9
1232 -5vyc_1_I2
1233 -5vyc_1_I3
1234 -5vyc_1_I5
1235 -5vyc_1_I1
1236 -5vyc_1_I6
1237 -5vyc_1_I4
1238 -6ip8_1_2M
1239 -6ip5_1_2M
1240 -6ip6_1_2M
1241 -6qcs_1_M
1242 -7b5k_1_Z
1243 -4nia_1_O
1244 -4nia_1_J
1245 -4nia_1_K
1246 -4nia_1_L
1247 -4nia_1_F
1248 -4oq9_1_K
1249 -4oq9_1_O
1250 -4oq9_1_J
1251 -4oq9_1_F
1252 -4oq9_1_L
1253 -6r9q_1_B
1254 -7m4u_1_A
1255 -6v3a_1_SN1
1256 -6v3b_1_SN1
1257 -6v39_1_SN1
1258 -6v3e_1_SN1
1259 -4dr6_1_V
1260 -6kql_1_I
1261 -4eya_1_M
1262 -4eya_1_N
1263 -4eya_1_A
1264 -4eya_1_B
1265 -2wj8_1_D
1266 -2wj8_1_I
1267 -2wj8_1_L
1268 -2wj8_1_F
1269 -2wj8_1_C
1270 -2wj8_1_Q
1271 -2wj8_1_J
1272 -2wj8_1_P
1273 -2wj8_1_K
1274 -2wj8_1_E
1275 -2wj8_1_T
1276 -2wj8_1_B
1277 -2wj8_1_O
1278 -2wj8_1_N
1279 -2wj8_1_A
1280 -2wj8_1_H
1281 -2wj8_1_R
1282 -2wj8_1_M
1283 -2wj8_1_S
1284 -2wj8_1_G
1285 -4e6b_1_E
1286 -4e6b_1_F
1287 -6p71_1_I
1288 -3pdm_1_R
1289 -5det_1_P
1290 -5els_1_I
1291 -4n2s_1_B
1292 -5fl8_1_Y
1293 -5jcs_1_Y
1294 -4yoe_1_E
1295 -6ow3_1_I
1296 -6ovy_1_I
1297 -6oy6_1_I
1298 -4qvd_1_H
1299 -5gxi_1_B
1300 -7n06_1_G
1301 -7n06_1_H
1302 -7n06_1_I
1303 -7n06_1_J
1304 -7n06_1_K
1305 -7n06_1_L
1306 -7n33_1_G
1307 -7n33_1_H
1308 -7n33_1_I
1309 -7n33_1_J
1310 -7n33_1_K
1311 -7n33_1_L
1312 -5mc6_1_N
1313 -4eya_1_O
1314 -4eya_1_P
1315 -4eya_1_C
1316 -4eya_1_D
1317 -6htq_1_V
1318 -6htq_1_W
1319 -6htq_1_U
1320 -6uu6_1_333
1321 -5a0v_1_F
1322 -3avt_1_T
1323 -6d1v_1_C
1324 -4s2x_1_B
1325 -4s2y_1_B
1326 -5wnu_1_B
1327 -1vtm_1_R
1328 -5elt_1_F
1329 -5elt_1_E
1330 -6xlj_1_R
1331 -6u9x_1_H
1332 -6u9x_1_K
1333 -5elk_1_R
1334 -6okk_1_G
1335 -4cxg_1_A
1336 -4cxh_1_A
1337 -6bk8_1_I
1338 -4cxg_1_B
1339 -4cxh_1_B
1340 -5z4d_1_B
1341 -6o78_1_E
1342 -6xa1_1_BV
1343 -6ha8_1_X
1344 -1m8w_1_E
1345 -1m8w_1_F
1346 -5udi_1_B
1347 -5udl_1_B
1348 -5udk_1_B
1349 -5udj_1_B
1350 -5w5i_1_B
1351 -5w5i_1_D
1352 -5w5h_1_B
1353 -5w5h_1_D
1354 -4eya_1_K
1355 -4eya_1_L
1356 -4eya_1_I
1357 -4eya_1_J
1358 -4g9z_1_E
1359 -4g9z_1_F
1360 -3nma_1_B
1361 -3nma_1_C
1362 -6een_1_G
1363 -6een_1_I
1364 -6een_1_H
1365 -4wti_1_T
1366 -4wti_1_P
1367 -5l3p_1_Y
1368 -4hor_1_X
1369 -3rzo_1_R
1370 -2f4v_1_Z
1371 -1qln_1_R
1372 -3cw1_1_X
1373 -3cw1_1_W
1374 -7b0y_1_A
1375 -6ogy_1_M
1376 -6ogy_1_N
1377 -6uej_1_B
1378 -6ywy_1_BB
1379 -5ytx_1_B
1380 -4g0a_1_H
1381 -6r9p_1_B
1382 -3koa_1_C
1383 -4n48_1_D
1384 -4n48_1_G
1385 -6kug_1_B
1386 -6ktc_1_V
1387 -6ole_1_U
1388 -6om0_1_U
1389 -6olg_1_BV
1390 -6oli_1_U
1391 -6om7_1_U
1392 -6w6l_1_U
1393 -6olz_1_BV
1394 -6olf_1_U
1395 -5lzd_1_X
1396 -6m7k_1_B
1397 -3cd6_1_4
1398 -3cma_1_5
1399 -6n9e_1_2W
1400 -1vqo_1_4
1401 -1qvg_1_3
1402 -3cme_1_5
1403 -5lzd_1_W
1404 -5lze_1_W
1405 -5lzc_1_W
1406 -5lzb_1_W
1407 -3wzi_1_C
1408 -1n33_1_Z
1409 -6dti_1_W
1410 -3d2s_1_F
1411 -3d2s_1_H
1412 -5mrc_1_AA
1413 -5mre_1_AA
1414 -5mrf_1_AA
1415 -7jhy_1_Z
1416 -4wkr_1_C
1417 -4v99_1_EC
1418 -4v99_1_AC
1419 -4v99_1_BH
1420 -4v99_1_CH
1421 -4v99_1_AM
1422 -4v99_1_DC
1423 -4v99_1_JW
1424 -4v99_1_EH
1425 -4v99_1_BW
1426 -4v99_1_FW
1427 -4v99_1_AW
1428 -4v99_1_BC
1429 -4v99_1_BM
1430 -4v99_1_IC
1431 -4v99_1_EM
1432 -4v99_1_ER
1433 -4v99_1_IW
1434 -4v99_1_JH
1435 -4v99_1_JR
1436 -4v99_1_AH
1437 -4v99_1_GR
1438 -4v99_1_IR
1439 -4v99_1_BR
1440 -4v99_1_CW
1441 -4v99_1_HR
1442 -4v99_1_FH
1443 -4v99_1_HC
1444 -4v99_1_DW
1445 -4v99_1_GC
1446 -4v99_1_JC
1447 -4v99_1_DM
1448 -4v99_1_EW
1449 -4v99_1_AR
1450 -4v99_1_CR
1451 -4v99_1_JM
1452 -4v99_1_CC
1453 -4v99_1_IH
1454 -4v99_1_FR
1455 -4v99_1_CM
1456 -4v99_1_IM
1457 -4v99_1_FM
1458 -4v99_1_FC
1459 -4v99_1_GH
1460 -4v99_1_HM
1461 -4v99_1_HH
1462 -4v99_1_DR
1463 -4v99_1_HW
1464 -4v99_1_GW
1465 -4v99_1_DH
1466 -4v99_1_GM
1467 -6rt4_1_D
1468 -6rt4_1_C
1469 -6zvh_1_X
1470 -4dwa_1_D
1471 -6n6c_1_D
1472 -6n6j_1_C
1473 -6n6j_1_D
1474 -6p7q_1_E
1475 -6p7q_1_F
1476 -6p7q_1_D
1477 -6rcl_1_C
1478 -5jju_1_C
1479 -4ejt_1_G
1480 -6lkq_1_W
1481 -3qsu_1_P
1482 -3qsu_1_R
1483 -2xs7_1_B
1484 -1n38_1_B
1485 -4qvc_1_G
1486 -6mpf_1_W
1487 -6spc_1_A
1488 -6spe_1_A
1489 -6zvk_1_D2
1490 -7a01_1_D2
1491 -6fti_1_V
1492 -6ftj_1_V
1493 -6ftg_1_V
1494 -4g0a_1_G
1495 -4g0a_1_F
1496 -4g0a_1_E
1497 -2b2d_1_S
1498 -5hkc_1_C
1499 -1rmv_1_B
1500 -4qu7_1_X
1501 -4qu7_1_V
1502 -4qu7_1_U
1503 -6pmi_1_3
1504 -6pmj_1_3
1505 -5hjz_1_C
1506 6ydp_1_AA_1176-2737 1 6ydp_1_AA_1176-2737
1507 6ydw_1_AA_1176-2737 2 6ydw_1_AA_1176-2737
1508 7d1a_1_A_805-902 3 7d1a_1_A_805-902
...@@ -1514,18 +9,18 @@ ...@@ -1514,18 +9,18 @@
1514 7o7z_1_AH_144-220 9 7o7z_1_AH_144-220
1515 4c9d_1_D_29-1 10 4c9d_1_D_29-1
1516 4c9d_1_C_29-1 11 4c9d_1_C_29-1
1517 -7aih_1_1_2400-2963
1518 7aih_1_1_2984-3610 12 7aih_1_1_2984-3610
1519 -7ane_1_2_1904-2468 13 +7aih_1_1_2400-2963
1520 7ane_1_2_2489-3115 14 7ane_1_2_2489-3115
15 +7ane_1_2_1904-2468
1521 5g2x_1_A_595-692 16 5g2x_1_A_595-692
1522 -7aor_1_2_2020-2579
1523 7aor_1_2_2589-3210 17 7aor_1_2_2589-3210
18 +7aor_1_2_2020-2579
1524 7a5p_1_2_259-449 19 7a5p_1_2_259-449
1525 -7aor_1_A_2020-2579
1526 7aor_1_A_2589-3210 20 7aor_1_A_2589-3210
1527 -7am2_1_1_1904-2470 21 +7aor_1_A_2020-2579
1528 7am2_1_1_2491-3117 22 7am2_1_1_2491-3117
1529 -7ane_1_1_1904-2468 23 +7am2_1_1_1904-2470
1530 7ane_1_1_2489-3115 24 7ane_1_1_2489-3115
25 +7ane_1_1_1904-2468
1531 6uz7_1_8_2140-2825 26 6uz7_1_8_2140-2825
......
This diff could not be displayed because it is too large.
...@@ -920,14 +920,22 @@ def general_stats(): ...@@ -920,14 +920,22 @@ def general_stats():
920 920
921 @trace_unhandled_exceptions 921 @trace_unhandled_exceptions
922 def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s): 922 def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
923 - 923 + """
924 + Get the pairwise distances in one 3D molecule, given its aligned sequence (with gaps).
925 + Returns a tuple of numpy arrays:
926 + - The first is a 0/1 mask matrix, whose values are 1 if the distance is defined, and 0 if it is NaN (unresolved residue, or missing atom...)
927 + - The second is the distance matrix (in angströms), unresolved positions are 0 (not NaN)
928 + - The third is the square of the second (square-distance matrix), unresolved positions are 0 (not NaN)
929 + """
930 +
924 # Identify the right 3D file 931 # Identify the right 3D file
925 - filename = '' 932 + filename = ""
926 for file in filelist: 933 for file in filelist:
927 if file.startswith(s.id.split("RF")[0].replace('-', '').replace('[', '_').replace(']', '_')): 934 if file.startswith(s.id.split("RF")[0].replace('-', '').replace('[', '_').replace(']', '_')):
928 filename = path_to_3D_data + "rna_mapped_to_Rfam/" + file 935 filename = path_to_3D_data + "rna_mapped_to_Rfam/" + file
929 break 936 break
930 if not len(filename): 937 if not len(filename):
938 + # chain is not in file list. Maybe you are in non-redundant mode and it is not a representative (normal case).
931 return None, None, None 939 return None, None, None
932 940
933 # Get the coordinates of every existing nt in the 3D file 941 # Get the coordinates of every existing nt in the 3D file
...@@ -938,9 +946,9 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s): ...@@ -938,9 +946,9 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
938 warn("No C1' atoms in " + filename.split('/')[-1] + ", ignoring") 946 warn("No C1' atoms in " + filename.split('/')[-1] + ", ignoring")
939 return None, None, None 947 return None, None, None
940 except FileNotFoundError: 948 except FileNotFoundError:
949 + warn(f"{label} not found in the mapped mmCIF files")
941 return None, None, None 950 return None, None, None
942 951
943 -
944 # Get the coordinates of every position in the alignment 952 # Get the coordinates of every position in the alignment
945 nb_gap = 0 953 nb_gap = 0
946 coordinates_with_gaps = [] 954 coordinates_with_gaps = []
...@@ -965,7 +973,6 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s): ...@@ -965,7 +973,6 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
965 d[i,j] = get_euclidian_distance(coordinates_with_gaps[i], coordinates_with_gaps[j]) 973 d[i,j] = get_euclidian_distance(coordinates_with_gaps[i], coordinates_with_gaps[j])
966 974
967 # Save the individual distance matrices 975 # Save the individual distance matrices
968 - # if f not in LSU_set and f not in SSU_set:
969 np.savetxt(runDir + '/results/distance_matrices/' + f + '_'+ label + '/'+ s.id.strip("\'") + '.csv', d, delimiter=",", fmt="%.3f") 976 np.savetxt(runDir + '/results/distance_matrices/' + f + '_'+ label + '/'+ s.id.strip("\'") + '.csv', d, delimiter=",", fmt="%.3f")
970 977
971 # For the average and sd, we want to consider only positions of the consensus model. This means: 978 # For the average and sd, we want to consider only positions of the consensus model. This means:
...@@ -979,7 +986,7 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s): ...@@ -979,7 +986,7 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
979 while cm_coords[i] is None: 986 while cm_coords[i] is None:
980 i += 1 987 i += 1
981 family_start = int(cm_coords[i]) 988 family_start = int(cm_coords[i])
982 - # c = np.zeros((family_end, family_end), dtype=np.float32) # new matrix of size of the consensus model for the family 989 + # new matrix of size of the consensus model for the family
983 c = np.NaN * np.ones((family_end, family_end), dtype=np.float32) 990 c = np.NaN * np.ones((family_end, family_end), dtype=np.float32)
984 # set to NaN zones that never exist in the 3D data 991 # set to NaN zones that never exist in the 3D data
985 for i in range(family_start-1): 992 for i in range(family_start-1):
...@@ -1000,8 +1007,8 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s): ...@@ -1000,8 +1007,8 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
1000 return 1-np.isnan(c).astype(int), np.nan_to_num(c), np.nan_to_num(c*c) 1007 return 1-np.isnan(c).astype(int), np.nan_to_num(c), np.nan_to_num(c*c)
1001 1008
1002 @trace_unhandled_exceptions 1009 @trace_unhandled_exceptions
1003 -def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): 1010 +def get_avg_std_distance_matrix(f, res, consider_all_atoms=False, redundancy=False, multithread=False):
1004 - np.seterr(divide='ignore') # ignore division by zero issues 1011 + # np.seterr(divide='ignore') # ignore division by zero issues
1005 1012
1006 if consider_all_atoms: 1013 if consider_all_atoms:
1007 label = "base" 1014 label = "base"
...@@ -1009,23 +1016,38 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): ...@@ -1009,23 +1016,38 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False):
1009 label = "backbone" 1016 label = "backbone"
1010 1017
1011 if not multithread: 1018 if not multithread:
1012 - # This function call is for ONE worker. 1019 + # This function call is for ONE worker. Get a worker number for it to position the progress bar.
1013 - # Get a worker number for it to position the progress bar
1014 global idxQueue 1020 global idxQueue
1015 thr_idx = idxQueue.get() 1021 thr_idx = idxQueue.get()
1016 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} {f} {label} distance matrices") 1022 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} {f} {label} distance matrices")
1017 1023
1018 os.makedirs(runDir + '/results/distance_matrices/' + f + '_' + label, exist_ok=True ) 1024 os.makedirs(runDir + '/results/distance_matrices/' + f + '_' + label, exist_ok=True )
1019 - 1025 +
1020 - align = AlignIO.read(path_to_seq_data + f"realigned/{f}_3d_only.afa", "fasta") 1026 + # Get the list of 3D files. They should exist in the folder from the last RNANet run with --extract option.
1021 - ncols = align.get_alignment_length() 1027 + if redundancy:
1028 + with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
1029 + conn.execute('pragma journal_mode=wal')
1030 + r = sql_ask_database(conn, f"SELECT structure_id, '_1_', chain_name, '_', CAST(pdb_start AS TEXT), '-', CAST(pdb_end AS TEXT) FROM chain WHERE rfam_acc='{f}' AND issue=0;")
1031 + filelist = sorted([ ''.join(list(x))+'.cif' for x in r ])
1032 + else:
1033 + filelist = sorted(representatives_from_nrlist(res, mapped_to=f))
1034 +
1035 + # Open the 3D-only alignment. keep only files that will be considered in 3D (e.g. representatives)
1036 + temp_align = AlignIO.read(path_to_seq_data + f"realigned/{f}_3d_only.afa", "fasta")
1037 + align = []
1038 + for s in temp_align:
1039 + filename = ""
1040 + for file in filelist:
1041 + if file.startswith(s.id.split("RF")[0].replace('-', '').replace('[', '_').replace(']', '_')):
1042 + align.append(s)
1043 + break
1044 + ncols = temp_align.get_alignment_length()
1022 found = 0 1045 found = 0
1023 notfound = 0 1046 notfound = 0
1047 +
1024 # retrieve the mappings between this family's alignment and the CM model: 1048 # retrieve the mappings between this family's alignment and the CM model:
1025 with sqlite3.connect(runDir + "/results/RNANet.db") as conn: 1049 with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
1026 conn.execute('pragma journal_mode=wal') 1050 conn.execute('pragma journal_mode=wal')
1027 - r = sql_ask_database(conn, f"SELECT structure_id, '_1_', chain_name, '_', CAST(pdb_start AS TEXT), '-', CAST(pdb_end AS TEXT) FROM chain WHERE rfam_acc='{f}';")
1028 - filelist = sorted([ ''.join(list(x))+'.cif' for x in r ])
1029 r = sql_ask_database(conn, f"SELECT cm_coord FROM align_column WHERE rfam_acc = '{f}' AND index_ali > 0 ORDER BY index_ali ASC;") 1051 r = sql_ask_database(conn, f"SELECT cm_coord FROM align_column WHERE rfam_acc = '{f}' AND index_ali > 0 ORDER BY index_ali ASC;")
1030 cm_coords = [ x[0] for x in r ] # len(cm_coords) is the number of saved columns. There are many None values in the list. 1052 cm_coords = [ x[0] for x in r ] # len(cm_coords) is the number of saved columns. There are many None values in the list.
1031 i = len(cm_coords)-1 1053 i = len(cm_coords)-1
...@@ -1042,7 +1064,7 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): ...@@ -1042,7 +1064,7 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False):
1042 counts = np.zeros((family_end, family_end)) 1064 counts = np.zeros((family_end, family_end))
1043 avg = np.zeros((family_end, family_end)) 1065 avg = np.zeros((family_end, family_end))
1044 std = np.zeros((family_end, family_end)) 1066 std = np.zeros((family_end, family_end))
1045 - 1067 +
1046 if not multithread: 1068 if not multithread:
1047 pbar = tqdm(total = len(align), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} {label} distance matrices", unit="chains", leave=False) 1069 pbar = tqdm(total = len(align), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} {label} distance matrices", unit="chains", leave=False)
1048 pbar.update(0) 1070 pbar.update(0)
...@@ -1054,16 +1076,15 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): ...@@ -1054,16 +1076,15 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False):
1054 avg += d 1076 avg += d
1055 std += dsquared 1077 std += dsquared
1056 else: 1078 else:
1079 + # d is None means the considered RNA is not in the filelist (e.g., not a representative), or is not found.
1057 notfound += 1 1080 notfound += 1
1058 pbar.update(1) 1081 pbar.update(1)
1059 pbar.close() 1082 pbar.close()
1060 else: 1083 else:
1061 # We split the work for one family on multiple workers. 1084 # We split the work for one family on multiple workers.
1062 -
1063 p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=nworkers) 1085 p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=nworkers)
1064 try: 1086 try:
1065 fam_pbar = tqdm(total=len(align), desc=f"{f} {label} pair distances", position=0, unit="chain", leave=True) 1087 fam_pbar = tqdm(total=len(align), desc=f"{f} {label} pair distances", position=0, unit="chain", leave=True)
1066 - # Apply work_pssm_remap to each RNA family
1067 for i, (contrib, d, dsquared) in enumerate(p.imap_unordered(partial(par_distance_matrix, filelist, f, label, cm_coords, consider_all_atoms), align, chunksize=1)): 1088 for i, (contrib, d, dsquared) in enumerate(p.imap_unordered(partial(par_distance_matrix, filelist, f, label, cm_coords, consider_all_atoms), align, chunksize=1)):
1068 if d is not None: 1089 if d is not None:
1069 found += 1 1090 found += 1
...@@ -1128,15 +1149,15 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): ...@@ -1128,15 +1149,15 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False):
1128 if not multithread: 1149 if not multithread:
1129 idxQueue.put(thr_idx) # replace the thread index in the queue 1150 idxQueue.put(thr_idx) # replace the thread index in the queue
1130 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") 1151 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
1131 - else: 1152 + # else:
1132 - # basically, for the rRNAs 1153 + # # basically, for the rRNAs
1133 - # we delete the unique csv files for each chain, they wheight hundreds of gigabytes together 1154 + # # we delete the unique csv files for each chain, they wheight hundreds of gigabytes together
1134 - warn(f"Removing {f} ({label}) individual distance matrices, they weight too much. keeping the averages and standard deviations.") 1155 + # warn(f"Removing {f} ({label}) individual distance matrices, they weight too much. keeping the averages and standard deviations.")
1135 - for csv in glob.glob(runDir + '/results/distance_matrices/' + f + '_'+ label + "/*-" + f + ".csv"): 1156 + # for csv in glob.glob(runDir + '/results/distance_matrices/' + f + '_'+ label + "/*-" + f + ".csv"):
1136 - try: 1157 + # try:
1137 - os.remove(csv) 1158 + # os.remove(csv)
1138 - except FileNotFoundError: 1159 + # except FileNotFoundError:
1139 - pass 1160 + # pass
1140 return 0 1161 return 0
1141 1162
1142 @trace_unhandled_exceptions 1163 @trace_unhandled_exceptions
...@@ -1195,7 +1216,7 @@ def nt_3d_centers(cif_file, consider_all_atoms): ...@@ -1195,7 +1216,7 @@ def nt_3d_centers(cif_file, consider_all_atoms):
1195 try: 1216 try:
1196 structure = MMCIFParser().get_structure(cif_file, cif_file) 1217 structure = MMCIFParser().get_structure(cif_file, cif_file)
1197 except Exception as e: 1218 except Exception as e:
1198 - warn(f"{cif_file.split('/')[-1]} : {e}", error=True) 1219 + warn(f"\n{cif_file.split('/')[-1]} : {e}", error=True)
1199 with open(runDir + "/errors.txt", "a") as f: 1220 with open(runDir + "/errors.txt", "a") as f:
1200 f.write(f"Exception in nt_3d_centers({cif_file.split('/')[-1]})\n") 1221 f.write(f"Exception in nt_3d_centers({cif_file.split('/')[-1]})\n")
1201 f.write(str(e)) 1222 f.write(str(e))
...@@ -1225,24 +1246,54 @@ def nt_3d_centers(cif_file, consider_all_atoms): ...@@ -1225,24 +1246,54 @@ def nt_3d_centers(cif_file, consider_all_atoms):
1225 result.append(res) 1246 result.append(res)
1226 return(result) 1247 return(result)
1227 1248
def representatives_from_nrlist(res, mapped_to=None):
    """
    Returns the list of filenames corresponding to the 3D cif files of structures
    that represent a "cluster" (a redundancy class) at the given resolution.

    res:        resolution threshold. The non-redundant list used is the one with the
                smallest cutoff among 1.5, 2.0, 2.5, 3.0, 3.5, 4.0 and 20.0 that is >= res.
                It is read from "{path_to_3D_data}/latest_nr_list_{cutoff}A.csv".
    mapped_to:  optional family accession (matched against chain.rfam_acc in RNANet.db).
                If None, names look like "<structure>_<model>_<chain>.cif".
                If not None, the database is searched for the mapping of each representative
                to that family, names look like "<structure>_<model>_<chain>_<start>-<end>.cif",
                and representatives without a mapping (with issue=0) are skipped.

    Raises ValueError if res is larger than the largest available cutoff.
    """

    # Read the NR file: pick the tightest cutoff that still covers the requested resolution
    cutoffs = [1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 20.0]
    eligible = [c for c in cutoffs if c >= res]
    if not eligible:
        raise ValueError(f"No non-redundant list available for resolution {res} (largest cutoff is {cutoffs[-1]})")
    nr_code = min(eligible)
    fpath = f"{path_to_3D_data}/latest_nr_list_{nr_code}A.csv"
    df = pd.read_csv(os.path.abspath(fpath))

    # Open the database once, and only if a mapping is requested. sqlite3's "with connection"
    # block only commits, it does not close, so we close explicitly in the finally clause
    # instead of leaving one open connection per representative.
    conn = None
    if mapped_to is not None:
        conn = sqlite3.connect(runDir + "/results/RNANet.db")
        conn.execute('pragma journal_mode=wal')

    # build the list
    repres = []
    try:
        for representative in df["representative"]:
            # A representative may join several chains with '+'; split() also handles the single-chain case.
            for member in representative.split('+'):
                fields = member.split('|')
                structure, model, chain = fields[0].lower(), fields[1], fields[2]

                if conn is None:
                    repres.append(structure + '_' + model + '_' + chain + ".cif")
                    continue

                # we need a mapping start and end, query database
                # NOTE(review): the query is built by string formatting from NR-list content;
                # switch to bound parameters if sql_ask_database supports them.
                r = sql_ask_database(conn, f"SELECT pdb_start, pdb_end FROM chain WHERE rfam_acc='{mapped_to}' AND structure_id='{structure}' AND chain_name='{chain}' AND issue=0;")

                if not len(r):
                    # there is no chain named like this and mapped to this family
                    continue

                if len(r) > 1:
                    warn(f"Several entries found for structure {structure}-{chain} ({mapped_to}) : {len(r)} entries")
                # when several mappings exist, the first one returned is used
                repres.append(structure + '_' + model + '_' + chain + '_' + str(r[0][0]) + '-' + str(r[0][1]) + ".cif")
    finally:
        if conn is not None:
            conn.close()

    return repres
1247 1298
1248 def log_to_pbar(pbar): 1299 def log_to_pbar(pbar):
...@@ -1281,11 +1332,12 @@ if __name__ == "__main__": ...@@ -1281,11 +1332,12 @@ if __name__ == "__main__":
1281 DELETE_OLD_DATA = False 1332 DELETE_OLD_DATA = False
1282 DO_WADLEY_ANALYSIS = False 1333 DO_WADLEY_ANALYSIS = False
1283 DO_AVG_DISTANCE_MATRIX = False 1334 DO_AVG_DISTANCE_MATRIX = False
1335 + REDUNDANT_DIST_MAT = True
1284 DO_HIRE_RNA_MEASURES = False 1336 DO_HIRE_RNA_MEASURES = False
1285 RESCAN_GMM_COMP_NUM = False 1337 RESCAN_GMM_COMP_NUM = False
1286 try: 1338 try:
1287 opts, _ = getopt.getopt( sys.argv[1:], "r:h", 1339 opts, _ = getopt.getopt( sys.argv[1:], "r:h",
1288 - [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", 1340 + [ "help", "from-scratch", "wadley", "distance-matrices", "non-redundant", "resolution=",
1289 "3d-folder=", "seq-folder=", "hire-rna", "rescan-nmodes" ]) 1341 "3d-folder=", "seq-folder=", "hire-rna", "rescan-nmodes" ])
1290 except getopt.GetoptError as err: 1342 except getopt.GetoptError as err:
1291 print(err) 1343 print(err)
...@@ -1301,14 +1353,17 @@ if __name__ == "__main__": ...@@ -1301,14 +1353,17 @@ if __name__ == "__main__":
1301 print() 1353 print()
1302 print("-r 20.0 [ --resolution=20.0 ]\tCompute statistics using chains of resolution 20.0A or better.") 1354 print("-r 20.0 [ --resolution=20.0 ]\tCompute statistics using chains of resolution 20.0A or better.")
1303 print("--3d-folder=…\t\t\tPath to a folder containing the 3D data files. Required subfolders should be:" 1355 print("--3d-folder=…\t\t\tPath to a folder containing the 3D data files. Required subfolders should be:"
1304 - "\n\t\t\t\t\tdatapoints/\t\tFinal results in CSV file format.") 1356 + "\n\t\t\t\t\tdatapoints/\t\tFinal results in CSV file format."
1357 + "\n\t\t\t\t\trna_mapped_to_Rfam/\tmmCIF files produced by RNANet (using --extract)."
1358 + "\n\t\t\t\t\trna_only/\t\tmmCIF files produced by RNANet in no-homology mode.")
1305 print("--seq-folder=…\t\t\tPath to a folder containing the sequence and alignment files. Required subfolder:" 1359 print("--seq-folder=…\t\t\tPath to a folder containing the sequence and alignment files. Required subfolder:"
1306 "\n\t\t\t\t\trealigned/\t\tSequences, covariance models, and alignments by family") 1360 "\n\t\t\t\t\trealigned/\t\tSequences, covariance models, and alignments by family")
1307 print("--from-scratch\t\t\tDo not use precomputed results from past runs, recompute everything") 1361 print("--from-scratch\t\t\tDo not use precomputed results from past runs, recompute everything")
1308 print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.") 1362 print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.")
1363 + print("--non-redundant\t\t\tIn distance matrix computation, only use the equivalence class representatives.\n\t\t\t\t Does not apply to rRNAs, where the option is always True.")
1309 print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.") 1364 print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.")
1310 - print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data.") 1365 + print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model,\n\t\t\t\t and plot GMMs on the data.")
1311 - print("--rescan-nmodes\t\tDo not assume the number of modes in distances and angles distributions, measure it.") 1366 + print("--rescan-nmodes\t\t\tDo not assume the number of modes in distances and angles distributions, measure it.")
1312 sys.exit() 1367 sys.exit()
1313 elif opt == "--version": 1368 elif opt == "--version":
1314 print("RNANet statistics 1.6 beta") 1369 print("RNANet statistics 1.6 beta")
...@@ -1350,6 +1405,8 @@ if __name__ == "__main__": ...@@ -1350,6 +1405,8 @@ if __name__ == "__main__":
1350 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True) 1405 os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True)
1351 elif opt == "--rescan-nmodes": 1406 elif opt == "--rescan-nmodes":
1352 RESCAN_GMM_COMP_NUM = True 1407 RESCAN_GMM_COMP_NUM = True
1408 + elif opt == "--non-redundant":
1409 + REDUNDANT_DIST_MAT = False
1353 1410
1354 # Load mappings. famlist will contain only families with structures at this resolution threshold. 1411 # Load mappings. famlist will contain only families with structures at this resolution threshold.
1355 1412
...@@ -1373,7 +1430,7 @@ if __name__ == "__main__": ...@@ -1373,7 +1430,7 @@ if __name__ == "__main__":
1373 ignored = families[families.n_chains < 3].rfam_acc.tolist() 1430 ignored = families[families.n_chains < 3].rfam_acc.tolist()
1374 famlist.sort(key=family_order) 1431 famlist.sort(key=family_order)
1375 1432
1376 - print(f"Found {len(famlist)} families with chains of resolution {res_thr}A or better.") 1433 + print(f"Found {len(famlist)} families with chains or better.")
1377 if len(ignored): 1434 if len(ignored):
1378 print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:", " ".join(ignored)+'\n') 1435 print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:", " ".join(ignored)+'\n')
1379 1436
...@@ -1413,8 +1470,8 @@ if __name__ == "__main__": ...@@ -1413,8 +1470,8 @@ if __name__ == "__main__":
1413 e3 = file.split('_')[2] 1470 e3 = file.split('_')[2]
1414 extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3) 1471 extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3)
1415 for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]: # Process the rRNAs later only 3 by 3 1472 for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]: # Process the rRNAs later only 3 by 3
1416 - joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) 1473 + joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, res_thr, True, REDUNDANT_DIST_MAT, False)))
1417 - joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) 1474 + joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, res_thr, False, REDUNDANT_DIST_MAT, False)))
1418 1475
1419 # Do general family statistics 1476 # Do general family statistics
1420 joblist.append(Job(function=stats_len)) # Computes figures about chain lengths 1477 joblist.append(Job(function=stats_len)) # Computes figures about chain lengths
...@@ -1428,7 +1485,6 @@ if __name__ == "__main__": ...@@ -1428,7 +1485,6 @@ if __name__ == "__main__":
1428 # Do geometric measures 1485 # Do geometric measures
1429 if n_unmapped_chains: 1486 if n_unmapped_chains:
1430 os.makedirs(runDir + "/results/geometry/all-atoms/distances/", exist_ok=True) 1487 os.makedirs(runDir + "/results/geometry/all-atoms/distances/", exist_ok=True)
1431 - # structure_list = os.listdir(path_to_3D_data + "rna_only")
1432 structure_list = representatives_from_nrlist(res_thr) 1488 structure_list = representatives_from_nrlist(res_thr)
1433 for f in structure_list: 1489 for f in structure_list:
1434 if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]): 1490 if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]):
...@@ -1438,12 +1494,17 @@ if __name__ == "__main__": ...@@ -1438,12 +1494,17 @@ if __name__ == "__main__":
1438 1494
1439 # Now process the memory-heavy tasks family by family 1495 # Now process the memory-heavy tasks family by family
1440 if DO_AVG_DISTANCE_MATRIX: 1496 if DO_AVG_DISTANCE_MATRIX:
1497 + print("Computing distances matrices of rRNA families using only the equivalence class representatives, for storage purposes.")
1498 + # Note that, if the user has more than 300 GB of free storage space, one could use all the rRNAs.
1499 + # Admittedly, within an equivalence class, the rRNA molecules are close in sequence and structure.
1500 + # Still, having several 3D structures of the same molecule gives insight into structural flexibility in some regions.
1501 + # Detect free space automatically ? TODISCUSS + TODECIDE + TODO
1441 for f in LSU_set: 1502 for f in LSU_set:
1442 - get_avg_std_distance_matrix(f, True, True) 1503 + get_avg_std_distance_matrix(f, res_thr, True, False, True)
1443 - get_avg_std_distance_matrix(f, False, True) 1504 + get_avg_std_distance_matrix(f, res_thr, False, False, True)
1444 for f in SSU_set: 1505 for f in SSU_set:
1445 - get_avg_std_distance_matrix(f, True, True) 1506 + get_avg_std_distance_matrix(f, res_thr, True, False, True)
1446 - get_avg_std_distance_matrix(f, False, True) 1507 + get_avg_std_distance_matrix(f, res_thr, False, False, True)
1447 1508
1448 print() 1509 print()
1449 print() 1510 print()
...@@ -1477,3 +1538,4 @@ if __name__ == "__main__": ...@@ -1477,3 +1538,4 @@ if __name__ == "__main__":
1477 process_jobs(joblist) 1538 process_jobs(joblist)
1478 merge_jsons() 1539 merge_jsons()
1479 1540
1541 +
......