Louis BECQUEY

Revision 1 for Bioinformatics completed

...@@ -12,4 +12,5 @@ esl* ...@@ -12,4 +12,5 @@ esl*
12 12
13 # environment stuff 13 # environment stuff
14 .vscode/ 14 .vscode/
15 -*.pyc
...\ No newline at end of file ...\ No newline at end of file
15 +*.pyc
16 +__pycache__/
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -94,6 +94,8 @@ The detailed list of options is below: ...@@ -94,6 +94,8 @@ The detailed list of options is below:
94 -h [ --help ] Print this help message 94 -h [ --help ] Print this help message
95 --version Print the program version 95 --version Print the program version
96 96
97 +-f [ --full-inference ] Infer new 3D->family mappings even if Rfam already provides some. Yields more copies of chains
98 + mapped to different families.
97 -r 4.0 [ --resolution=4.0 ] Maximum 3D structure resolution to consider a RNA chain. 99 -r 4.0 [ --resolution=4.0 ] Maximum 3D structure resolution to consider a RNA chain.
98 -s Run statistics computations after completion 100 -s Run statistics computations after completion
99 --extract Extract the portions of 3D RNA chains to individual mmCIF files. 101 --extract Extract the portions of 3D RNA chains to individual mmCIF files.
...@@ -105,7 +107,7 @@ The detailed list of options is below: ...@@ -105,7 +107,7 @@ The detailed list of options is below:
105 RNAcifs/ Full structures containing RNA, in mmCIF format 107 RNAcifs/ Full structures containing RNA, in mmCIF format
106 rna_mapped_to_Rfam/ Extracted 'pure' RNA chains 108 rna_mapped_to_Rfam/ Extracted 'pure' RNA chains
107 datapoints/ Final results in CSV file format. 109 datapoints/ Final results in CSV file format.
108 ---seq-folder=… Path to a folder to store the sequence and alignment files. 110 +--seq-folder=… Path to a folder to store the sequence and alignment files. Subfolders will be:
109 rfam_sequences/fasta/ Compressed hits to Rfam families 111 rfam_sequences/fasta/ Compressed hits to Rfam families
110 realigned/ Sequences, covariance models, and alignments by family 112 realigned/ Sequences, covariance models, and alignments by family
111 --no-homology Do not try to compute PSSMs and do not align sequences. 113 --no-homology Do not try to compute PSSMs and do not align sequences.
...@@ -117,11 +119,12 @@ The detailed list of options is below: ...@@ -117,11 +119,12 @@ The detailed list of options is below:
117 --update-homologous Re-download Rfam and SILVA databases, realign all families, and recompute all CSV files 119 --update-homologous Re-download Rfam and SILVA databases, realign all families, and recompute all CSV files
118 --from-scratch Delete database, local 3D and sequence files, and known issues, and recompute. 120 --from-scratch Delete database, local 3D and sequence files, and known issues, and recompute.
119 --archive Create a tar.gz archive of the datapoints text files, and update the link to the latest archive 121 --archive Create a tar.gz archive of the datapoints text files, and update the link to the latest archive
122 +--no-logs Do not save per-chain logs of the numbering modifications
120 ``` 123 ```
121 124
122 Typical usage: 125 Typical usage:
123 ``` 126 ```
124 -nohup bash -c 'time ~/Projects/RNANet/RNAnet.py --3d-folder ~/Data/RNA/3D/ --seq-folder ~/Data/RNA/sequences -s --archive' & 127 +nohup bash -c 'time ~/Projects/RNANet/RNAnet.py --3d-folder ~/Data/RNA/3D/ --seq-folder ~/Data/RNA/sequences -s' &
125 ``` 128 ```
126 129
127 ## Post-computation task: estimate quality 130 ## Post-computation task: estimate quality
......
This diff could not be displayed because it is too large.
1 -1ml5_1_a_1-2914 1 +1eg0_1_O_1-73
2 -1ml5_1_a_151-2903
3 -1ml5_1_A_7-1515
4 -1ml5_1_A_2-1520
5 -1ml5_1_A_7-1518
6 -1ml5_1_b_5-121
7 2rdo_1_A_3-118 2 2rdo_1_A_3-118
8 4v48_1_A9_3-118 3 4v48_1_A9_3-118
9 4v47_1_A9_3-118 4 4v47_1_A9_3-118
10 -6zmi_1_L8_1267-4755
11 -6zm7_1_L8_1267-4755
12 -6y6x_1_L8_1267-4755
13 -6z6n_1_L8_1267-4755
14 -6qzp_1_L8_1267-4755
15 -6zme_1_L8_1267-4755
16 -6z6l_1_L8_1267-4755
17 -6ek0_1_L8_1267-4755
18 -6zmo_1_L8_1267-4755
19 -6z6m_1_L8_1267-4755
20 -6ole_1_D_1267-4755
21 -6om0_1_D_1267-4755
22 -6y2l_1_L8_1267-4755
23 -6lqm_1_8_1267-4755
24 -6y0g_1_L8_1267-4755
25 -6lu8_1_8_1267-4755
26 -6lsr_1_8_1267-4755
27 -6lss_1_8_1267-4755
28 -6oli_1_D_1267-4755
29 -6olg_1_A3_1267-4755
30 -6y57_1_L8_1267-4755
31 -5t2c_1_C_1267-4755
32 -6om7_1_D_1267-4755
33 -4ug0_1_L8_1267-4755
34 -6olf_1_D_1267-4755
35 -6ip5_1_1C_1267-4755
36 -6ip8_1_1C_1267-4755
37 -6olz_1_A3_1267-4755
38 -5aj0_1_A3_1267-4755
39 -5lks_1_L8_1267-4755
40 -6ip6_1_1C_1267-4755
41 -4v6x_1_A8_1267-4755
42 1vy7_1_AY_1-73 5 1vy7_1_AY_1-73
43 1vy7_1_CY_1-73 6 1vy7_1_CY_1-73
44 4w2h_1_CY_1-73 7 4w2h_1_CY_1-73
45 -2z9q_1_A_1-72 8 +1jgq_1_A_2-1520
9 +4v42_1_AA_2-1520
10 +1jgo_1_A_2-1520
11 +1jgp_1_A_2-1520
12 +1ml5_1_A_2-1520
13 +4v42_1_BA_1-2914
14 +1ml5_1_a_1-2914
46 4v42_1_BB_5-121 15 4v42_1_BB_5-121
16 +1ml5_1_b_5-121
17 +2rdo_1_B_1-2904
18 +4v48_1_A0_1-2904
19 +4v47_1_A0_1-2904
20 +4v48_1_BA_1-1543
21 +4v47_1_BA_1-1542
47 1ls2_1_B_1-73 22 1ls2_1_B_1-73
48 3ep2_1_Y_1-72 23 3ep2_1_Y_1-72
49 3eq3_1_Y_1-72 24 3eq3_1_Y_1-72
50 4v48_1_A6_1-73 25 4v48_1_A6_1-73
51 -1eg0_1_O_1-73 26 +2z9q_1_A_1-72
52 1gsg_1_T_1-72 27 1gsg_1_T_1-72
53 3jcr_1_H_1-115 28 3jcr_1_H_1-115
54 -4v42_1_BA_1-2914 29 +1x1l_1_A_1-132
55 -4v42_1_BA_151-2903 30 +1zc8_1_Z_1-93
56 -4v48_1_BA_1-91 31 +2ob7_1_D_1-132
57 -4v48_1_BA_6-1541
58 -4v48_1_BA_1-1543
59 -4v48_1_BA_6-1538
60 -4v47_1_BA_1-91
61 -4v47_1_BA_6-1540
62 -4v47_1_BA_1-1542
63 -4v47_1_BA_6-1537
64 -2rdo_1_B_1-2903
65 -2rdo_1_B_6-1460
66 -2rdo_1_B_1-1528
67 -2rdo_1_B_6-1457
68 -2rdo_1_B_160-2893
69 -2rdo_1_B_1-2904
70 -2rdo_1_B_6-1522
71 -4v48_1_A0_1-2903
72 -4v48_1_A0_6-1460
73 -4v48_1_A0_1-1528
74 -4v48_1_A0_6-1457
75 -4v48_1_A0_160-2893
76 -4v48_1_A0_1-2904
77 -4v48_1_A0_6-1522
78 -4v47_1_A0_1-2903
79 -4v47_1_A0_6-1460
80 -4v47_1_A0_1-1528
81 -4v47_1_A0_6-1457
82 -4v47_1_A0_160-2893
83 -4v47_1_A0_1-2904
84 -4v47_1_A0_6-1522
85 2ob7_1_A_10-319 32 2ob7_1_A_10-319
86 -1x1l_1_A_1-130
87 -1zc8_1_Z_1-130
88 -1zc8_1_Z_1-91
89 -2ob7_1_D_1-130
90 -6rxu_1_C2_588-2386
91 -6rxu_1_C2_583-2388
92 -6rxu_1_C2_588-2383
93 -5oql_1_2_588-2386
94 -5oql_1_2_583-2388
95 -5oql_1_2_588-2383
96 -6rxv_1_C2_588-2386
97 -6rxv_1_C2_583-2388
98 -6rxv_1_C2_588-2383
99 -6rxz_1_C2_588-2386
100 -6rxz_1_C2_583-2388
101 -6rxz_1_C2_588-2383
102 -6rxy_1_C2_588-2386
103 -6rxy_1_C2_583-2388
104 -6rxy_1_C2_588-2383
105 -6rxt_1_C2_588-2386
106 -6rxt_1_C2_583-2388
107 -6rxt_1_C2_588-2383
108 1r2x_1_C_1-58 33 1r2x_1_C_1-58
109 1r2w_1_C_1-58 34 1r2w_1_C_1-58
110 -1eg0_1_L_1-57
111 1eg0_1_L_1-56 35 1eg0_1_L_1-56
112 -1jgq_1_A_7-1518
113 -1jgq_1_A_20-55
114 -1jgq_1_A_2-1520
115 -1jgq_1_A_7-1515
116 -4v42_1_AA_7-1518
117 -4v42_1_AA_20-55
118 -4v42_1_AA_2-1520
119 -4v42_1_AA_7-1515
120 -1jgo_1_A_7-1518
121 -1jgo_1_A_20-55
122 -1jgo_1_A_2-1520
123 -1jgo_1_A_7-1515
124 -1jgp_1_A_7-1518
125 -1jgp_1_A_20-55
126 -1jgp_1_A_2-1520
127 -1jgp_1_A_7-1515
128 1zc8_1_A_1-59 36 1zc8_1_A_1-59
129 -1mvr_1_D_1-59 37 +1mvr_1_D_1-61
130 -4c9d_1_D_29-1 38 +4adx_1_9_1-123
131 -4c9d_1_C_29-1
132 -4adx_1_9_1-121
133 1zn1_1_B_1-59 39 1zn1_1_B_1-59
134 1emi_1_B_1-108 40 1emi_1_B_1-108
135 3iy9_1_A_498-1027 41 3iy9_1_A_498-1027
...@@ -143,25 +49,1558 @@ ...@@ -143,25 +49,1558 @@
143 3cw1_1_V_1-138 49 3cw1_1_V_1-138
144 3cw1_1_v_1-138 50 3cw1_1_v_1-138
145 2iy3_1_B_9-105 51 2iy3_1_B_9-105
146 -3jcr_1_N_1-188 52 +3jcr_1_N_1-107
147 -3jcr_1_N_1-106
148 2vaz_1_A_64-177 53 2vaz_1_A_64-177
149 -2ftc_1_R_1-1568
150 -2ftc_1_R_792-1568
151 2ftc_1_R_81-1466 54 2ftc_1_R_81-1466
152 3jcr_1_M_1-141 55 3jcr_1_M_1-141
153 -3jcr_1_M_1-188
154 -3jcr_1_M_1-107
155 -4v5z_1_B0_1-2899
156 4v5z_1_B0_1-2902 56 4v5z_1_B0_1-2902
157 -4v5z_1_B0_1-2840
158 5g2x_1_A_595-692 57 5g2x_1_A_595-692
159 3iy8_1_A_1-540 58 3iy8_1_A_1-540
160 4v5z_1_BY_2-113 59 4v5z_1_BY_2-113
161 4v5z_1_BZ_1-70 60 4v5z_1_BZ_1-70
162 -1mvr_1_B_1-96 61 +4v5z_1_B1_2-125
163 -4adx_1_0_1-2923 62 +1mvr_1_B_3-96
164 -4adx_1_0_132-2915 63 +4adx_1_0_1-2925
165 3eq4_1_Y_1-69 64 3eq4_1_Y_1-69
65 +6uz7_1_8_2140-2827
166 4v5z_1_AA_1-1563 66 4v5z_1_AA_1-1563
167 -4v5z_1_AA_1-1562 67 +6cfj_1_1X
68 +6cfj_1_2X
69 +5hcq_1_1X
70 +6cae_1_1X
71 +5hcq_1_2X
72 +5hcr_1_1X
73 +4z8c_1_1X
74 +5j4b_1_1X
75 +5j4b_1_2X
76 +4z8c_1_2X
77 +6cae_1_2X
78 +5j4c_1_1X
79 +5w4k_1_1X
80 +6of1_1_1X
81 +5hcr_1_2X
82 +5hd1_1_1X
83 +5hcp_1_1X
84 +6of1_1_2X
85 +5hau_1_1W
86 +5j4c_1_2X
87 +5wis_1_1X
88 +6xqd_1_1X
89 +6nd5_1_1X
90 +5w4k_1_2X
91 +5hau_1_2W
92 +6xqd_1_2X
93 +4y4p_1_1X
94 +6o97_1_1X
95 +5hcp_1_2X
96 +5doy_1_1X
97 +4zer_1_1X
98 +5wit_1_1X
99 +5hd1_1_2X
100 +6nd5_1_2X
101 +4z3s_1_1X
102 +7jql_1_1X
103 +7jqm_1_1X
104 +7jql_1_2X
105 +5wis_1_2X
106 +6nd6_1_1X
107 +6o97_1_2X
108 +4y4p_1_2X
109 +7jqm_1_2X
110 +4z3s_1_2X
111 +4zer_1_2X
112 +6uo1_1_2X
113 +6uo1_1_1X
114 +5doy_1_2X
115 +5wit_1_2X
116 +5f8k_1_1X
117 +6nd6_1_2X
118 +6xqe_1_1X
119 +6xqe_1_2X
120 +6n9e_1_1X
121 +6n9e_1_2X
122 +6n9f_1_1X
123 +5f8k_1_2X
124 +6n9f_1_2X
125 +6xz7_1_F
126 +6y69_1_W
127 +5afi_1_V
128 +5afi_1_W
129 +6h4n_1_W
130 +5wdt_1_V
131 +5wfs_1_V
132 +5wdt_1_W
133 +5wfs_1_W
134 +5we4_1_V
135 +5we4_1_W
136 +5uq8_1_Y
137 +6c4i_1_Y
138 +6c4i_1_X
139 +5zeb_1_V
140 +5zep_1_W
141 +5lzd_1_V
142 +5we6_1_V
143 +5wfk_1_V
144 +5wfk_1_W
145 +5we6_1_W
146 +5u4i_1_Y
147 +5uq7_1_Y
148 +5u4i_1_X
149 +5lza_1_V
150 +5wf0_1_V
151 +5wf0_1_W
152 +5zeu_1_V
153 +5l3p_1_X
154 +3jcj_1_V
155 +6gxm_1_X
156 +6gwt_1_X
157 +6gxn_1_X
158 +6gxo_1_X
159 +3j9y_1_V
160 +6o9k_1_Y
161 +6o7k_1_V
162 +5lzf_1_V
163 +3jcn_1_V
164 +5lzc_1_V
165 +5u4j_1_X
166 +5u4j_1_Z
167 +5lzb_1_V
168 +6h58_1_W
169 +6h58_1_WW
170 +1eg0_1_O
171 +5j8b_1_X
172 +4v7j_1_AV
173 +4v7j_1_BV
174 +4v7k_1_BV
175 +4v7k_1_AV
176 +4v7k_1_BW
177 +4v7k_1_AW
178 +4v7j_1_AW
179 +4v7j_1_BW
180 +4v4j_1_Z
181 +6i0v_1_B
182 +5k77_1_X
183 +5k77_1_V
184 +5k77_1_Y
185 +5k77_1_W
186 +5k77_1_Z
187 +4pei_1_X
188 +4pei_1_V
189 +4pei_1_W
190 +4pei_1_Z
191 +4pei_1_Y
192 +4a3c_1_P
193 +4a3e_1_P
194 +6lkq_1_U
195 +7k00_1_B
196 +6qdw_1_A
197 +2rdo_1_A
198 +4v48_1_A9
199 +4v47_1_A9
200 +6hcj_1_Q3
201 +6hcq_1_Q3
202 +5mmm_1_Z
203 +4w2e_1_W
204 +5j4b_1_1Y
205 +6cfj_1_1W
206 +5w4k_1_1Y
207 +5wit_1_1W
208 +6cfj_1_1Y
209 +6cfj_1_2W
210 +5j4c_1_1W
211 +5wis_1_1Y
212 +5j4c_1_1Y
213 +6cfj_1_2Y
214 +5wis_1_1W
215 +5j4b_1_1W
216 +5j4c_1_2W
217 +5j4b_1_2W
218 +5j4b_1_2Y
219 +5j4c_1_2Y
220 +5w4k_1_1W
221 +6nd5_1_1Y
222 +5wis_1_2Y
223 +5wit_1_2W
224 +5doy_1_1Y
225 +5w4k_1_2Y
226 +4y4p_1_1Y
227 +4z3s_1_1Y
228 +5doy_1_1W
229 +5doy_1_2Y
230 +6nd5_1_1W
231 +4z3s_1_2Y
232 +4z3s_1_1W
233 +5w4k_1_2W
234 +6nd5_1_2Y
235 +4y4p_1_2Y
236 +6uo1_1_2Y
237 +6uo1_1_2W
238 +4y4p_1_1W
239 +4z3s_1_2W
240 +6uo1_1_1Y
241 +6uo1_1_1W
242 +5wis_1_2W
243 +5wit_1_1Y
244 +6nd5_1_2W
245 +4y4p_1_2W
246 +5doy_1_2W
247 +5wit_1_2Y
248 +6ucq_1_1Y
249 +4v4i_1_Z
250 +6ucq_1_1X
251 +6ucq_1_2Y
252 +4w2e_1_X
253 +6ucq_1_2X
254 +6yss_1_W
255 +5afi_1_Y
256 +5uq8_1_Z
257 +5wdt_1_Y
258 +5wfs_1_Y
259 +6ysr_1_W
260 +5we4_1_Y
261 +6yst_1_W
262 +5uq7_1_Z
263 +5we6_1_Y
264 +5wfk_1_Y
265 +5wf0_1_Y
266 +6o9j_1_V
267 +6ysu_1_W
268 +3j46_1_A
269 +5j8b_1_Y
270 +5j8b_1_W
271 +3bbv_1_Z
272 +5aj0_1_BV
273 +5aj0_1_BW
274 +4wt8_1_AB
275 +4wt8_1_BB
276 +4v4j_1_Y
277 +4v4i_1_Y
278 +5uq8_1_X
279 +5uq7_1_X
280 +1jgq_1_A
281 +4v42_1_AA
282 +1jgo_1_A
283 +1jgp_1_A
284 +1ml5_1_A
285 +4v4j_1_W
286 +4v4i_1_W
287 +4v42_1_BA
288 +4wt8_1_CS
289 +4wt8_1_DS
290 +4v4j_1_X
291 +4v4i_1_X
292 +4v42_1_BB
293 +6uu4_1_333
294 +6uu0_1_333
295 +6uuc_1_333
296 +6uu2_1_333
297 +6b6h_1_3
298 +6pb4_1_3
299 +6d30_1_C
300 +6j7z_1_C
301 +3er9_1_D
302 +5kal_1_Y
303 +4nia_1_3
304 +5kal_1_Z
305 +4nia_1_7
306 +4nia_1_4
307 +5new_1_C
308 +4nia_1_U
309 +4nia_1_6
310 +4oq9_1_7
311 +4nia_1_1
312 +4oq9_1_4
313 +4nia_1_8
314 +4oq9_1_8
315 +4nia_1_5
316 +2vrt_1_E
317 +4nia_1_W
318 +4oq9_1_6
319 +4oq8_1_D
320 +4nia_1_Z
321 +4oq9_1_W
322 +4oq9_1_5
323 +4nia_1_2
324 +2vrt_1_F
325 +4oq9_1_U
326 +4oq9_1_Z
327 +4oq9_1_2
328 +4oq9_1_3
329 +1ddl_1_E
330 +4oq9_1_1
331 +6rt5_1_A
332 +6rt5_1_E
333 +4qu6_1_B
334 +6lkq_1_T
335 +6qdw_1_B
336 +3jbv_1_B
337 +3jbu_1_B
338 +2rdo_1_B
339 +4v48_1_A0
340 +4v47_1_A0
341 +6do8_1_B
342 +6dpi_1_B
343 +6dp9_1_B
344 +6dpb_1_B
345 +6dmn_1_B
346 +6dpp_1_B
347 +6dpk_1_B
348 +6dpd_1_B
349 +6dot_1_B
350 +6dok_1_B
351 +6dp8_1_B
352 +6dpl_1_B
353 +6dpg_1_B
354 +6dou_1_B
355 +6dpc_1_B
356 +6do9_1_B
357 +6dmv_1_B
358 +6dp4_1_B
359 +6dpn_1_B
360 +6doj_1_B
361 +6dph_1_B
362 +6dos_1_B
363 +6doo_1_B
364 +6dp6_1_B
365 +6dox_1_B
366 +6dp5_1_B
367 +6dol_1_B
368 +6dp1_1_B
369 +6doz_1_B
370 +6dp7_1_B
371 +6doq_1_B
372 +6dpa_1_B
373 +6dom_1_B
374 +6dog_1_B
375 +6dop_1_B
376 +6doh_1_B
377 +6doa_1_B
378 +6don_1_B
379 +6dov_1_B
380 +6dpo_1_B
381 +6dod_1_B
382 +6dob_1_B
383 +6dow_1_B
384 +6dpm_1_B
385 +6dpf_1_B
386 +6dp3_1_B
387 +6dp2_1_B
388 +6dpe_1_B
389 +6dpj_1_B
390 +6dor_1_B
391 +6dof_1_B
392 +6dp0_1_B
393 +6doi_1_B
394 +6doc_1_B
395 +6doe_1_B
396 +6n6g_1_D
397 +6lkq_1_S
398 +5h5u_1_H
399 +5lze_1_Y
400 +5lze_1_V
401 +5lze_1_X
402 +3jcj_1_G
403 +6o7k_1_G
404 +4v48_1_BA
405 +4v47_1_BA
406 +4b3r_1_W
407 +4b3t_1_W
408 +4b3s_1_W
409 +5o2r_1_X
410 +5kcs_1_1X
411 +6fti_1_U
412 +6fti_1_W
413 +6ftj_1_U
414 +6ftj_1_W
415 +6ftg_1_U
416 +6ftg_1_W
417 +6ole_1_T
418 +6om0_1_T
419 +6oli_1_T
420 +6om7_1_T
421 +6olf_1_T
422 +6w6l_1_T
423 +6x1b_1_D
424 +6x1b_1_F
425 +5f6c_1_C
426 +6i0t_1_B
427 +1b2m_1_C
428 +1b2m_1_D
429 +1b2m_1_E
430 +2uxc_1_Y
431 +4a3g_1_P
432 +4a3j_1_P
433 +7k00_1_5
434 +5mmi_1_Z
435 +3j9m_1_U
436 +6nu2_1_U
437 +6nu3_1_U
438 +5c0y_1_C
439 +6n6f_1_D
440 +4ohy_1_B
441 +4oi1_1_B
442 +4oi0_1_B
443 +6raz_1_Y
444 +5ipl_1_3
445 +6utw_1_333
446 +5ipm_1_3
447 +5ipn_1_3
448 +4ylo_1_3
449 +4yln_1_6
450 +4ylo_1_6
451 +4yln_1_3
452 +4yln_1_9
453 +5lzf_1_Y
454 +1n32_1_Z
455 +5zsl_1_D
456 +5zsd_1_C
457 +5zsd_1_D
458 +5zsl_1_E
459 +4nku_1_D
460 +4nku_1_H
461 +1cwp_1_E
462 +6qik_1_Y
463 +6rzz_1_Y
464 +6ri5_1_Y
465 +6qt0_1_Y
466 +6qtz_1_Y
467 +6t83_1_1B
468 +6t83_1_3B
469 +6t83_1_AA
470 +6t83_1_CA
471 +6s05_1_Y
472 +5jcs_1_X
473 +5fl8_1_X
474 +3erc_1_G
475 +6of1_1_1W
476 +6cae_1_1Y
477 +6o97_1_1W
478 +6of1_1_1Y
479 +6of1_1_2W
480 +6o97_1_1Y
481 +6nd6_1_1Y
482 +6cae_1_1W
483 +6of1_1_2Y
484 +6cae_1_2Y
485 +6nd6_1_1W
486 +6cae_1_2W
487 +6o97_1_2Y
488 +6nd6_1_2Y
489 +6o97_1_2W
490 +6nd6_1_2W
491 +6xz7_1_G
492 +6gz5_1_BW
493 +6gz3_1_BW
494 +1ls2_1_B
495 +3ep2_1_Y
496 +3eq3_1_Y
497 +4v48_1_A6
498 +2z9q_1_A
499 +4hot_1_X
500 +6d2z_1_C
501 +4tu0_1_F
502 +4tu0_1_G
503 +6r9o_1_B
504 +6is0_1_C
505 +5lzc_1_X
506 +5lzb_1_X
507 +5lzd_1_Y
508 +5lzc_1_Y
509 +5lzb_1_Y
510 +1gsg_1_T
511 +6zvi_1_D
512 +6sv4_1_NB
513 +6sv4_1_NC
514 +6i7o_1_NB
515 +5y88_1_X
516 +3j6x_1_IR
517 +3j6y_1_IR
518 +6tb3_1_N
519 +6tnu_1_N
520 +2uxb_1_X
521 +2x1f_1_B
522 +2x1a_1_B
523 +3eq3_1_D
524 +3ep2_1_D
525 +1eg0_1_M
526 +3eq4_1_D
527 +5o1y_1_B
528 +3jcr_1_H
529 +6dzi_1_H
530 +5zeu_1_A
531 +6mpi_1_W
532 +5mfx_1_B
533 +5w0m_1_J
534 +5bud_1_E
535 +5w0m_1_I
536 +5w0m_1_H
537 +4j7m_1_B
538 +5bud_1_D
539 +6a4e_1_B
540 +6a4e_1_D
541 +6hxx_1_AA
542 +6hxx_1_AB
543 +6hxx_1_AC
544 +6hxx_1_AD
545 +6hxx_1_AE
546 +6hxx_1_AF
547 +6hxx_1_AG
548 +6hxx_1_AH
549 +6hxx_1_AI
550 +6hxx_1_AJ
551 +6hxx_1_AK
552 +6hxx_1_AL
553 +6hxx_1_AM
554 +6hxx_1_AN
555 +6hxx_1_AO
556 +6hxx_1_AP
557 +6hxx_1_AQ
558 +6hxx_1_AR
559 +6hxx_1_AS
560 +6hxx_1_AT
561 +6hxx_1_AU
562 +6hxx_1_AV
563 +6hxx_1_AW
564 +6hxx_1_AX
565 +6hxx_1_AY
566 +6hxx_1_AZ
567 +6hxx_1_BA
568 +6hxx_1_BB
569 +6hxx_1_BC
570 +6hxx_1_BD
571 +6hxx_1_BE
572 +6hxx_1_BF
573 +6hxx_1_BG
574 +6hxx_1_BH
575 +6hxx_1_BI
576 +5odv_1_A
577 +5odv_1_B
578 +5odv_1_C
579 +5odv_1_D
580 +5odv_1_E
581 +5odv_1_F
582 +5odv_1_G
583 +5odv_1_H
584 +5odv_1_I
585 +5odv_1_J
586 +5odv_1_K
587 +5odv_1_L
588 +5odv_1_M
589 +5odv_1_N
590 +5odv_1_O
591 +5odv_1_P
592 +5odv_1_Q
593 +5odv_1_R
594 +5odv_1_S
595 +5odv_1_T
596 +5odv_1_U
597 +5odv_1_V
598 +5odv_1_W
599 +5odv_1_X
600 +6t34_1_A
601 +6t34_1_B
602 +6t34_1_C
603 +6t34_1_D
604 +6t34_1_E
605 +6t34_1_F
606 +6t34_1_G
607 +6t34_1_H
608 +6t34_1_I
609 +6t34_1_J
610 +6t34_1_K
611 +6t34_1_L
612 +6t34_1_M
613 +6t34_1_N
614 +6t34_1_O
615 +6t34_1_P
616 +6t34_1_Q
617 +6t34_1_R
618 +6t34_1_S
619 +6ip8_1_ZY
620 +6ip5_1_ZY
621 +6ip5_1_ZU
622 +6ip6_1_ZY
623 +6ip8_1_ZZ
624 +6ip6_1_ZZ
625 +6uu3_1_333
626 +6uu1_1_333
627 +1pn8_1_D
628 +3er8_1_H
629 +3er8_1_G
630 +3er8_1_F
631 +5o3j_1_B
632 +4dr7_1_B
633 +1i5l_1_Y
634 +1i5l_1_U
635 +4dr6_1_B
636 +6i2n_1_U
637 +4v68_1_A0
638 +6vyu_1_Y
639 +6vyw_1_Y
640 +6vz7_1_Y
641 +6vz5_1_Y
642 +6vz3_1_Y
643 +6vyy_1_Y
644 +6vyx_1_Y
645 +6vyz_1_Y
646 +6vz2_1_Y
647 +1mvr_1_1
648 +6vyt_1_Y
649 +1cgm_1_I
650 +3jb7_1_T
651 +3jb7_1_M
652 +3j0o_1_D
653 +3j0l_1_D
654 +3j0q_1_D
655 +3j0p_1_D
656 +5elt_1_F
657 +5elt_1_E
658 +2tmv_1_R
659 +5a79_1_R
660 +5a7a_1_R
661 +2om3_1_R
662 +2xea_1_R
663 +4wtl_1_T
664 +4wtl_1_P
665 +1xnq_1_W
666 +1x18_1_C
667 +1x18_1_B
668 +1x18_1_D
669 +1vq6_1_4
670 +4am3_1_D
671 +4am3_1_H
672 +4am3_1_I
673 +4lj0_1_C
674 +4lj0_1_D
675 +4lj0_1_E
676 +5lzy_1_HH
677 +4wtj_1_T
678 +4wtj_1_P
679 +4xbf_1_D
680 +6ow3_1_I
681 +6ovy_1_I
682 +6oy6_1_I
683 +6n6d_1_D
684 +6n6k_1_C
685 +6n6k_1_D
686 +3rtj_1_D
687 +1apg_1_D
688 +6ty9_1_M
689 +6tz1_1_N
690 +4bbl_1_Y
691 +4bbl_1_Z
692 +6sce_1_B
693 +6scf_1_I
694 +6scf_1_K
695 +6yud_1_K
696 +6yud_1_O
697 +6scf_1_M
698 +6yud_1_P
699 +6scf_1_L
700 +6yud_1_M
701 +6yud_1_Q
702 +6o6x_1_D
703 +4ba2_1_R
704 +6o6x_1_C
705 +6o7b_1_C
706 +6o6v_1_C
707 +6r7b_1_D
708 +6r9r_1_D
709 +6ov0_1_E
710 +6ov0_1_H
711 +6ov0_1_G
712 +6o6v_1_D
713 +6ov0_1_F
714 +6o7b_1_D
715 +5e02_1_C
716 +6r9r_1_E
717 +6r7b_1_E
718 +6o7i_1_I
719 +6o7h_1_K
720 +7jyy_1_F
721 +7jyy_1_E
722 +7jz0_1_F
723 +7jz0_1_E
724 +6rt6_1_A
725 +6rt6_1_E
726 +1y1y_1_P
727 +5zuu_1_I
728 +5zuu_1_G
729 +4peh_1_W
730 +4peh_1_V
731 +4peh_1_X
732 +4peh_1_Y
733 +4peh_1_Z
734 +6mkn_1_W
735 +4cxg_1_C
736 +4cxh_1_C
737 +1x1l_1_A
738 +1zc8_1_Z
739 +2ob7_1_D
740 +2ob7_1_A
741 +4eya_1_E
742 +4eya_1_F
743 +4eya_1_Q
744 +4eya_1_R
745 +2r1g_1_B
746 +4ht9_1_E
747 +1cvj_1_M
748 +6z1p_1_AB
749 +6z1p_1_AA
750 +4ii9_1_C
751 +5mq0_1_3
752 +5uk4_1_X
753 +5uk4_1_V
754 +5uk4_1_W
755 +5uk4_1_U
756 +5f6c_1_E
757 +4rcj_1_B
758 +1xnr_1_W
759 +6e0o_1_C
760 +6o75_1_D
761 +6o75_1_C
762 +6e0o_1_B
763 +3j06_1_R
764 +1r2x_1_C
765 +1r2w_1_C
766 +1eg0_1_L
767 +4eya_1_G
768 +4eya_1_H
769 +4eya_1_S
770 +4eya_1_T
771 +4dr4_1_V
772 +1ibl_1_Z
773 +1ibm_1_Z
774 +4dr5_1_V
775 +4d61_1_J
776 +1trj_1_B
777 +1trj_1_C
778 +6q8y_1_N
779 +6sv4_1_N
780 +6i7o_1_N
781 +5k8h_1_A
782 +5z4a_1_B
783 +3jbu_1_V
784 +1h2c_1_R
785 +1h2d_1_S
786 +1h2d_1_R
787 +6szs_1_X
788 +5mgp_1_X
789 +6enu_1_X
790 +6enf_1_X
791 +6enj_1_X
792 +1pvo_1_L
793 +1pvo_1_G
794 +1pvo_1_H
795 +1pvo_1_J
796 +1pvo_1_K
797 +2ht1_1_K
798 +2ht1_1_J
799 +6eri_1_AX
800 +1zc8_1_A
801 +1zc8_1_C
802 +1zc8_1_B
803 +1zc8_1_G
804 +1zc8_1_I
805 +1zc8_1_H
806 +1zc8_1_J
807 +4v8z_1_CX
808 +6kqe_1_I
809 +5uh8_1_I
810 +5vi5_1_Q
811 +4xln_1_T
812 +4xlr_1_T
813 +4xln_1_Q
814 +5i2d_1_K
815 +5i2d_1_V
816 +4xlr_1_Q
817 +6sty_1_C
818 +6sty_1_F
819 +2xs5_1_D
820 +3ok4_1_N
821 +3ok4_1_L
822 +3ok4_1_Z
823 +3ok4_1_4
824 +3ok4_1_V
825 +3ok4_1_X
826 +3ok4_1_P
827 +3ok4_1_H
828 +3ok4_1_J
829 +3ok4_1_R
830 +3ok4_1_T
831 +3ok4_1_2
832 +6n6h_1_D
833 +5wnt_1_B
834 +3b0u_1_B
835 +3b0u_1_A
836 +4x9e_1_G
837 +4x9e_1_H
838 +6z1p_1_BB
839 +6z1p_1_BA
840 +2uxd_1_X
841 +4qvd_1_H
842 +4v7e_1_AB
843 +3ol9_1_D
844 +3ol9_1_H
845 +3ol9_1_L
846 +3ol9_1_P
847 +3olb_1_L
848 +3olb_1_P
849 +3olb_1_D
850 +3olb_1_H
851 +3ol6_1_D
852 +3ol6_1_H
853 +3ol6_1_L
854 +3ol6_1_P
855 +3ol8_1_D
856 +3ol8_1_H
857 +3ol7_1_L
858 +3ol7_1_P
859 +3ol7_1_D
860 +3ol7_1_H
861 +3ol8_1_L
862 +3ol8_1_P
863 +1qzc_1_C
864 +1qzc_1_A
865 +6ole_1_V
866 +6om0_1_V
867 +6oli_1_V
868 +6om7_1_V
869 +6w6l_1_V
870 +6olf_1_V
871 +1mvr_1_D
872 +4wtm_1_T
873 +4wtm_1_P
874 +5x70_1_E
875 +5x70_1_G
876 +6gz5_1_BV
877 +6gz4_1_BV
878 +6gz3_1_BV
879 +6fti_1_Q
880 +4v7e_1_AE
881 +4v7e_1_AD
882 +4x62_1_B
883 +4x64_1_B
884 +4x65_1_B
885 +1xmq_1_W
886 +4x66_1_B
887 +3t1h_1_W
888 +3t1y_1_W
889 +1xmo_1_W
890 +4adx_1_9
891 +6kr6_1_B
892 +1zn1_1_B
893 +6z8k_1_X
894 +1cvj_1_Q
895 +4csf_1_U
896 +4csf_1_Q
897 +4csf_1_G
898 +4csf_1_M
899 +4csf_1_K
900 +4csf_1_A
901 +4csf_1_I
902 +4csf_1_S
903 +4csf_1_C
904 +4csf_1_W
905 +4csf_1_O
906 +4csf_1_E
907 +1cvj_1_N
908 +1cvj_1_O
909 +1cvj_1_S
910 +1cvj_1_P
911 +1cvj_1_T
912 +1cvj_1_R
913 +6th6_1_AA
914 +6skg_1_AA
915 +6skf_1_AA
916 +6q8y_1_M
917 +6i7o_1_M
918 +6zmw_1_W
919 +6ybv_1_W
920 +2fz2_1_D
921 +2xpj_1_D
922 +2vrt_1_H
923 +2vrt_1_G
924 +1emi_1_B
925 +6r9m_1_B
926 +4nia_1_C
927 +4nia_1_A
928 +4nia_1_H
929 +4nia_1_N
930 +4nia_1_G
931 +4nia_1_D
932 +4nia_1_B
933 +4nia_1_I
934 +4nia_1_E
935 +4nia_1_M
936 +4oq9_1_I
937 +4oq9_1_G
938 +4oq9_1_C
939 +4oq9_1_H
940 +4oq9_1_N
941 +4oq9_1_A
942 +4oq9_1_D
943 +4oq9_1_E
944 +4oq9_1_M
945 +4oq9_1_B
946 +5uhc_1_I
947 +1uvn_1_F
948 +1uvn_1_B
949 +1uvn_1_D
950 +3iy9_1_A
951 +4wtk_1_T
952 +4wtk_1_P
953 +1vqn_1_4
954 +4oav_1_C
955 +4oav_1_A
956 +3ep2_1_E
957 +3eq3_1_E
958 +3eq4_1_E
959 +3ep2_1_A
960 +3eq3_1_A
961 +3eq4_1_A
962 +3ep2_1_C
963 +3eq3_1_C
964 +3eq4_1_C
965 +3ep2_1_B
966 +3eq3_1_B
967 +3eq4_1_B
968 +4i67_1_B
969 +3pgw_1_R
970 +3pgw_1_N
971 +3cw1_1_X
972 +3cw1_1_W
973 +3cw1_1_V
974 +5it9_1_I
975 +6k32_1_T
976 +6k32_1_P
977 +5mmj_1_A
978 +5x8r_1_A
979 +3j2k_1_3
980 +3j2k_1_2
981 +3j2k_1_1
982 +3j2k_1_0
983 +3j2k_1_4
984 +3nvk_1_G
985 +3nvk_1_S
986 +2iy3_1_B
987 +1cwp_1_F
988 +5z4j_1_B
989 +5gmf_1_E
990 +5gmf_1_H
991 +6e4p_1_J
992 +5gmf_1_F
993 +5gmf_1_G
994 +5gmg_1_D
995 +5gmg_1_C
996 +6e4p_1_K
997 +3ie1_1_E
998 +3ie1_1_H
999 +3ie1_1_F
1000 +4dr7_1_V
1001 +3ie1_1_G
1002 +3s4g_1_C
1003 +3s4g_1_B
1004 +2qqp_1_R
1005 +2zde_1_E
1006 +2zde_1_F
1007 +2zde_1_H
1008 +2zde_1_G
1009 +1nb7_1_E
1010 +1nb7_1_F
1011 +4hos_1_X
1012 +3p6y_1_T
1013 +3p6y_1_V
1014 +3p6y_1_U
1015 +3p6y_1_Q
1016 +3p6y_1_W
1017 +5dto_1_B
1018 +4cxh_1_X
1019 +1uvj_1_F
1020 +1uvj_1_D
1021 +1uvj_1_E
1022 +6kqd_1_I
1023 +6kqd_1_S
1024 +5uh5_1_I
1025 +1ytu_1_F
1026 +1ytu_1_D
1027 +4kzz_1_J
1028 +5t2c_1_AN
1029 +4v5z_1_BF
1030 +3j6b_1_E
1031 +4v4f_1_B6
1032 +4v4f_1_A5
1033 +4v4f_1_A3
1034 +4v4f_1_B0
1035 +4v4f_1_B9
1036 +4v4f_1_A2
1037 +4v4f_1_A8
1038 +4v4f_1_A1
1039 +4v4f_1_A9
1040 +4v4f_1_BZ
1041 +4v4f_1_B8
1042 +4v4f_1_B7
1043 +4v4f_1_B5
1044 +4v4f_1_A0
1045 +4v4f_1_A7
1046 +4v4f_1_A4
1047 +4v4f_1_AZ
1048 +4v4f_1_B3
1049 +4v4f_1_B1
1050 +4v4f_1_B4
1051 +4v4f_1_A6
1052 +4v4f_1_B2
1053 +5flx_1_Z
1054 +5zsb_1_C
1055 +5zsb_1_D
1056 +5zsn_1_D
1057 +5zsn_1_E
1058 +3jcr_1_N
1059 +6gfw_1_R
1060 +2vaz_1_A
1061 +1qzc_1_B
1062 +1mvr_1_C
1063 +4v5z_1_BP
1064 +6n6e_1_D
1065 +4g7o_1_I
1066 +4g7o_1_S
1067 +5x22_1_S
1068 +5x22_1_I
1069 +5x21_1_I
1070 +5uh6_1_I
1071 +6l74_1_I
1072 +5uh9_1_I
1073 +2ftc_1_R
1074 +6sag_1_R
1075 +4udv_1_R
1076 +2r1g_1_E
1077 +5zsc_1_D
1078 +5zsc_1_C
1079 +6woy_1_I
1080 +6wox_1_I
1081 +6evj_1_N
1082 +6evj_1_M
1083 +4gkk_1_W
1084 +4v9e_1_AG
1085 +4v9e_1_BM
1086 +4v9e_1_AM
1087 +4v9e_1_AA
1088 +4v9e_1_BA
1089 +4v9e_1_BG
1090 +5lzs_1_II
1091 +6fqr_1_C
1092 +6ha1_1_X
1093 +5kcr_1_1X
1094 +2r1g_1_X
1095 +3m7n_1_Z
1096 +3m85_1_X
1097 +3m85_1_Z
1098 +3m85_1_Y
1099 +1e8s_1_C
1100 +5wnp_1_B
1101 +5wnv_1_B
1102 +5yts_1_B
1103 +1utd_1_6
1104 +1utd_1_Z
1105 +1utd_1_4
1106 +1utd_1_7
1107 +1utd_1_9
1108 +1utd_1_5
1109 +1utd_1_3
1110 +1utd_1_2
1111 +1utd_1_8
1112 +1utd_1_1
1113 +6n6i_1_C
1114 +6n6i_1_D
1115 +6n6a_1_D
1116 +6ij2_1_F
1117 +6ij2_1_G
1118 +6ij2_1_H
1119 +6ij2_1_E
1120 +3u2e_1_D
1121 +3u2e_1_C
1122 +5uef_1_C
1123 +5uef_1_D
1124 +4x4u_1_H
1125 +4afy_1_D
1126 +6oy5_1_I
1127 +6owl_1_B
1128 +6owl_1_C
1129 +4afy_1_C
1130 +4lq3_1_R
1131 +6s0m_1_C
1132 +6gx6_1_B
1133 +4k4s_1_D
1134 +4k4s_1_H
1135 +4k4t_1_H
1136 +4k4t_1_D
1137 +1zn1_1_C
1138 +1zn0_1_C
1139 +1xpu_1_G
1140 +1xpu_1_L
1141 +1xpr_1_L
1142 +1xpu_1_H
1143 +1xpo_1_K
1144 +1xpo_1_J
1145 +1xpu_1_J
1146 +1xpo_1_H
1147 +1xpr_1_J
1148 +1xpu_1_K
1149 +1xpr_1_K
1150 +1xpo_1_M
1151 +1xpo_1_L
1152 +1xpu_1_M
1153 +1xpr_1_M
1154 +1xpo_1_G
1155 +1xpr_1_H
1156 +1xpr_1_G
1157 +6gc5_1_F
1158 +6gc5_1_H
1159 +6gc5_1_G
1160 +4v7e_1_AA
1161 +4v7e_1_AC
1162 +1n1h_1_B
1163 +4ohz_1_B
1164 +6t83_1_6B
1165 +4gv6_1_C
1166 +4gv6_1_B
1167 +4gv3_1_C
1168 +4gv3_1_B
1169 +4gv9_1_E
1170 +6i7o_1_L
1171 +2a8v_1_D
1172 +6qx3_1_G
1173 +2xnr_1_C
1174 +4gkj_1_W
1175 +4v5z_1_BC
1176 +4v5z_1_BB
1177 +4v5z_1_BH
1178 +3j0o_1_F
1179 +3j0l_1_F
1180 +3j0p_1_F
1181 +3j0q_1_F
1182 +3j0o_1_B
1183 +3j0l_1_B
1184 +3j0o_1_C
1185 +3j0l_1_C
1186 +3j0q_1_C
1187 +3j0p_1_C
1188 +3j0o_1_A
1189 +3j0l_1_A
1190 +3j0q_1_A
1191 +3j0p_1_A
1192 +1cwp_1_D
1193 +4v5z_1_BJ
1194 +5sze_1_C
1195 +6wre_1_D
1196 +6i0u_1_B
1197 +5zsa_1_C
1198 +5zsa_1_D
1199 +1n34_1_Z
1200 +3pf5_1_S
1201 +6ppn_1_A
1202 +6ppn_1_I
1203 +6qdw_1_V
1204 +5hk0_1_F
1205 +4qm6_1_D
1206 +4qm6_1_C
1207 +4jzu_1_C
1208 +4jzv_1_C
1209 +5ytv_1_B
1210 +4k4z_1_P
1211 +4k4z_1_D
1212 +4k4x_1_L
1213 +4k4z_1_L
1214 +4k4x_1_D
1215 +4k4z_1_H
1216 +4k4x_1_H
1217 +4k4x_1_P
1218 +1t1m_1_A
1219 +1t1m_1_B
1220 +4a3b_1_P
1221 +4a3m_1_P
1222 +6u6y_1_E
1223 +6u6y_1_G
1224 +6u6y_1_F
1225 +6u6y_1_H
1226 +6qik_1_X
1227 +6rzz_1_X
1228 +6ri5_1_X
1229 +6qt0_1_X
1230 +6qtz_1_X
1231 +6s05_1_X
1232 +6t83_1_BB
1233 +6t83_1_4B
1234 +5fl8_1_Z
1235 +5jcs_1_Z
1236 +5mrc_1_BB
1237 +5mre_1_BB
1238 +5mrf_1_BB
1239 +6gz4_1_BW
1240 +3j46_1_P
1241 +3jcr_1_M
1242 +4e6b_1_A
1243 +4e6b_1_B
1244 +6a6l_1_D
1245 +4v5z_1_BS
1246 +4v8t_1_1
1247 +1uvi_1_D
1248 +1uvi_1_F
1249 +1uvi_1_E
1250 +4m7d_1_P
1251 +4k4u_1_D
1252 +4k4u_1_H
1253 +6rt7_1_E
1254 +6rt7_1_A
1255 +2voo_1_C
1256 +2voo_1_D
1257 +5k78_1_X
1258 +5k78_1_Y
1259 +4ylo_1_9
1260 +4kzy_1_I
1261 +4kzz_1_I
1262 +4kzx_1_I
1263 +5vyc_1_I2
1264 +5vyc_1_I3
1265 +5vyc_1_I5
1266 +5vyc_1_I1
1267 +5vyc_1_I6
1268 +5vyc_1_I4
1269 +6ip8_1_2M
1270 +6ip5_1_2M
1271 +6ip6_1_2M
1272 +6qcs_1_M
1273 +486d_1_G
1274 +2r1g_1_C
1275 +486d_1_F
1276 +4v5z_1_B0
1277 +4nia_1_O
1278 +4nia_1_J
1279 +4nia_1_K
1280 +4nia_1_L
1281 +4nia_1_F
1282 +4oq9_1_K
1283 +4oq9_1_O
1284 +4oq9_1_J
1285 +4oq9_1_F
1286 +4oq9_1_L
1287 +5tbw_1_SR
1288 +6hhq_1_SR
1289 +6zvi_1_H
1290 +6sv4_1_2B
1291 +6sv4_1_2C
1292 +6t83_1_2B
1293 +6t83_1_A
1294 +6i7o_1_2B
1295 +6r9q_1_B
1296 +6v3a_1_SN1
1297 +6v3b_1_SN1
1298 +6v39_1_SN1
1299 +6v3e_1_SN1
1300 +1pn7_1_C
1301 +1mj1_1_Q
1302 +1mj1_1_R
1303 +4dr6_1_V
1304 +6kql_1_I
1305 +4eya_1_M
1306 +4eya_1_N
1307 +4eya_1_A
1308 +4eya_1_B
1309 +2wj8_1_D
1310 +2wj8_1_I
1311 +2wj8_1_L
1312 +2wj8_1_F
1313 +2wj8_1_C
1314 +2wj8_1_Q
1315 +2wj8_1_J
1316 +2wj8_1_P
1317 +2wj8_1_K
1318 +2wj8_1_E
1319 +2wj8_1_T
1320 +2wj8_1_B
1321 +2wj8_1_O
1322 +2wj8_1_N
1323 +2wj8_1_A
1324 +2wj8_1_H
1325 +2wj8_1_R
1326 +2wj8_1_M
1327 +2wj8_1_S
1328 +2wj8_1_G
1329 +4e6b_1_E
1330 +4e6b_1_F
1331 +6p71_1_I
1332 +3pdm_1_R
1333 +5det_1_P
1334 +5els_1_I
1335 +4n2s_1_B
1336 +4yoe_1_E
1337 +3j0o_1_H
1338 +3j0l_1_H
1339 +3j0p_1_H
1340 +3j0q_1_H
1341 +5gxi_1_B
1342 +3iy8_1_A
1343 +6tnu_1_M
1344 +5mc6_1_M
1345 +5mc6_1_N
1346 +4eya_1_O
1347 +4eya_1_P
1348 +4eya_1_C
1349 +4eya_1_D
1350 +6htq_1_V
1351 +6htq_1_W
1352 +6htq_1_U
1353 +6uu6_1_333
1354 +6v3a_1_V
1355 +6v39_1_V
1356 +5a0v_1_F
1357 +3avt_1_T
1358 +6d1v_1_C
1359 +4s2x_1_B
1360 +4s2y_1_B
1361 +5wnu_1_B
1362 +1zc8_1_F
1363 +1vtm_1_R
1364 +4v5z_1_BA
1365 +4v5z_1_BE
1366 +4v5z_1_BD
1367 +4v5z_1_BG
1368 +4v5z_1_BI
1369 +4v5z_1_BK
1370 +4v5z_1_BM
1371 +4v5z_1_BL
1372 +4v5z_1_BV
1373 +4v5z_1_BO
1374 +4v5z_1_BN
1375 +4v5z_1_BQ
1376 +4v5z_1_BR
1377 +4v5z_1_BT
1378 +4v5z_1_BU
1379 +4v5z_1_BW
1380 +4v5z_1_BY
1381 +4v5z_1_BX
1382 +4v5z_1_BZ
1383 +6u9x_1_H
1384 +6u9x_1_K
1385 +5elk_1_R
1386 +6okk_1_G
1387 +4cxg_1_A
1388 +4cxh_1_A
1389 +6bk8_1_I
1390 +4cxg_1_B
1391 +4cxh_1_B
1392 +4v5z_1_B1
1393 +5z4d_1_B
1394 +6o78_1_E
1395 +6ha8_1_X
1396 +1m8w_1_E
1397 +1m8w_1_F
1398 +5udi_1_B
1399 +5udl_1_B
1400 +5udk_1_B
1401 +5udj_1_B
1402 +5w5i_1_B
1403 +5w5i_1_D
1404 +5w5h_1_B
1405 +5w5h_1_D
1406 +4eya_1_K
1407 +4eya_1_L
1408 +4eya_1_I
1409 +4eya_1_J
1410 +4g9z_1_E
1411 +4g9z_1_F
1412 +3nma_1_B
1413 +3nma_1_C
1414 +6een_1_G
1415 +6een_1_I
1416 +6een_1_H
1417 +4wti_1_T
1418 +4wti_1_P
1419 +5l3p_1_Y
1420 +4hor_1_X
1421 +3rzo_1_R
1422 +2f4v_1_Z
1423 +1qln_1_R
1424 +2xs7_1_B
1425 +6zvi_1_E
1426 +6sv4_1_MC
1427 +6sv4_1_MB
1428 +6i7o_1_MB
1429 +6ogy_1_M
1430 +6ogy_1_N
1431 +6uej_1_B
1432 +1x18_1_A
1433 +5ytx_1_B
1434 +6o8w_1_U
1435 +4g0a_1_H
1436 +6r9p_1_B
1437 +3koa_1_C
1438 +4n48_1_D
1439 +4n48_1_G
1440 +6kug_1_B
1441 +6ktc_1_V
1442 +6ole_1_U
1443 +6om0_1_U
1444 +6olg_1_BV
1445 +6oli_1_U
1446 +6om7_1_U
1447 +6w6l_1_U
1448 +6olz_1_BV
1449 +6olf_1_U
1450 +5lzd_1_X
1451 +6m7k_1_B
1452 +3cd6_1_4
1453 +3cma_1_5
1454 +6n9e_1_2W
1455 +1vqo_1_4
1456 +1qvg_1_3
1457 +3cme_1_5
1458 +5lzd_1_W
1459 +5lze_1_W
1460 +5lzc_1_W
1461 +5lzb_1_W
1462 +3wzi_1_C
1463 +1mvr_1_E
1464 +1mvr_1_B
1465 +1mvr_1_A
1466 +4adx_1_0
1467 +4adx_1_8
1468 +1n33_1_Z
1469 +6dti_1_W
1470 +3d2s_1_F
1471 +3d2s_1_H
1472 +5mrc_1_AA
1473 +5mre_1_AA
1474 +5mrf_1_AA
1475 +5fl8_1_Y
1476 +5jcs_1_Y
1477 +2r1g_1_A
1478 +2r1g_1_D
1479 +2r1g_1_F
1480 +3eq4_1_Y
1481 +4wkr_1_C
1482 +4v99_1_EC
1483 +4v99_1_AC
1484 +4v99_1_BH
1485 +4v99_1_CH
1486 +4v99_1_AM
1487 +4v99_1_DC
1488 +4v99_1_JW
1489 +4v99_1_EH
1490 +4v99_1_BW
1491 +4v99_1_FW
1492 +4v99_1_AW
1493 +4v99_1_BC
1494 +4v99_1_BM
1495 +4v99_1_IC
1496 +4v99_1_EM
1497 +4v99_1_ER
1498 +4v99_1_IW
1499 +4v99_1_JH
1500 +4v99_1_JR
1501 +4v99_1_AH
1502 +4v99_1_GR
1503 +4v99_1_IR
1504 +4v99_1_BR
1505 +4v99_1_CW
1506 +4v99_1_HR
1507 +4v99_1_FH
1508 +4v99_1_HC
1509 +4v99_1_DW
1510 +4v99_1_GC
1511 +4v99_1_JC
1512 +4v99_1_DM
1513 +4v99_1_EW
1514 +4v99_1_AR
1515 +4v99_1_CR
1516 +4v99_1_JM
1517 +4v99_1_CC
1518 +4v99_1_IH
1519 +4v99_1_FR
1520 +4v99_1_CM
1521 +4v99_1_IM
1522 +4v99_1_FM
1523 +4v99_1_FC
1524 +4v99_1_GH
1525 +4v99_1_HM
1526 +4v99_1_HH
1527 +4v99_1_DR
1528 +4v99_1_HW
1529 +4v99_1_GW
1530 +4v99_1_DH
1531 +4v99_1_GM
1532 +6rt4_1_D
1533 +6rt4_1_C
1534 +6zvh_1_X
1535 +4dwa_1_D
1536 +6n6c_1_D
1537 +6n6j_1_C
1538 +6n6j_1_D
1539 +6p7q_1_E
1540 +6p7q_1_F
1541 +6p7q_1_D
1542 +6rcl_1_C
1543 +5jju_1_C
1544 +4ejt_1_G
1545 +5ceu_1_C
1546 +5ceu_1_D
1547 +6lkq_1_W
1548 +3qsu_1_P
1549 +3qsu_1_R
1550 +1n38_1_B
1551 +4qvc_1_G
1552 +6q1h_1_D
1553 +6q1h_1_H
1554 +6p7p_1_F
1555 +6p7p_1_E
1556 +6p7p_1_D
1557 +6vm6_1_J
1558 +6vm6_1_G
1559 +6wan_1_K
1560 +6wan_1_H
1561 +6wan_1_G
1562 +6wan_1_L
1563 +6wan_1_I
1564 +6ywo_1_F
1565 +6wan_1_J
1566 +4oau_1_A
1567 +6ywo_1_E
1568 +6ywo_1_K
1569 +6vm6_1_I
1570 +6vm6_1_H
1571 +6ywo_1_I
1572 +2a1r_1_C
1573 +2a1r_1_D
1574 +3gpq_1_E
1575 +3gpq_1_F
1576 +6o79_1_C
1577 +6vm6_1_K
1578 +6hyu_1_D
1579 +1laj_1_R
1580 +6ybv_1_K
1581 +6mpf_1_W
1582 +6spc_1_A
1583 +6spe_1_A
1584 +6fti_1_V
1585 +6ftj_1_V
1586 +6ftg_1_V
1587 +4g0a_1_G
1588 +4g0a_1_F
1589 +4g0a_1_E
1590 +2b2d_1_S
1591 +5hkc_1_C
1592 +1rmv_1_B
1593 +4qu7_1_X
1594 +4qu7_1_V
1595 +4qu7_1_U
1596 +4v5z_1_AH
1597 +4v5z_1_AA
1598 +4v5z_1_AB
1599 +4v5z_1_AC
1600 +4v5z_1_AD
1601 +4v5z_1_AE
1602 +4v5z_1_AF
1603 +4v5z_1_AG
1604 +6pmi_1_3
1605 +6pmj_1_3
1606 +5hjz_1_C
......
This diff could not be displayed because it is too large.
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
11 # - Use a specialised database (SILVA) : better alignments (we guess?), but two kind of jobs 11 # - Use a specialised database (SILVA) : better alignments (we guess?), but two kind of jobs
12 # - Use cmalign --small everywhere (homogeneity) 12 # - Use cmalign --small everywhere (homogeneity)
13 # Moreover, --small requires --nonbanded --cyk, which means the output alignement is the optimally scored one. 13 # Moreover, --small requires --nonbanded --cyk, which means the output alignement is the optimally scored one.
14 -# To date, we trust Infernal as the best tool to realign RNA. Is it ? 14 +# To date, we trust Infernal as the best tool to realign ncRNA. Is it ?
15 15
16 # Contact: louis.becquey@univ-evry.fr (PhD student), fariza.tahi@univ-evry.fr (PI) 16 # Contact: louis.becquey@univ-evry.fr (PhD student), fariza.tahi@univ-evry.fr (PI)
17 17
...@@ -28,7 +28,7 @@ pd.set_option('display.max_rows', None) ...@@ -28,7 +28,7 @@ pd.set_option('display.max_rows', None)
28 LSU_set = ["RF00002", "RF02540", "RF02541", "RF02543", "RF02546"] # From Rfam CLAN 00112 28 LSU_set = ["RF00002", "RF02540", "RF02541", "RF02543", "RF02546"] # From Rfam CLAN 00112
29 SSU_set = ["RF00177", "RF02542", "RF02545", "RF01959", "RF01960"] # From Rfam CLAN 00111 29 SSU_set = ["RF00177", "RF02542", "RF02545", "RF01959", "RF01960"] # From Rfam CLAN 00111
30 30
31 -with sqlite3.connect("results/RNANet.db") as conn: 31 +with sqlite3.connect(os.getcwd()+"/results/RNANet.db") as conn:
32 df = pd.read_sql("SELECT rfam_acc, max_len, nb_total_homol, comput_time, comput_peak_mem FROM family;", conn) 32 df = pd.read_sql("SELECT rfam_acc, max_len, nb_total_homol, comput_time, comput_peak_mem FROM family;", conn)
33 33
34 to_remove = [ f for f in df.rfam_acc if f in LSU_set+SSU_set ] 34 to_remove = [ f for f in df.rfam_acc if f in LSU_set+SSU_set ]
...@@ -74,7 +74,7 @@ ax.set_ylabel("Maximum length of sequences ") ...@@ -74,7 +74,7 @@ ax.set_ylabel("Maximum length of sequences ")
74 ax.set_zlabel("Computation time (s)") 74 ax.set_zlabel("Computation time (s)")
75 75
76 plt.subplots_adjust(wspace=0.4) 76 plt.subplots_adjust(wspace=0.4)
77 -plt.savefig("results/cmalign_jobs_performance.png") 77 +plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png")
78 78
79 # # ======================================================== 79 # # ========================================================
80 # # Linear Regression of max_mem as function of max_length 80 # # Linear Regression of max_mem as function of max_length
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
3 # This file computes additional statistics over the produced dataset. 3 # This file computes additional statistics over the produced dataset.
4 # Run this file if you want the base counts, pair-type counts, identity percents, etc 4 # Run this file if you want the base counts, pair-type counts, identity percents, etc
5 # in the database. 5 # in the database.
6 -# This should be run from the folder where the file is (to access the database with path "results/RNANet.db")
7 6
8 import getopt, os, pickle, sqlite3, shlex, subprocess, sys 7 import getopt, os, pickle, sqlite3, shlex, subprocess, sys
9 import numpy as np 8 import numpy as np
...@@ -22,34 +21,35 @@ from multiprocessing import Pool, Manager ...@@ -22,34 +21,35 @@ from multiprocessing import Pool, Manager
22 from os import path 21 from os import path
23 from tqdm import tqdm 22 from tqdm import tqdm
24 from collections import Counter 23 from collections import Counter
25 -from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker 24 +from setproctitle import setproctitle
25 +from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker, trace_unhandled_exceptions
26 26
27 path_to_3D_data = "tobedefinedbyoptions" 27 path_to_3D_data = "tobedefinedbyoptions"
28 path_to_seq_data = "tobedefinedbyoptions" 28 path_to_seq_data = "tobedefinedbyoptions"
29 +runDir = os.getcwd()
29 res_thr = 20.0 # default: all structures 30 res_thr = 20.0 # default: all structures
30 31
31 LSU_set = ("RF00002", "RF02540", "RF02541", "RF02543", "RF02546") # From Rfam CLAN 00112 32 LSU_set = ("RF00002", "RF02540", "RF02541", "RF02543", "RF02546") # From Rfam CLAN 00112
32 SSU_set = ("RF00177", "RF02542", "RF02545", "RF01959", "RF01960") # From Rfam CLAN 00111 33 SSU_set = ("RF00177", "RF02542", "RF02545", "RF01959", "RF01960") # From Rfam CLAN 00111
33 34
34 -def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0): 35 +@trace_unhandled_exceptions
36 +def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=2.0):
35 """ 37 """
36 Plot the joint distribution of pseudotorsion angles, in a Ramachandran-style graph. 38 Plot the joint distribution of pseudotorsion angles, in a Ramachandran-style graph.
37 - See Wadley & Pyle (2007) 39 + See Wadley & Pyle (2007).
40 + Only unique unmapped chains with resolution <= res argument are considered.
38 41
39 Arguments: 42 Arguments:
40 - show: True or False, call plt.show() at this end or not 43 + carbon: 1 or 4, use C4' (eta and theta) or C1' (eta_prime and theta_prime)
41 - filter_helical: None, "form", "zone", or "both" 44 + show: True or False, call plt.show() at the end or not
42 - None: do not remove helical nucleotide 45 + sd_range: tuple, set values below avg + sd_range[0] * stdev to 0,
43 - "form": remove nucleotides if they belong to a A, B or Z form stem 46 + and values above avg + sd_range[1] * stdev to avg + sd_range[1] * stdev.
44 - "zone": remove nucleotides falling in an arbitrary zone (see zone argument) 47 + This removes noise and cuts too high peaks, to clearly see the clusters.
45 - "both": remove nucleotides fulfilling one or both of the above conditions 48 + res: Minimal resolution (maximal resolution value, actually) of the structure to
46 - carbon: 1 or 4, use C4' (eta and theta) or C1' (eta_prime and theta_prime) 49 + consider its nucleotides.
47 - sd_range: tuple, set values below avg + sd_range[0] * stdev to 0,
48 - and values above avg + sd_range[1] * stdev to avg + sd_range[1] * stdev.
49 - This removes noise and cuts too high peaks, to clearly see the clusters.
50 """ 50 """
51 51
52 - os.makedirs("results/figures/wadley_plots/", exist_ok=True) 52 + os.makedirs(runDir + "/results/figures/wadley_plots/", exist_ok=True)
53 53
54 if carbon == 4: 54 if carbon == 4:
55 angle = "eta" 55 angle = "eta"
...@@ -63,30 +63,32 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0): ...@@ -63,30 +63,32 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
63 exit("You overestimate my capabilities !") 63 exit("You overestimate my capabilities !")
64 64
65 65
66 - if not path.isfile(f"data/wadley_kernel_{angle}_{res}A.npz"): 66 + if not path.isfile(runDir + f"/data/wadley_kernel_{angle}_{res}A.npz"):
67 67
68 # Get a worker number to position the progress bar 68 # Get a worker number to position the progress bar
69 global idxQueue 69 global idxQueue
70 thr_idx = idxQueue.get() 70 thr_idx = idxQueue.get()
71 + setproctitle(f"RNANet statistics.py Worker {thr_idx+1} reproduce_wadley_results(carbon={carbon})")
72 +
71 pbar = tqdm(total=2, desc=f"Worker {thr_idx+1}: eta/theta C{carbon} kernels", position=thr_idx+1, leave=False) 73 pbar = tqdm(total=2, desc=f"Worker {thr_idx+1}: eta/theta C{carbon} kernels", position=thr_idx+1, leave=False)
72 74
73 # Extract the angle values of c2'-endo and c3'-endo nucleotides 75 # Extract the angle values of c2'-endo and c3'-endo nucleotides
74 - with sqlite3.connect("results/RNANet.db") as conn: 76 + with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
75 df = pd.read_sql(f"""SELECT {angle}, th{angle} 77 df = pd.read_sql(f"""SELECT {angle}, th{angle}
76 - FROM nucleotide JOIN ( 78 + FROM (
77 - SELECT chain_id FROM chain JOIN structure 79 + SELECT chain_id FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
78 - WHERE structure.resolution <= {res} 80 + WHERE chain.rfam_acc = 'unmappd' AND structure.resolution <= {res} AND issue = 0
79 - ) AS c 81 + ) AS c NATURAL JOIN nucleotide
80 WHERE puckering="C2'-endo" 82 WHERE puckering="C2'-endo"
81 AND {angle} IS NOT NULL 83 AND {angle} IS NOT NULL
82 AND th{angle} IS NOT NULL;""", conn) 84 AND th{angle} IS NOT NULL;""", conn)
83 c2_endo_etas = df[angle].values.tolist() 85 c2_endo_etas = df[angle].values.tolist()
84 c2_endo_thetas = df["th"+angle].values.tolist() 86 c2_endo_thetas = df["th"+angle].values.tolist()
85 df = pd.read_sql(f"""SELECT {angle}, th{angle} 87 df = pd.read_sql(f"""SELECT {angle}, th{angle}
86 - FROM nucleotide JOIN ( 88 + FROM (
87 - SELECT chain_id FROM chain JOIN structure 89 + SELECT chain_id FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
88 - WHERE structure.resolution <= {res} 90 + WHERE chain.rfam_acc = 'unmappd' AND structure.resolution <= {res} AND issue = 0
89 - ) AS c 91 + ) AS c NATURAL JOIN nucleotide
90 WHERE form = '.' 92 WHERE form = '.'
91 AND puckering="C3'-endo" 93 AND puckering="C3'-endo"
92 AND {angle} IS NOT NULL 94 AND {angle} IS NOT NULL
...@@ -111,14 +113,16 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0): ...@@ -111,14 +113,16 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
111 pbar.update(1) 113 pbar.update(1)
112 114
113 # Save the data to an archive for later use without the need to recompute 115 # Save the data to an archive for later use without the need to recompute
114 - np.savez(f"data/wadley_kernel_{angle}_{res}A.npz", 116 + np.savez(runDir + f"/data/wadley_kernel_{angle}_{res}A.npz",
115 c3_endo_e=c3_endo_etas, c3_endo_t=c3_endo_thetas, 117 c3_endo_e=c3_endo_etas, c3_endo_t=c3_endo_thetas,
116 c2_endo_e=c2_endo_etas, c2_endo_t=c2_endo_thetas, 118 c2_endo_e=c2_endo_etas, c2_endo_t=c2_endo_thetas,
117 kernel_c3=f_c3, kernel_c2=f_c2) 119 kernel_c3=f_c3, kernel_c2=f_c2)
118 pbar.close() 120 pbar.close()
119 idxQueue.put(thr_idx) 121 idxQueue.put(thr_idx)
120 else: 122 else:
121 - f = np.load(f"data/wadley_kernel_{angle}_{res}A.npz") 123 + setproctitle(f"RNANet statistics.py reproduce_wadley_results(carbon={carbon})")
124 +
125 + f = np.load(runDir + f"/data/wadley_kernel_{angle}_{res}A.npz")
122 c2_endo_etas = f["c2_endo_e"] 126 c2_endo_etas = f["c2_endo_e"]
123 c3_endo_etas = f["c3_endo_e"] 127 c3_endo_etas = f["c3_endo_e"]
124 c2_endo_thetas = f["c2_endo_t"] 128 c2_endo_thetas = f["c2_endo_t"]
...@@ -148,7 +152,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0): ...@@ -148,7 +152,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
148 f_low_thr = f.mean() + sd_range[0]*f.std() 152 f_low_thr = f.mean() + sd_range[0]*f.std()
149 f_cut = np.where(f > f_sup_thr, f_sup_thr, f) 153 f_cut = np.where(f > f_sup_thr, f_sup_thr, f)
150 f_cut = np.where(f_cut < f_low_thr, 0, f_cut) 154 f_cut = np.where(f_cut < f_low_thr, 0, f_cut)
151 - levels = [f.mean()+f.std(), f.mean()+2*f.std(), f.mean()+4*f.std()] 155 + levels = [ f.mean()+f.std(), f.mean()+2*f.std(), f.mean()+4*f.std()]
152 156
153 # histogram: 157 # histogram:
154 fig = plt.figure() 158 fig = plt.figure()
...@@ -157,7 +161,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0): ...@@ -157,7 +161,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
157 ax.bar3d(xpos.ravel(), ypos.ravel(), 0.0, 0.09, 0.09, hist_cut.ravel(), color=color_values, zorder="max") 161 ax.bar3d(xpos.ravel(), ypos.ravel(), 0.0, 0.09, 0.09, hist_cut.ravel(), color=color_values, zorder="max")
158 ax.set_xlabel(xlabel) 162 ax.set_xlabel(xlabel)
159 ax.set_ylabel(ylabel) 163 ax.set_ylabel(ylabel)
160 - fig.savefig(f"results/figures/wadley_plots/wadley_hist_{angle}_{l}_{res}A.png") 164 + fig.savefig(runDir + f"/results/figures/wadley_plots/wadley_hist_{angle}_{l}_{res}A.png")
161 if show: 165 if show:
162 fig.show() 166 fig.show()
163 plt.close() 167 plt.close()
...@@ -168,7 +172,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0): ...@@ -168,7 +172,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
168 ax.plot_surface(xx, yy, f_cut, cmap=cm.get_cmap("coolwarm"), linewidth=0, antialiased=True) 172 ax.plot_surface(xx, yy, f_cut, cmap=cm.get_cmap("coolwarm"), linewidth=0, antialiased=True)
169 ax.set_xlabel(xlabel) 173 ax.set_xlabel(xlabel)
170 ax.set_ylabel(ylabel) 174 ax.set_ylabel(ylabel)
171 - fig.savefig(f"results/figures/wadley_plots/wadley_distrib_{angle}_{l}_{res}A.png") 175 + fig.savefig(runDir + f"/results/figures/wadley_plots/wadley_distrib_{angle}_{l}_{res}A.png")
172 if show: 176 if show:
173 fig.show() 177 fig.show()
174 plt.close() 178 plt.close()
...@@ -177,10 +181,10 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0): ...@@ -177,10 +181,10 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
177 fig = plt.figure(figsize=(5,5)) 181 fig = plt.figure(figsize=(5,5))
178 ax = fig.gca() 182 ax = fig.gca()
179 ax.scatter(x, y, s=1, alpha=0.1) 183 ax.scatter(x, y, s=1, alpha=0.1)
180 - ax.contourf(xx, yy, f_cut, alpha=0.5, cmap=cm.get_cmap("coolwarm"), levels=levels, extend="max") 184 + ax.contourf(xx, yy, f, alpha=0.5, cmap=cm.get_cmap("coolwarm"), levels=levels, extend="max")
181 ax.set_xlabel(xlabel) 185 ax.set_xlabel(xlabel)
182 ax.set_ylabel(ylabel) 186 ax.set_ylabel(ylabel)
183 - fig.savefig(f"results/figures/wadley_plots/wadley_{angle}_{l}_{res}A.png") 187 + fig.savefig(runDir + f"/results/figures/wadley_plots/wadley_{angle}_{l}_{res}A.png")
184 if show: 188 if show:
185 fig.show() 189 fig.show()
186 plt.close() 190 plt.close()
...@@ -188,9 +192,12 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0): ...@@ -188,9 +192,12 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
188 192
189 def stats_len(): 193 def stats_len():
190 """Plots statistics on chain lengths in RNA families. 194 """Plots statistics on chain lengths in RNA families.
195 + Uses all chains mapped to a family including copies, inferred or not.
191 196
192 REQUIRES tables chain, nucleotide up to date. 197 REQUIRES tables chain, nucleotide up to date.
193 """ 198 """
199 +
200 + setproctitle(f"RNANet statistics.py stats_len({res_thr})")
194 201
195 # Get a worker number to position the progress bar 202 # Get a worker number to position the progress bar
196 global idxQueue 203 global idxQueue
...@@ -214,7 +221,7 @@ def stats_len(): ...@@ -214,7 +221,7 @@ def stats_len():
214 cols = [] 221 cols = []
215 lengths = [] 222 lengths = []
216 223
217 - for i,f in enumerate(tqdm(fam_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: Average chain lengths", leave=False)): 224 + for f in tqdm(fam_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: Average chain lengths", leave=False):
218 225
219 # Define a color for that family in the plot 226 # Define a color for that family in the plot
220 if f in LSU_set: 227 if f in LSU_set:
...@@ -229,7 +236,7 @@ def stats_len(): ...@@ -229,7 +236,7 @@ def stats_len():
229 cols.append("grey") 236 cols.append("grey")
230 237
231 # Get the lengths of chains 238 # Get the lengths of chains
232 - with sqlite3.connect("results/RNANet.db") as conn: 239 + with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
233 l = [ x[0] for x in sql_ask_database(conn, f"""SELECT COUNT(index_chain) 240 l = [ x[0] for x in sql_ask_database(conn, f"""SELECT COUNT(index_chain)
234 FROM ( 241 FROM (
235 SELECT chain_id 242 SELECT chain_id
...@@ -239,8 +246,6 @@ def stats_len(): ...@@ -239,8 +246,6 @@ def stats_len():
239 GROUP BY chain_id;""", warn_every=0) ] 246 GROUP BY chain_id;""", warn_every=0) ]
240 lengths.append(l) # list of chain lengths from the family 247 lengths.append(l) # list of chain lengths from the family
241 248
242 - # notify(f"[{i+1}/{len(fam_list)}] Computed {f} chains lengths")
243 -
244 # Plot the figure 249 # Plot the figure
245 fig = plt.figure(figsize=(10,3)) 250 fig = plt.figure(figsize=(10,3))
246 ax = fig.gca() 251 ax = fig.gca()
...@@ -267,7 +272,7 @@ def stats_len(): ...@@ -267,7 +272,7 @@ def stats_len():
267 ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5)) 272 ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5))
268 273
269 # Save the figure 274 # Save the figure
270 - fig.savefig(f"results/figures/lengths_{res_thr}A.png") 275 + fig.savefig(runDir + f"/results/figures/lengths_{res_thr}A.png")
271 idxQueue.put(thr_idx) # replace the thread index in the queue 276 idxQueue.put(thr_idx) # replace the thread index in the queue
272 # notify("Computed sequence length statistics and saved the figure.") 277 # notify("Computed sequence length statistics and saved the figure.")
273 278
...@@ -285,6 +290,7 @@ def format_percentage(tot, x): ...@@ -285,6 +290,7 @@ def format_percentage(tot, x):
285 290
286 def stats_freq(): 291 def stats_freq():
287 """Computes base frequencies in all RNA families. 292 """Computes base frequencies in all RNA families.
293 + Uses all chains mapped to a family including copies, inferred or not.
288 294
289 Outputs results/frequencies.csv 295 Outputs results/frequencies.csv
290 REQUIRES tables chain, nucleotide up to date.""" 296 REQUIRES tables chain, nucleotide up to date."""
...@@ -293,17 +299,18 @@ def stats_freq(): ...@@ -293,17 +299,18 @@ def stats_freq():
293 global idxQueue 299 global idxQueue
294 thr_idx = idxQueue.get() 300 thr_idx = idxQueue.get()
295 301
302 + setproctitle(f"RNANet statistics.py Worker {thr_idx+1} stats_freq()")
303 +
296 # Initialize a Counter object for each family 304 # Initialize a Counter object for each family
297 freqs = {} 305 freqs = {}
298 for f in fam_list: 306 for f in fam_list:
299 freqs[f] = Counter() 307 freqs[f] = Counter()
300 308
301 # List all nt_names happening within a RNA family and store the counts in the Counter 309 # List all nt_names happening within a RNA family and store the counts in the Counter
302 - for i,f in enumerate(tqdm(fam_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: Base frequencies", leave=False)): 310 + for f in tqdm(fam_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: Base frequencies", leave=False):
303 - with sqlite3.connect("results/RNANet.db") as conn: 311 + with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
304 counts = dict(sql_ask_database(conn, f"SELECT nt_name, COUNT(nt_name) FROM (SELECT chain_id from chain WHERE rfam_acc='{f}') NATURAL JOIN nucleotide GROUP BY nt_name;", warn_every=0)) 312 counts = dict(sql_ask_database(conn, f"SELECT nt_name, COUNT(nt_name) FROM (SELECT chain_id from chain WHERE rfam_acc='{f}') NATURAL JOIN nucleotide GROUP BY nt_name;", warn_every=0))
305 freqs[f].update(counts) 313 freqs[f].update(counts)
306 - # notify(f"[{i+1}/{len(fam_list)}] Computed {f} nucleotide frequencies.")
307 314
308 # Create a pandas DataFrame, and save it to CSV. 315 # Create a pandas DataFrame, and save it to CSV.
309 df = pd.DataFrame() 316 df = pd.DataFrame()
...@@ -311,7 +318,7 @@ def stats_freq(): ...@@ -311,7 +318,7 @@ def stats_freq():
311 tot = sum(freqs[f].values()) 318 tot = sum(freqs[f].values())
312 df = pd.concat([ df, pd.DataFrame([[ format_percentage(tot, x) for x in freqs[f].values() ]], columns=list(freqs[f]), index=[f]) ]) 319 df = pd.concat([ df, pd.DataFrame([[ format_percentage(tot, x) for x in freqs[f].values() ]], columns=list(freqs[f]), index=[f]) ])
313 df = df.fillna(0) 320 df = df.fillna(0)
314 - df.to_csv("results/frequencies.csv") 321 + df.to_csv(runDir + "/results/frequencies.csv")
315 idxQueue.put(thr_idx) # replace the thread index in the queue 322 idxQueue.put(thr_idx) # replace the thread index in the queue
316 # notify("Saved nucleotide frequencies to CSV file.") 323 # notify("Saved nucleotide frequencies to CSV file.")
317 324
...@@ -327,11 +334,13 @@ def parallel_stats_pairs(f): ...@@ -327,11 +334,13 @@ def parallel_stats_pairs(f):
327 global idxQueue 334 global idxQueue
328 thr_idx = idxQueue.get() 335 thr_idx = idxQueue.get()
329 336
337 + setproctitle(f"RNANet statistics.py Worker {thr_idx+1} p_stats_pairs({f})")
338 +
330 chain_id_list = mappings_list[f] 339 chain_id_list = mappings_list[f]
331 data = [] 340 data = []
332 sqldata = [] 341 sqldata = []
333 for cid in tqdm(chain_id_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} basepair types", leave=False): 342 for cid in tqdm(chain_id_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} basepair types", leave=False):
334 - with sqlite3.connect("results/RNANet.db") as conn: 343 + with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
335 # Get comma separated lists of basepairs per nucleotide 344 # Get comma separated lists of basepairs per nucleotide
336 interactions = pd.DataFrame( 345 interactions = pd.DataFrame(
337 sql_ask_database(conn, 346 sql_ask_database(conn,
...@@ -398,7 +407,7 @@ def parallel_stats_pairs(f): ...@@ -398,7 +407,7 @@ def parallel_stats_pairs(f):
398 data.append(expanded_list) 407 data.append(expanded_list)
399 408
400 # Update the database 409 # Update the database
401 - with sqlite3.connect("results/RNANet.db", isolation_level=None) as conn: 410 + with sqlite3.connect(runDir + "/results/RNANet.db", isolation_level=None) as conn:
402 conn.execute('pragma journal_mode=wal') # Allow multiple other readers to ask things while we execute this writing query 411 conn.execute('pragma journal_mode=wal') # Allow multiple other readers to ask things while we execute this writing query
403 sql_execute(conn, """UPDATE chain SET pair_count_cWW = ?, pair_count_cWH = ?, pair_count_cWS = ?, pair_count_cHH = ?, 412 sql_execute(conn, """UPDATE chain SET pair_count_cWW = ?, pair_count_cWH = ?, pair_count_cWS = ?, pair_count_cHH = ?,
404 pair_count_cHS = ?, pair_count_cSS = ?, pair_count_tWW = ?, pair_count_tWH = ?, pair_count_tWS = ?, 413 pair_count_cHS = ?, pair_count_cSS = ?, pair_count_tWW = ?, pair_count_tWH = ?, pair_count_tWS = ?,
...@@ -416,8 +425,8 @@ def parallel_stats_pairs(f): ...@@ -416,8 +425,8 @@ def parallel_stats_pairs(f):
416 425
417 # Create an output DataFrame 426 # Create an output DataFrame
418 f_df = pd.DataFrame([[ x for x in cnt.values() ]], columns=list(cnt), index=[f]) 427 f_df = pd.DataFrame([[ x for x in cnt.values() ]], columns=list(cnt), index=[f])
419 - f_df.to_csv(f"data/{f}_counts.csv") 428 + f_df.to_csv(runDir + f"/data/{f}_counts.csv")
420 - expanded_list.to_csv(f"data/{f}_pairs.csv") 429 + expanded_list.to_csv(runDir + f"/data/{f}_pairs.csv")
421 430
422 idxQueue.put(thr_idx) # replace the thread index in the queue 431 idxQueue.put(thr_idx) # replace the thread index in the queue
423 432
...@@ -430,28 +439,34 @@ def to_dist_matrix(f): ...@@ -430,28 +439,34 @@ def to_dist_matrix(f):
430 global idxQueue 439 global idxQueue
431 thr_idx = idxQueue.get() 440 thr_idx = idxQueue.get()
432 441
433 - # notify(f"Computing {f} distance matrix from alignment...") 442 + setproctitle(f"RNANet statistics.py Worker {thr_idx+1} to_dist_matrix({f})")
434 - command = f"esl-alipid --rna --noheader --informat stockholm {f}_3d_only.stk"
435 443
436 # Prepare a file 444 # Prepare a file
437 with open(path_to_seq_data+f"/realigned/{f}++.afa") as al_file: 445 with open(path_to_seq_data+f"/realigned/{f}++.afa") as al_file:
438 al = AlignIO.read(al_file, "fasta") 446 al = AlignIO.read(al_file, "fasta")
439 names = [ x.id for x in al if '[' in x.id ] 447 names = [ x.id for x in al if '[' in x.id ]
440 al = al[-len(names):] 448 al = al[-len(names):]
441 - with open(f + "_3d_only.stk", "w") as only_3d: 449 + with open(path_to_seq_data+f"/realigned/{f}_3d_only_tmp.stk", "w") as only_3d:
442 - only_3d.write(al.format("stockholm")) 450 + try:
451 + only_3d.write(al.format("stockholm"))
452 + except ValueError as e:
453 + warn(e)
443 del al 454 del al
455 + subprocess.run(["esl-reformat", "--informat", "stockholm", "--mingap", "-o", path_to_seq_data+f"/realigned/{f}_3d_only.stk", "stockholm", path_to_seq_data+f"/realigned/{f}_3d_only_tmp.stk"])
444 456
445 # Prepare the job 457 # Prepare the job
446 - process = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE) 458 + process = subprocess.Popen(shlex.split(f"esl-alipid --rna --noheader --informat stockholm {path_to_seq_data}realigned/{f}_3d_only.stk"),
459 + stdout=subprocess.PIPE, stderr=subprocess.PIPE)
447 id_matrix = np.zeros((len(names), len(names))) 460 id_matrix = np.zeros((len(names), len(names)))
448 461
449 pbar = tqdm(total = len(names)*(len(names)-1)*0.5, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} idty matrix", leave=False) 462 pbar = tqdm(total = len(names)*(len(names)-1)*0.5, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} idty matrix", leave=False)
450 - while process.poll() is None: 463 + cnt = 0
451 - output = process.stdout.readline() 464 + while not cnt or process.poll() is None:
465 + output = process.stdout.read()
452 if output: 466 if output:
453 lines = output.strip().split(b'\n') 467 lines = output.strip().split(b'\n')
454 for l in lines: 468 for l in lines:
469 + cnt += 1
455 line = l.split() 470 line = l.split()
456 s1 = line[0].decode('utf-8') 471 s1 = line[0].decode('utf-8')
457 s2 = line[1].decode('utf-8') 472 s2 = line[1].decode('utf-8')
...@@ -460,9 +475,14 @@ def to_dist_matrix(f): ...@@ -460,9 +475,14 @@ def to_dist_matrix(f):
460 id2 = names.index(s2) 475 id2 = names.index(s2)
461 id_matrix[id1, id2] = float(score) 476 id_matrix[id1, id2] = float(score)
462 pbar.update(1) 477 pbar.update(1)
478 + if cnt != len(names)*(len(names)-1)*0.5:
479 + warn(f"{f} got {cnt} updates on {len(names)*(len(names)-1)*0.5}")
480 + if process.poll() != 0:
481 + l = process.stderr.read().strip().split(b'\n')
482 + warn("\n".join([ line.decode('utf-8') for line in l ]))
463 pbar.close() 483 pbar.close()
464 484
465 - subprocess.run(["rm", "-f", f + "_3d_only.stk"]) 485 + subprocess.run(["rm", "-f", f + "_3d_only_tmp.stk"])
466 np.save("data/"+f+".npy", id_matrix) 486 np.save("data/"+f+".npy", id_matrix)
467 idxQueue.put(thr_idx) # replace the thread index in the queue 487 idxQueue.put(thr_idx) # replace the thread index in the queue
468 return 0 488 return 0
...@@ -471,21 +491,26 @@ def seq_idty(): ...@@ -471,21 +491,26 @@ def seq_idty():
471 """Computes identity matrices for each of the RNA families. 491 """Computes identity matrices for each of the RNA families.
472 492
473 REQUIRES temporary results files in data/*.npy 493 REQUIRES temporary results files in data/*.npy
474 - REQUIRES tables chain, family un to date.""" 494 + REQUIRES tables chain, family up to date."""
475 495
476 # load distance matrices 496 # load distance matrices
497 + fams_to_plot = [ f for f in famlist if f not in ignored ]
477 fam_arrays = [] 498 fam_arrays = []
478 - for f in famlist: 499 + for f in fams_to_plot:
479 if path.isfile("data/"+f+".npy"): 500 if path.isfile("data/"+f+".npy"):
480 - fam_arrays.append(np.load("data/"+f+".npy")) 501 + fam_arrays.append(np.load("data/"+f+".npy") / 100.0) # normalize percentages in [0,1]
481 else: 502 else:
482 - fam_arrays.append([]) 503 + warn("data/"+f+".npy not found !")
504 + fam_arrays.append(np.array([]))
483 505
484 # Update database with identity percentages 506 # Update database with identity percentages
485 - conn = sqlite3.connect("results/RNANet.db") 507 + conn = sqlite3.connect(runDir + "/results/RNANet.db")
486 - for f, D in zip(famlist, fam_arrays): 508 + for f, D in zip(fams_to_plot, fam_arrays):
487 if not len(D): continue 509 if not len(D): continue
488 - a = 1.0 - np.average(D + D.T) # Get symmetric matrix instead of lower triangle + convert from distance matrix to identity matrix 510 + if D.shape[0] > 1:
511 + a = np.sum(D) * 2 / D.shape[0] / (D.shape[0] - 1) # SUM(D) / (n(n-1)/2)
512 + else:
513 + a = D[0][0]
489 conn.execute(f"UPDATE family SET idty_percent = {round(float(a),2)} WHERE rfam_acc = '{f}';") 514 conn.execute(f"UPDATE family SET idty_percent = {round(float(a),2)} WHERE rfam_acc = '{f}';")
490 conn.commit() 515 conn.commit()
491 conn.close() 516 conn.close()
...@@ -495,10 +520,11 @@ def seq_idty(): ...@@ -495,10 +520,11 @@ def seq_idty():
495 axs = axs.ravel() 520 axs = axs.ravel()
496 [axi.set_axis_off() for axi in axs] 521 [axi.set_axis_off() for axi in axs]
497 im = "" # Just to declare the variable, it will be set in the loop 522 im = "" # Just to declare the variable, it will be set in the loop
498 - for f, D, ax in zip(famlist, fam_arrays, axs): 523 + for f, D, ax in zip(fams_to_plot, fam_arrays, axs):
499 - if not len(D): continue 524 + D = D + D.T # Copy the lower triangle to upper, to get a symmetrical matrix
500 if D.shape[0] > 2: # Cluster only if there are more than 2 sequences to organize 525 if D.shape[0] > 2: # Cluster only if there are more than 2 sequences to organize
501 - D = D + D.T # Copy the lower triangle to upper, to get a symetrical matrix 526 + D = 1.0 - D
527 + np.fill_diagonal(D, 0.0)
502 condensedD = squareform(D) 528 condensedD = squareform(D)
503 529
504 # Compute basic dendrogram by Ward's method 530 # Compute basic dendrogram by Ward's method
...@@ -507,15 +533,20 @@ def seq_idty(): ...@@ -507,15 +533,20 @@ def seq_idty():
507 533
508 # Reorganize rows and cols 534 # Reorganize rows and cols
509 idx1 = Z['leaves'] 535 idx1 = Z['leaves']
510 - D = D[idx1,:] 536 + D = D[idx1[::-1],:]
511 D = D[:,idx1[::-1]] 537 D = D[:,idx1[::-1]]
512 - im = ax.matshow(1.0 - D, vmin=0, vmax=1, origin='lower') # convert to identity matrix 1 - D from distance matrix D 538 + D = 1.0 - D
513 - ax.set_title(f + "\n(" + str(len(mappings_list[f]))+ " chains)", fontsize=10) 539 + elif D.shape[0] == 2:
540 + np.fill_diagonal(D, 1.0) # the diagonal has been ignored until now
541 + ax.text(np.floor(D.shape[0]/2.0)-(0.5 if not D.shape[0]%2 else 0), -0.5, f + "\n(" + str(D.shape[0]) + " chains)",
542 + fontsize=9, horizontalalignment = 'center', verticalalignment='bottom')
543 + im = ax.matshow(D, vmin=0, vmax=1)
544 +
514 fig.tight_layout() 545 fig.tight_layout()
515 - fig.subplots_adjust(wspace=0.1, hspace=0.3) 546 + fig.subplots_adjust(hspace=0.3, wspace=0.1)
516 - fig.colorbar(im, ax=axs[-1], shrink=0.8) 547 + fig.colorbar(im, ax=axs[-4], shrink=0.8)
517 - fig.savefig(f"results/figures/distances.png") 548 + fig.savefig(runDir + f"/results/figures/distances.png")
518 - notify("Computed all identity matrices and saved the figure.") 549 + print("> Computed all identity matrices and saved the figure.", flush=True)
519 550
520 def stats_pairs(): 551 def stats_pairs():
521 """Counts occurrences of intra-chain base-pair types in RNA families 552 """Counts occurrences of intra-chain base-pair types in RNA families
...@@ -523,6 +554,8 @@ def stats_pairs(): ...@@ -523,6 +554,8 @@ def stats_pairs():
523 Creates a temporary results file in data/pair_counts.csv, and a results file in results/pairings.csv. 554 Creates a temporary results file in data/pair_counts.csv, and a results file in results/pairings.csv.
524 REQUIRES tables chain, nucleotide up-to-date.""" 555 REQUIRES tables chain, nucleotide up-to-date."""
525 556
557 + setproctitle(f"RNANet statistics.py stats_pairs()")
558 +
526 def line_format(family_data): 559 def line_format(family_data):
527 return family_data.apply(partial(format_percentage, sum(family_data))) 560 return family_data.apply(partial(format_percentage, sum(family_data)))
528 561
...@@ -530,12 +563,12 @@ def stats_pairs(): ...@@ -530,12 +563,12 @@ def stats_pairs():
530 results = [] 563 results = []
531 allpairs = [] 564 allpairs = []
532 for f in fam_list: 565 for f in fam_list:
533 - newpairs = pd.read_csv(f"data/{f}_pairs.csv", index_col=0) 566 + newpairs = pd.read_csv(runDir + f"/data/{f}_pairs.csv", index_col=0)
534 - fam_df = pd.read_csv(f"data/{f}_counts.csv", index_col=0) 567 + fam_df = pd.read_csv(runDir + f"/data/{f}_counts.csv", index_col=0)
535 results.append(fam_df) 568 results.append(fam_df)
536 allpairs.append(newpairs) 569 allpairs.append(newpairs)
537 - subprocess.run(["rm", "-f", f"data/{f}_pairs.csv"]) 570 + subprocess.run(["rm", "-f", runDir + f"/data/{f}_pairs.csv"])
538 - subprocess.run(["rm", "-f", f"data/{f}_counts.csv"]) 571 + subprocess.run(["rm", "-f", runDir + f"/data/{f}_counts.csv"])
539 all_pairs = pd.concat(allpairs) 572 all_pairs = pd.concat(allpairs)
540 df = pd.concat(results).fillna(0) 573 df = pd.concat(results).fillna(0)
541 df.to_csv("data/pair_counts.csv") 574 df.to_csv("data/pair_counts.csv")
...@@ -573,14 +606,14 @@ def stats_pairs(): ...@@ -573,14 +606,14 @@ def stats_pairs():
573 crosstab = crosstab[["AU", "GC", "Wobble", "Other"]] 606 crosstab = crosstab[["AU", "GC", "Wobble", "Other"]]
574 607
575 # Save to CSV 608 # Save to CSV
576 - df.to_csv("results/pair_types.csv") 609 + df.to_csv(runDir + "/results/pair_types.csv")
577 610
578 # Plot barplot of overall types 611 # Plot barplot of overall types
579 ax = crosstab.plot(figsize=(8,5), kind='bar', stacked=True, log=False, fontsize=13) 612 ax = crosstab.plot(figsize=(8,5), kind='bar', stacked=True, log=False, fontsize=13)
580 ax.set_ylabel("Number of observations (millions)", fontsize=13) 613 ax.set_ylabel("Number of observations (millions)", fontsize=13)
581 ax.set_xlabel(None) 614 ax.set_xlabel(None)
582 plt.subplots_adjust(left=0.1, bottom=0.16, top=0.95, right=0.99) 615 plt.subplots_adjust(left=0.1, bottom=0.16, top=0.95, right=0.99)
583 - plt.savefig("results/figures/pairings.png") 616 + plt.savefig(runDir + "/results/figures/pairings.png")
584 617
585 notify("Computed nucleotide statistics and saved CSV and PNG file.") 618 notify("Computed nucleotide statistics and saved CSV and PNG file.")
586 619
...@@ -588,8 +621,10 @@ def per_chain_stats(): ...@@ -588,8 +621,10 @@ def per_chain_stats():
588 """Computes per-chain frequencies and base-pair type counts. 621 """Computes per-chain frequencies and base-pair type counts.
589 622
590 REQUIRES tables chain, nucleotide up to date. """ 623 REQUIRES tables chain, nucleotide up to date. """
624 +
625 + setproctitle(f"RNANet statistics.py per_chain_stats()")
591 626
592 - with sqlite3.connect("results/RNANet.db", isolation_level=None) as conn: 627 + with sqlite3.connect(runDir + "/results/RNANet.db", isolation_level=None) as conn:
593 # Compute per-chain nucleotide frequencies 628 # Compute per-chain nucleotide frequencies
594 df = pd.read_sql("SELECT SUM(is_A) as A, SUM(is_C) AS C, SUM(is_G) AS G, SUM(is_U) AS U, SUM(is_other) AS O, chain_id FROM nucleotide GROUP BY chain_id;", conn) 629 df = pd.read_sql("SELECT SUM(is_A) as A, SUM(is_C) AS C, SUM(is_G) AS G, SUM(is_U) AS U, SUM(is_other) AS O, chain_id FROM nucleotide GROUP BY chain_id;", conn)
595 df["total"] = pd.Series(df.A + df.C + df.G + df.U + df.O, dtype=np.float64) 630 df["total"] = pd.Series(df.A + df.C + df.G + df.U + df.O, dtype=np.float64)
...@@ -600,35 +635,143 @@ def per_chain_stats(): ...@@ -600,35 +635,143 @@ def per_chain_stats():
600 conn.execute('pragma journal_mode=wal') 635 conn.execute('pragma journal_mode=wal')
601 sql_execute(conn, "UPDATE chain SET chain_freq_A = ?, chain_freq_C = ?, chain_freq_G = ?, chain_freq_U = ?, chain_freq_other = ? WHERE chain_id= ?;", 636 sql_execute(conn, "UPDATE chain SET chain_freq_A = ?, chain_freq_C = ?, chain_freq_G = ?, chain_freq_U = ?, chain_freq_other = ? WHERE chain_id= ?;",
602 many=True, data=list(df.to_records(index=False)), warn_every=10) 637 many=True, data=list(df.to_records(index=False)), warn_every=10)
603 - notify("Updated the database with per-chain base frequencies") 638 + print("> Updated the database with per-chain base frequencies", flush=True)
604 639
605 def general_stats(): 640 def general_stats():
606 """ 641 """
607 Number of structures as function of the resolution threshold 642 Number of structures as function of the resolution threshold
608 Number of Rfam families as function of the resolution threshold 643 Number of Rfam families as function of the resolution threshold
609 """ 644 """
610 - with sqlite3.connect("results/RNANet.db") as conn: 645 +
611 - df_unique = pd.read_sql(f"""SELECT distinct pdb_id, chain_name, exp_method, resolution 646 + setproctitle(f"RNANet statistics.py general_stats()")
612 - FROM chain JOIN structure ON chain.structure_id = structure.pdb_id 647 +
613 - WHERE rfam_acc = 'unmappd' AND ISSUE=0;""", conn) 648 + reqs = [
614 - df_mapped_unique = pd.read_sql(f"""SELECT distinct pdb_id, chain_name, exp_method, resolution 649 + # unique unmapped chains with no issues
615 - FROM chain JOIN structure ON chain.structure_id = structure.pdb_id 650 + """ SELECT distinct pdb_id, chain_name, exp_method, resolution
616 - WHERE rfam_acc != 'unmappd' AND ISSUE=0;""", conn) 651 + FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
617 - df_mapped_copies = pd.read_sql(f"""SELECT pdb_id, chain_name, inferred, rfam_acc, pdb_start, pdb_end, exp_method, resolution 652 + WHERE rfam_acc = 'unmappd' AND ISSUE=0;""",
618 - FROM chain JOIN structure ON chain.structure_id = structure.pdb_id 653 +
619 - WHERE rfam_acc != 'unmappd' AND ISSUE=0;""", conn) 654 + # unique mapped chains with no issues
620 - df_inferred_only_unique = pd.read_sql(f"""SELECT DISTINCT pdb_id, c.chain_name, exp_method, resolution 655 + """ SELECT distinct pdb_id, chain_name, exp_method, resolution
621 - FROM (SELECT inferred, rfam_acc, pdb_start, pdb_end, chain.structure_id, chain.chain_name, r.redundancy, r.inf_redundancy 656 + FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
622 - FROM chain 657 + WHERE rfam_acc != 'unmappd' AND ISSUE=0;""",
623 - JOIN (SELECT structure_id, chain_name, COUNT(distinct rfam_acc) AS redundancy, SUM(inferred) AS inf_redundancy 658 +
624 - FROM chain 659 + # mapped chains with no issues
625 - WHERE rfam_acc != 'unmappd' AND issue=0 660 + """ SELECT pdb_id, chain_name, inferred, rfam_acc, pdb_start, pdb_end, exp_method, resolution
626 - GROUP BY structure_id, chain_name 661 + FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
627 - ) AS r ON chain.structure_id=r.structure_id AND chain.chain_name = r.chain_name 662 + WHERE rfam_acc != 'unmappd' AND ISSUE=0;""",
628 - WHERE r.redundancy=r.inf_redundancy AND rfam_acc != 'unmappd' and issue=0 663 +
629 - ) AS c 664 + # mapped chains with no issues that are all inferred
630 - JOIN structure ON c.structure_id=structure.pdb_id;""", conn) 665 + """ SELECT DISTINCT pdb_id, c.chain_name, exp_method, resolution
631 - print("> found", len(df_inferred_only_unique.index), "chains which are mapped only by inference using BGSU NR Lists.") 666 + FROM (
667 + SELECT inferred, rfam_acc, pdb_start, pdb_end, chain.structure_id, chain.chain_name, r.redundancy, r.inf_redundancy
668 + FROM chain
669 + JOIN (SELECT structure_id, chain_name, COUNT(distinct rfam_acc) AS redundancy, SUM(inferred) AS inf_redundancy
670 + FROM chain
671 + WHERE rfam_acc != 'unmappd' AND issue=0
672 + GROUP BY structure_id, chain_name
673 + ) AS r ON chain.structure_id=r.structure_id AND chain.chain_name = r.chain_name
674 + WHERE r.redundancy=r.inf_redundancy AND rfam_acc != 'unmappd' and issue=0
675 + ) AS c
676 + JOIN structure ON c.structure_id=structure.pdb_id;""",
677 +
678 + # Number of mapped chains (not inferred)
679 + """SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 0);""",
680 +
681 + # Number of unique mapped chains (not inferred)
682 + """SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 0);""",
683 +
684 + # Number of mapped chains (inferred)
685 + """SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 1);""",
686 +
687 + # Number of unique mapped chains (inferred)
688 + """SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 1);""",
689 +
690 + # Number of mapped chains inferred once
691 + """SELECT count(*) FROM (
692 + SELECT structure_id, chain_name, COUNT(DISTINCT rfam_acc) as c
693 + FROM chain where rfam_acc!='unmappd' and inferred=1
694 + GROUP BY structure_id, chain_name
695 + ) WHERE c=1;""",
696 +
697 + # Number of mapped chains inferred twice
698 + """select count(*) from (
699 + select structure_id, chain_name, count(distinct rfam_acc) as c
700 + from chain where rfam_acc!='unmappd' and inferred=1
701 + group by structure_id, chain_name
702 + ) where c=2;""",
703 +
704 + # Number of mapped chains inferred 3 times or more
705 + """select count(*) from (
706 + select structure_id, chain_name, count(distinct rfam_acc) as c
707 + from chain where rfam_acc!='unmappd' and inferred=1
708 + group by structure_id, chain_name
709 + ) where c>2;""",
710 +
711 + # Number of chains both mapped with and without inferrence
712 + """ SELECT COUNT(*) FROM (
713 + SELECT structure_id, chain_name, sum(inferred) AS s, COUNT(rfam_acc) AS c
714 + FROM chain
715 + WHERE rfam_acc!='unmappd'
716 + GROUP BY structure_id, chain_name
717 + )
718 + WHERE s < c AND s > 0;""",
719 +
720 + # Number of mapped chains (total)
721 + """SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd');""",
722 +
723 + # Number of unique mapped chains
724 + """SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd');""",
725 +
726 + # Number of unmapped chains
727 + """SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc = 'unmappd');""",
728 +
729 + # Number of mapped chains without issues (not inferred)
730 + """SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 0 AND issue = 0);""",
731 +
732 + # Number of unique mapped chains without issues (not inferred)
733 + """SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 0 AND issue = 0);""",
734 +
735 + # Number of mapped chains without issues (inferred)
736 + """SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 1 AND issue=0);""",
737 +
738 + # Number of unique mapped chains without issues (inferred)
739 + """SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 1 AND issue=0);""",
740 +
741 + # Number of mapped chains without issues (total)
742 + """SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND issue=0);""",
743 +
744 + # Number of unique mapped chains without issues
745 + """SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND issue=0);""",
746 +
747 + # Number of unmapped chains without issues
748 + """SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc = 'unmappd' AND issue=0);"""
749 + ]
750 +
751 + answers = []
752 + with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
753 + for r in reqs:
754 + answers.append(pd.read_sql(r, conn))
755 + df_unique = answers[0]
756 + df_mapped_unique = answers[1]
757 + df_mapped_copies = answers[2]
758 + df_inferred_only_unique = answers[3]
759 + print()
760 + print("> found", answers[4].iloc[0][0], f"chains ({answers[5].iloc[0][0]} unique chains) that are mapped thanks to Rfam. Removing chains with issues, only {answers[15].iloc[0][0]} ({answers[16].iloc[0][0]} unique)")
761 + if answers[4].iloc[0][0] != answers[5].iloc[0][0]:
762 + print("\t> This happens because different parts of the same chain can be mapped to different families.")
763 + print("> found", answers[6].iloc[0][0], f"chains ({answers[7].iloc[0][0]} unique chains) that are mapped by inferrence. Removing chains with issues, only {answers[17].iloc[0][0]} ({answers[18].iloc[0][0]} unique).")
764 + print("\t> ", answers[8].iloc[0][0], "chains are mapped only once,")
765 + print("\t> ", answers[9].iloc[0][0], "are mapped to 2 families,")
766 + print("\t> ", answers[10].iloc[0][0], "are mapped to 3 or more.")
767 + print("> Among them,", answers[11].iloc[0][0], "chains are mapped both with families found on Rfam and by inferrence.")
768 + if answers[11].iloc[0][0]:
769 + print("\t> this is normal if you used option -f (--full-inference). Otherwise, there might be a problem.")
770 + print("> TOTAL:", answers[12].iloc[0][0], f"chains ({answers[13].iloc[0][0]} unique chains) mapped to a family. Removing chains with issues, only {answers[19].iloc[0][0]} ({answers[20].iloc[0][0]} unique).")
771 + print("> TOTAL:", answers[14].iloc[0][0], f"unmapped chains. Removing chains with issues, {answers[21].iloc[0][0]}.")
772 + if answers[14].iloc[0][0]:
773 + print("\t> this is normal if you used option --no-homology. Otherwise, there might be a problem.")
774 + print()
632 775
633 ########################################## 776 ##########################################
634 # plot N = f(resolution, exp_method) 777 # plot N = f(resolution, exp_method)
...@@ -642,7 +785,7 @@ def general_stats(): ...@@ -642,7 +785,7 @@ def general_stats():
642 df_inferred_only_unique.sort_values('resolution', inplace=True, ignore_index=True) 785 df_inferred_only_unique.sort_values('resolution', inplace=True, ignore_index=True)
643 df_mapped_copies.sort_values('resolution', inplace=True, ignore_index=True) 786 df_mapped_copies.sort_values('resolution', inplace=True, ignore_index=True)
644 max_res = max(df_unique.resolution) 787 max_res = max(df_unique.resolution)
645 - max_structs = len(df_mapped_copies.index.tolist()) 788 + max_structs = max(len(df_mapped_copies.index), len(df_unique.index))
646 colors = np.linspace(0,1,1+len(methods)) 789 colors = np.linspace(0,1,1+len(methods))
647 plt.xticks( np.arange(0, max_res+2, 2.0).tolist(), np.arange(0, max_res+2, 2.0).tolist() ) 790 plt.xticks( np.arange(0, max_res+2, 2.0).tolist(), np.arange(0, max_res+2, 2.0).tolist() )
648 791
...@@ -654,7 +797,7 @@ def general_stats(): ...@@ -654,7 +797,7 @@ def general_stats():
654 axs[0][0].set_ylabel("ALL", fontsize=14) 797 axs[0][0].set_ylabel("ALL", fontsize=14)
655 axs[0][0].set_title("Number of unique RNA chains", fontsize=14) 798 axs[0][0].set_title("Number of unique RNA chains", fontsize=14)
656 axs[0][0].set_ylim((0, max_structs * 1.05)) 799 axs[0][0].set_ylim((0, max_structs * 1.05))
657 - axs[0][0].legend(loc="best", fontsize=14) 800 + axs[0][0].legend(loc="lower right", fontsize=14)
658 801
659 axs[0][1].grid(axis='y', ls='dotted', lw=1) 802 axs[0][1].grid(axis='y', ls='dotted', lw=1)
660 axs[0][1].set_yticklabels([]) 803 axs[0][1].set_yticklabels([])
...@@ -663,9 +806,9 @@ def general_stats(): ...@@ -663,9 +806,9 @@ def general_stats():
663 axs[0][1].hist(df_inferred_only_unique.resolution, bins=np.arange(0, max_res, 0.5), fc=(0.2, 0, colors[0], 0.5), cumulative=True, label='only by inference') 806 axs[0][1].hist(df_inferred_only_unique.resolution, bins=np.arange(0, max_res, 0.5), fc=(0.2, 0, colors[0], 0.5), cumulative=True, label='only by inference')
664 axs[0][1].text(0.95*max_res, 0.95*len(df_mapped_unique.resolution), "%d " % len(df_mapped_unique.resolution), 807 axs[0][1].text(0.95*max_res, 0.95*len(df_mapped_unique.resolution), "%d " % len(df_mapped_unique.resolution),
665 horizontalalignment='right', verticalalignment='top', fontsize=14) 808 horizontalalignment='right', verticalalignment='top', fontsize=14)
666 - axs[0][1].set_title("Number of unique RNA chains\nmapped to $\geq 1$ family", fontsize=14) 809 + axs[0][1].set_title(r"Number of unique RNA chains\nmapped to $\geq 1$ family", fontsize=14)
667 axs[0][1].set_ylim((0, max_structs * 1.05)) 810 axs[0][1].set_ylim((0, max_structs * 1.05))
668 - axs[0][1].legend(loc="best", fontsize=14) 811 + axs[0][1].legend(loc="upper left", fontsize=14)
669 812
670 axs[0][2].grid(axis='y', ls='dotted', lw=1) 813 axs[0][2].grid(axis='y', ls='dotted', lw=1)
671 axs[0][2].set_yticklabels([]) 814 axs[0][2].set_yticklabels([])
...@@ -675,7 +818,7 @@ def general_stats(): ...@@ -675,7 +818,7 @@ def general_stats():
675 axs[0][2].text(0.95*max_res, 0.95*len(df_mapped_copies.resolution), "%d " % len(df_mapped_copies.resolution), 818 axs[0][2].text(0.95*max_res, 0.95*len(df_mapped_copies.resolution), "%d " % len(df_mapped_copies.resolution),
676 horizontalalignment='right', verticalalignment='top', fontsize=14) 819 horizontalalignment='right', verticalalignment='top', fontsize=14)
677 axs[0][2].set_title("Number of RNA chains mapped to a\nfamily (with copies)", fontsize=14) 820 axs[0][2].set_title("Number of RNA chains mapped to a\nfamily (with copies)", fontsize=14)
678 - axs[0][2].legend(loc="right", fontsize=14) 821 + axs[0][2].legend(loc="upper left", fontsize=14)
679 axs[0][2].set_ylim((0, max_structs * 1.05)) 822 axs[0][2].set_ylim((0, max_structs * 1.05))
680 823
681 for i,m in enumerate(methods): 824 for i,m in enumerate(methods):
...@@ -683,7 +826,7 @@ def general_stats(): ...@@ -683,7 +826,7 @@ def general_stats():
683 df_mapped_unique_m = df_mapped_unique[df_mapped_unique.exp_method == m] 826 df_mapped_unique_m = df_mapped_unique[df_mapped_unique.exp_method == m]
684 df_inferred_only_unique_m = df_inferred_only_unique[df_inferred_only_unique.exp_method == m] 827 df_inferred_only_unique_m = df_inferred_only_unique[df_inferred_only_unique.exp_method == m]
685 df_mapped_copies_m = df_mapped_copies[ df_mapped_copies.exp_method == m] 828 df_mapped_copies_m = df_mapped_copies[ df_mapped_copies.exp_method == m]
686 - max_structs = len(df_mapped_copies_m.resolution.tolist()) 829 + max_structs = max(len(df_mapped_copies_m.index), len(df_unique_m.index))
687 print("> found", max_structs, "structures with method", m, flush=True) 830 print("> found", max_structs, "structures with method", m, flush=True)
688 831
689 axs[1+i][0].grid(axis='y', ls='dotted', lw=1) 832 axs[1+i][0].grid(axis='y', ls='dotted', lw=1)
...@@ -693,7 +836,7 @@ def general_stats(): ...@@ -693,7 +836,7 @@ def general_stats():
693 horizontalalignment='right', verticalalignment='top', fontsize=14) 836 horizontalalignment='right', verticalalignment='top', fontsize=14)
694 axs[1+i][0].set_ylim((0, max_structs * 1.05)) 837 axs[1+i][0].set_ylim((0, max_structs * 1.05))
695 axs[1+i][0].set_ylabel(m, fontsize=14) 838 axs[1+i][0].set_ylabel(m, fontsize=14)
696 - axs[1+i][0].legend(loc="best", fontsize=14) 839 + axs[1+i][0].legend(loc="lower right", fontsize=14)
697 840
698 axs[1+i][1].grid(axis='y', ls='dotted', lw=1) 841 axs[1+i][1].grid(axis='y', ls='dotted', lw=1)
699 axs[1+i][1].set_yticklabels([]) 842 axs[1+i][1].set_yticklabels([])
...@@ -703,7 +846,7 @@ def general_stats(): ...@@ -703,7 +846,7 @@ def general_stats():
703 axs[1+i][1].text(0.95*max_res, 0.95*len(df_mapped_unique_m.resolution), "%d " % len(df_mapped_unique_m.resolution), 846 axs[1+i][1].text(0.95*max_res, 0.95*len(df_mapped_unique_m.resolution), "%d " % len(df_mapped_unique_m.resolution),
704 horizontalalignment='right', verticalalignment='top', fontsize=14) 847 horizontalalignment='right', verticalalignment='top', fontsize=14)
705 axs[1+i][1].set_ylim((0, max_structs * 1.05)) 848 axs[1+i][1].set_ylim((0, max_structs * 1.05))
706 - axs[1+i][1].legend(loc="best", fontsize=14) 849 + axs[1+i][1].legend(loc="upper left", fontsize=14)
707 850
708 axs[1+i][2].grid(axis='y', ls='dotted', lw=1) 851 axs[1+i][2].grid(axis='y', ls='dotted', lw=1)
709 axs[1+i][2].set_yticklabels([]) 852 axs[1+i][2].set_yticklabels([])
...@@ -713,7 +856,7 @@ def general_stats(): ...@@ -713,7 +856,7 @@ def general_stats():
713 axs[1+i][2].text(0.95*max_res, 0.95*len(df_mapped_copies_m.resolution), "%d " % len(df_mapped_copies_m.resolution), 856 axs[1+i][2].text(0.95*max_res, 0.95*len(df_mapped_copies_m.resolution), "%d " % len(df_mapped_copies_m.resolution),
714 horizontalalignment='right', verticalalignment='top', fontsize=14) 857 horizontalalignment='right', verticalalignment='top', fontsize=14)
715 axs[1+i][2].set_ylim((0, max_structs * 1.05)) 858 axs[1+i][2].set_ylim((0, max_structs * 1.05))
716 - axs[1+i][2].legend(loc="right", fontsize=14) 859 + axs[1+i][2].legend(loc="upper left", fontsize=14)
717 860
718 axs[-1][0].set_xlabel("Structure resolution\n(Angströms, lower is better)", fontsize=14) 861 axs[-1][0].set_xlabel("Structure resolution\n(Angströms, lower is better)", fontsize=14)
719 axs[-1][1].set_xlabel("Structure resolution\n(Angströms, lower is better)", fontsize=14) 862 axs[-1][1].set_xlabel("Structure resolution\n(Angströms, lower is better)", fontsize=14)
...@@ -722,7 +865,7 @@ def general_stats(): ...@@ -722,7 +865,7 @@ def general_stats():
722 fig.suptitle("Number of RNA chains by experimental method and resolution", fontsize=16) 865 fig.suptitle("Number of RNA chains by experimental method and resolution", fontsize=16)
723 fig.subplots_adjust(left=0.07, right=0.98, wspace=0.05, 866 fig.subplots_adjust(left=0.07, right=0.98, wspace=0.05,
724 hspace=0.05, bottom=0.05, top=0.92) 867 hspace=0.05, bottom=0.05, top=0.92)
725 - fig.savefig("results/figures/resolutions.png") 868 + fig.savefig(runDir + "/results/figures/resolutions.png")
726 plt.close() 869 plt.close()
727 870
728 ########################################## 871 ##########################################
...@@ -765,7 +908,7 @@ def general_stats(): ...@@ -765,7 +908,7 @@ def general_stats():
765 fig.suptitle("Number of RNA families used by experimental method and resolution", fontsize=16) 908 fig.suptitle("Number of RNA families used by experimental method and resolution", fontsize=16)
766 fig.subplots_adjust(left=0.05, right=0.98, wspace=0.05, 909 fig.subplots_adjust(left=0.05, right=0.98, wspace=0.05,
767 hspace=0.05, bottom=0.12, top=0.84) 910 hspace=0.05, bottom=0.12, top=0.84)
768 - fig.savefig("results/figures/Nfamilies.png") 911 + fig.savefig(runDir + "/results/figures/Nfamilies.png")
769 plt.close() 912 plt.close()
770 913
771 def log_to_pbar(pbar): 914 def log_to_pbar(pbar):
...@@ -776,8 +919,10 @@ def log_to_pbar(pbar): ...@@ -776,8 +919,10 @@ def log_to_pbar(pbar):
776 if __name__ == "__main__": 919 if __name__ == "__main__":
777 920
778 # parse options 921 # parse options
922 + DELETE_OLD_DATA = False
923 + DO_WADLEY_ANALYSIS = False
779 try: 924 try:
780 - opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "resolution=", "3d-folder=", "seq-folder=" ]) 925 + opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "resolution=", "3d-folder=", "seq-folder=" ])
781 except getopt.GetoptError as err: 926 except getopt.GetoptError as err:
782 print(err) 927 print(err)
783 sys.exit(2) 928 sys.exit(2)
...@@ -795,6 +940,7 @@ if __name__ == "__main__": ...@@ -795,6 +940,7 @@ if __name__ == "__main__":
795 "\n\t\t\t\t\tdatapoints/\t\tFinal results in CSV file format.") 940 "\n\t\t\t\t\tdatapoints/\t\tFinal results in CSV file format.")
796 print("--seq-folder=…\t\t\tPath to a folder containing the sequence and alignment files. Required subfolder:" 941 print("--seq-folder=…\t\t\tPath to a folder containing the sequence and alignment files. Required subfolder:"
797 "\n\t\t\t\t\trealigned/\t\tSequences, covariance models, and alignments by family") 942 "\n\t\t\t\t\trealigned/\t\tSequences, covariance models, and alignments by family")
943 + print("--from-scratch\t\t\tDo not use precomputed results from past runs, recompute everything")
798 sys.exit() 944 sys.exit()
799 elif opt == '--version': 945 elif opt == '--version':
800 print("RNANet statistics 1.1 beta") 946 print("RNANet statistics 1.1 beta")
...@@ -810,25 +956,37 @@ if __name__ == "__main__": ...@@ -810,25 +956,37 @@ if __name__ == "__main__":
810 path_to_seq_data = path.abspath(arg) 956 path_to_seq_data = path.abspath(arg)
811 if path_to_seq_data[-1] != '/': 957 if path_to_seq_data[-1] != '/':
812 path_to_seq_data += '/' 958 path_to_seq_data += '/'
959 + elif opt=='--from-scratch':
960 + DELETE_OLD_DATA = True
961 + DO_WADLEY_ANALYSIS = True
962 + subprocess.run(["rm","-f", "data/wadley_kernel_eta.npz", "data/wadley_kernel_eta_prime.npz", "data/pair_counts.csv"])
963 + elif opt=='--wadley':
964 + DO_WADLEY_ANALYSIS = True
813 965
814 966
815 # Load mappings 967 # Load mappings
816 print("Loading mappings list...") 968 print("Loading mappings list...")
817 - with sqlite3.connect("results/RNANet.db") as conn: 969 + with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
818 fam_list = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from family ORDER BY rfam_acc ASC;") ] 970 fam_list = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from family ORDER BY rfam_acc ASC;") ]
819 mappings_list = {} 971 mappings_list = {}
820 for k in fam_list: 972 for k in fam_list:
821 - mappings_list[k] = [ x[0] for x in sql_ask_database(conn, f"SELECT chain_id from chain WHERE rfam_acc='{k}' and issue=0;") ] 973 + mappings_list[k] = [ x[0] for x in sql_ask_database(conn, f"SELECT chain_id from chain JOIN structure ON chain.structure_id=structure.pdb_id WHERE rfam_acc='{k}' AND issue=0 AND resolution <= {res_thr};") ]
822 974
823 # List the families for which we will compute sequence identity matrices 975 # List the families for which we will compute sequence identity matrices
824 - with sqlite3.connect("results/RNANet.db") as conn: 976 + with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
825 - famlist = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain GROUP BY rfam_acc) WHERE n_chains > 0 ORDER BY rfam_acc ASC;") ] 977 + famlist = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain WHERE issue = 0 GROUP BY rfam_acc) WHERE n_chains > 0 ORDER BY rfam_acc ASC;") ]
826 - ignored = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain GROUP BY rfam_acc) WHERE n_chains < 2 ORDER BY rfam_acc ASC;") ] 978 + ignored = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain WHERE issue = 0 GROUP BY rfam_acc) WHERE n_chains < 3 ORDER BY rfam_acc ASC;") ]
979 + n_unmapped_chains = sql_ask_database(conn, "SELECT COUNT(*) FROM chain WHERE rfam_acc='unmappd' AND issue=0;")[0][0]
827 if len(ignored): 980 if len(ignored):
828 print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:", " ".join(ignored)+'\n') 981 print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:", " ".join(ignored)+'\n')
829 982
983 + if DELETE_OLD_DATA:
984 + for f in fam_list:
985 + subprocess.run(["rm","-f", runDir + f"/data/{f}.npy", runDir + f"/data/{f}_pairs.csv", runDir + f"/data/{f}_counts.csv"])
986 +
987 +
830 # Prepare the multiprocessing execution environment 988 # Prepare the multiprocessing execution environment
831 - nworkers = max(read_cpu_number()-1, 32) 989 + nworkers = min(read_cpu_number()-1, 32)
832 thr_idx_mgr = Manager() 990 thr_idx_mgr = Manager()
833 idxQueue = thr_idx_mgr.Queue() 991 idxQueue = thr_idx_mgr.Queue()
834 for i in range(nworkers): 992 for i in range(nworkers):
...@@ -836,14 +994,15 @@ if __name__ == "__main__": ...@@ -836,14 +994,15 @@ if __name__ == "__main__":
836 994
837 # Define the tasks 995 # Define the tasks
838 joblist = [] 996 joblist = []
839 - # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), 4.0))) # res threshold is 4.0 Angstroms by default 997 + if n_unmapped_chains and DO_WADLEY_ANALYSIS:
840 - # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), 4.0))) # 998 + joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), 20.0))) # res threshold is 4.0 Angstroms by default
999 + joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), 20.0))) #
841 joblist.append(Job(function=stats_len)) # Computes figures 1000 joblist.append(Job(function=stats_len)) # Computes figures
842 - # joblist.append(Job(function=stats_freq)) # updates the database 1001 + joblist.append(Job(function=stats_freq)) # updates the database
843 - # for f in famlist: 1002 + for f in famlist:
844 - # joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database 1003 + joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database
845 - # if f not in ignored: 1004 + if f not in ignored:
846 - # joblist.append(Job(function=to_dist_matrix, args=(f,))) # updates the database 1005 + joblist.append(Job(function=to_dist_matrix, args=(f,))) # updates the database
847 1006
848 p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) 1007 p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers)
849 pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, leave=True) 1008 pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, leave=True)
...@@ -867,7 +1026,8 @@ if __name__ == "__main__": ...@@ -867,7 +1026,8 @@ if __name__ == "__main__":
867 print() 1026 print()
868 1027
869 # finish the work after the parallel portions 1028 # finish the work after the parallel portions
870 - # per_chain_stats() 1029 + per_chain_stats()
871 - # seq_idty() 1030 + seq_idty()
872 - # stats_pairs() 1031 + stats_pairs()
873 - general_stats() 1032 + if n_unmapped_chains:
1033 + general_stats()
......