Louis BECQUEY

Sanitizing post computation checks

...@@ -1286,46 +1286,37 @@ class Pipeline: ...@@ -1286,46 +1286,37 @@ class Pipeline:
1286 conn = sqlite3.connect(runDir + "/results/RNANet.db") 1286 conn = sqlite3.connect(runDir + "/results/RNANet.db")
1287 1287
1288 # Assert every structure is used 1288 # Assert every structure is used
1289 - r = sql_ask_database(conn, """SELECT structure_id FROM structure 1289 + r = sql_ask_database(conn, """SELECT DISTINCT pdb_id FROM structure WHERE pdb_id NOT IN (SELECT DISTINCT structure_id FROM chain);""")
1290 - LEFT JOIN chain ON structure.pdb_id = chain.structure_id
1291 - WHERE chain_id IS NULL;""")
1292 if len(r) and r[0][0] is not None: 1290 if len(r) and r[0][0] is not None:
1293 - warn("Structures without referenced chains have been detected") 1291 + warn("Structures without referenced chains have been detected. This happens if we have known issues, for example.")
1294 - for x in r: 1292 + print(" ".join([x[0] for x in r]))
1295 - print(x)
1296 1293
1297 # Assert every chain is attached to a structure 1294 # Assert every chain is attached to a structure
1298 - r = sql_ask_database(conn, """SELECT chain_id, structure_id FROM chain 1295 + r = sql_ask_database(conn, """SELECT DISTINCT chain_id, structure_id FROM chain WHERE structure_id NOT IN (SELECT DISTINCT pdb_id FROM structure);""")
1299 - LEFT JOIN structure ON chain.structure_id = structure.pdb_id
1300 - WHERE pdb_id IS NULL;""")
1301 if len(r) and r[0][0] is not None: 1296 if len(r) and r[0][0] is not None:
1302 warn("Chains without referenced structures have been detected") 1297 warn("Chains without referenced structures have been detected")
1303 - for x in r: 1298 + print(" ".join([x[1]+'-'+x[0] for x in r]))
1304 - print(x)
1305 -
1306 1299
1307 if self.HOMOLOGY: 1300 if self.HOMOLOGY:
1308 # check if chains have been re_mapped: 1301 # check if chains have been re_mapped:
1309 - r = sql_ask_database(conn, """SELECT COUNT(chain.chain_id) as Count, rfam_acc 1302 + r = sql_ask_database(conn, """SELECT COUNT(DISTINCT chain_id) AS Count, rfam_acc FROM chain
1310 - FROM chain LEFT JOIN re_mapping 1303 + WHERE chain_id NOT IN (SELECT DISTINCT chain_id FROM re_mapping)
1311 - ON chain.chain_id = re_mapping.chain_id 1304 + GROUP BY rfam_acc;""")
1312 - WHERE index_ali IS NULL GROUP BY rfam_acc;""")
1313 if len(r) and r[0][0] is not None: 1305 if len(r) and r[0][0] is not None:
1314 - warn("Structures were not remapped:") 1306 + warn("Chains were not remapped (This happens if we have known issues for example):")
1315 for x in r: 1307 for x in r:
1316 print(str(x[0]) + " chains of family " + x[1]) 1308 print(str(x[0]) + " chains of family " + x[1])
1317 1309
1318 - # check if some columns are missing in the remappings: 1310 + # # TODO : Optimize this (too slow)
1319 - r = sql_ask_database(conn, """SELECT c.chain_id, c.structure_id, c.chain_name, c.rfam_acc, re_mapping.index_chain, re_mapping.index_ali 1311 + # # check if some columns are missing in the remappings:
1320 - FROM chain as c 1312 + # r = sql_ask_database(conn, """SELECT c.chain_id, c.structure_id, c.chain_name, c.rfam_acc, r.index_chain, r.index_ali
1321 - NATURAL JOIN re_mapping 1313 + # FROM chain as c
1322 - LEFT JOIN align_column 1314 + # NATURAL JOIN re_mapping as r
1323 - ON re_mapping.index_ali=align_column.index_ali AND c.rfam_acc=align_column.rfam_acc 1315 + # WHERE index_ali NOT IN (SELECT index_ali FROM align_column WHERE rfam_acc = c.rfam_acc);""")
1324 - WHERE freq_A IS NULL;""") 1316 + # if len(r) and r[0][0] is not None:
1325 - if len(r) and r[0][0] is not None: 1317 + # warn("Missing positions in the re-mapping:")
1326 - warn("Structures were not remapped:") 1318 + # for x in r:
1327 - for x in r: 1319 + # print(x)
1328 - print(x)
1329 1320
1330 conn.close() 1321 conn.close()
1331 1322
...@@ -1806,7 +1797,7 @@ def work_mmcif(pdb_id): ...@@ -1806,7 +1797,7 @@ def work_mmcif(pdb_id):
1806 with sqlite3.connect(runDir + "/results/RNANet.db") as conn: 1797 with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
1807 sql_execute(conn, """INSERT OR REPLACE INTO structure (pdb_id, pdb_model, date, exp_method, resolution) 1798 sql_execute(conn, """INSERT OR REPLACE INTO structure (pdb_id, pdb_model, date, exp_method, resolution)
1808 VALUES (?, ?, DATE(?), ?, ?);""", data = (pdb_id, 1, date, exp_meth, reso)) 1799 VALUES (?, ?, DATE(?), ?, ?);""", data = (pdb_id, 1, date, exp_meth, reso))
1809 - 1800 +
1810 # run DSSR (you need to have it in your $PATH, follow x3dna installation instructions) 1801 # run DSSR (you need to have it in your $PATH, follow x3dna installation instructions)
1811 output = subprocess.run(["x3dna-dssr", f"-i={final_filepath}", "--json", "--auxfile=no"], 1802 output = subprocess.run(["x3dna-dssr", f"-i={final_filepath}", "--json", "--auxfile=no"],
1812 stdout=subprocess.PIPE, stderr=subprocess.PIPE) 1803 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
...@@ -1964,11 +1955,12 @@ def work_realign(rfam_acc): ...@@ -1964,11 +1955,12 @@ def work_realign(rfam_acc):
1964 notify("Aligned new sequences together") 1955 notify("Aligned new sequences together")
1965 1956
1966 # And we merge the two alignments 1957 # And we merge the two alignments
1967 - p2= subprocess.run(["esl-alimerge", "-o", path_to_seq_data + f"realigned/{rfam_acc}++.stk", "--rna", 1958 + p2= subprocess.run(["esl-alimerge", "-o", path_to_seq_data + f"realigned/{rfam_acc}_merged.stk", "--rna",
1968 path_to_seq_data + f"realigned/{rfam_acc}++.stk", 1959 path_to_seq_data + f"realigned/{rfam_acc}++.stk",
1969 path_to_seq_data + f"realigned/{rfam_acc}_new.stk" ], 1960 path_to_seq_data + f"realigned/{rfam_acc}_new.stk" ],
1970 stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) 1961 stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
1971 stderr = p1.stderr.decode('utf-8') + p2.stderr.decode('utf-8') 1962 stderr = p1.stderr.decode('utf-8') + p2.stderr.decode('utf-8')
1963 + subprocess.run(["mv", path_to_seq_data + f"realigned/{rfam_acc}_merged.stk", path_to_seq_data + f"realigned/{rfam_acc}++.stk"])
1972 notify("Merged alignments into one") 1964 notify("Merged alignments into one")
1973 1965
1974 # remove the partial files 1966 # remove the partial files
......
1 +2du3_1_D_1-71
2 +2du4_1_C_1-71
3 +2du5_1_D_1-71
4 +2du6_1_D_1-71
5 +1ml5_1_a_151-2903
6 +1ml5_1_a_1-2914
7 +1ml5_1_b_5-121
8 +1ml5_1_A_2-1520
9 +1ml5_1_A_7-1518
10 +1ml5_1_A_7-1515
11 +5b63_1_D_1-74
12 +5b63_1_B_1-74
13 +5yyn_1_D_1-74
14 +5yyn_1_B_1-74
15 +6prv_1_A_1-58
16 +6prv_1_B_1-58
17 +6prv_1_C_1-58
18 +6prv_1_D_1-58
19 +1hc8_1_C_1-58
20 +1hc8_1_D_1-58
21 +1y39_1_C_1-58
22 +1y39_1_D_1-58
23 +1qa6_1_C_1-58
24 +1qa6_1_D_1-58
25 +2go5_1_9_3-88
26 +3ds7_1_B_1-67
27 +6qzp_1_L8_1267-4755
28 +6ek0_1_L8_1267-4755
29 +6ole_1_D_1267-4755
30 +6om0_1_D_1267-4755
31 +6y2l_1_L8_1267-4755
32 +6y0g_1_L8_1267-4755
33 +6oli_1_D_1267-4755
34 +6olg_1_A3_1267-4755
35 +6y57_1_L8_1267-4755
36 +5t2c_1_C_1267-4755
37 +6om7_1_D_1267-4755
38 +4ug0_1_L8_1267-4755
39 +6olf_1_D_1267-4755
40 +6ip5_1_1C_1267-4755
41 +6ip8_1_1C_1267-4755
42 +6olz_1_A3_1267-4755
43 +5aj0_1_A3_1267-4755
44 +5lks_1_L8_1267-4755
45 +6ip6_1_1C_1267-4755
46 +4v6x_1_A8_1267-4755
47 +4woi_1_BB_3-118
48 +4woi_1_CB_3-118
49 +2rdo_1_A_3-118
50 +4v48_1_A9_3-118
51 +4v47_1_A9_3-118
52 +1vy7_1_AY_1-73
53 +1vy7_1_CY_1-73
54 +4w2h_1_CY_1-73
55 +2z9q_1_A_1-72
56 +4v42_1_BB_5-121
57 +1ls2_1_B_1-73
58 +3u4m_1_B_1-80
59 +4qvi_1_B_1-80
60 +3umy_1_B_1-80
61 +4qg3_1_B_1-80
62 +3u56_1_B_1-80
63 +3ep2_1_Y_1-72
64 +3eq3_1_Y_1-72
65 +4v48_1_A6_1-73
66 +2qex_1_9_1-121
67 +2otl_1_9_1-121
68 +2otj_1_9_1-121
69 +1kqs_1_9_1-121
70 +3ow2_1_9_1-121
71 +1vq8_1_9_1-121
72 +1vqo_1_9_1-121
73 +1vqk_1_9_1-121
74 +1vq9_1_9_1-121
75 +1vqp_1_9_1-121
76 +1vq7_1_9_1-121
77 +1vqn_1_9_1-121
78 +1vq4_1_9_1-121
79 +1vq6_1_9_1-121
80 +1m90_1_B_1-121
81 +1qvg_1_9_1-121
82 +1vq5_1_9_1-121
83 +1nji_1_B_1-121
84 +3cxc_1_9_1-121
85 +3cpw_1_9_1-121
86 +1q81_1_B_1-121
87 +1q86_1_B_1-121
88 +1kc8_1_B_1-121
89 +1k73_1_B_1-121
90 +1q82_1_B_1-121
91 +1k9m_1_B_1-121
92 +1qvf_1_9_1-121
93 +1m1k_1_B_1-121
94 +1kd1_1_B_1-121
95 +1k8a_1_B_1-121
96 +1q7y_1_B_1-121
97 +1n8r_1_B_1-121
98 +1eg0_1_O_1-73
99 +1qtq_1_B_1-72
100 +4jxx_1_B_1-72
101 +1zjw_1_B_1-72
102 +1euy_1_B_1-71
103 +1euq_1_B_1-69
104 +2rd2_1_B_1-72
105 +2re8_1_B_1-72
1 1gsg_1_T_1-72 106 1gsg_1_T_1-72
107 +1o0c_1_B_1-72
108 +1o0b_1_B_1-72
109 +1exd_1_B_1-70
110 +1fg0_1_A_1-602
111 +1ffz_1_A_1-602
112 +3ktv_1_C_2-106
113 +3ktv_1_A_2-106
114 +3jcr_1_H_1-115
115 +6d9j_1_4_1-187
116 +4v42_1_BA_151-2903
117 +4v42_1_BA_1-2914
118 +3moj_1_A_1-73
119 +4v48_1_BA_1-91
120 +4v48_1_BA_6-1538
121 +4v48_1_BA_1-1543
122 +4v48_1_BA_6-1541
123 +4v47_1_BA_1-91
124 +4v47_1_BA_6-1537
125 +4v47_1_BA_1-1542
126 +4v47_1_BA_6-1540
127 +5u4j_1_A_6-1522
128 +5u4j_1_A_1-1528
129 +5u4j_1_A_6-1460
130 +5u4j_1_A_6-1457
131 +2rdo_1_B_1-2903
132 +2rdo_1_B_6-1522
133 +2rdo_1_B_1-1528
134 +2rdo_1_B_6-1460
135 +2rdo_1_B_160-2893
136 +2rdo_1_B_1-2904
137 +2rdo_1_B_6-1457
138 +4v48_1_A0_1-2903
139 +4v48_1_A0_6-1522
140 +4v48_1_A0_1-1528
141 +4v48_1_A0_6-1460
142 +4v48_1_A0_160-2893
143 +4v48_1_A0_1-2904
144 +4v48_1_A0_6-1457
145 +4v47_1_A0_1-2903
146 +4v47_1_A0_6-1522
147 +4v47_1_A0_1-1528
148 +4v47_1_A0_6-1460
149 +4v47_1_A0_160-2893
150 +4v47_1_A0_1-2904
151 +4v47_1_A0_6-1457
152 +1il2_1_C_1-72
153 +1il2_1_D_1-72
154 +1asz_1_S_1-72
155 +1asy_1_S_1-72
156 +1asz_1_R_1-72
157 +1asy_1_R_1-72
158 +2ob7_1_A_10-319
159 +2om7_1_C_1-96
160 +2om7_1_C_1-130
161 +1x1l_1_A_1-130
162 +1zc8_1_Z_1-91
163 +1zc8_1_Z_1-130
164 +2ob7_1_D_1-130
165 +1wz2_1_D_1-85
166 +1wz2_1_C_1-85
167 +6b19_1_C_1-73
168 +6b19_1_C_3-101
169 +5ndk_1_1G_20-55
170 +5ndk_1_13_20-55
171 +5ndj_1_1G_20-55
172 +5ndj_1_13_20-55
173 +1jgq_1_A_7-1515
174 +1jgq_1_A_2-1520
175 +1jgq_1_A_7-1518
176 +1jgq_1_A_20-55
177 +4v42_1_AA_7-1515
178 +4v42_1_AA_2-1520
179 +4v42_1_AA_7-1518
180 +4v42_1_AA_20-55
181 +1jgo_1_A_7-1515
182 +1jgo_1_A_2-1520
183 +1jgo_1_A_7-1518
184 +1jgo_1_A_20-55
185 +1jgp_1_A_7-1515
186 +1jgp_1_A_2-1520
187 +1jgp_1_A_7-1518
188 +1jgp_1_A_20-55
189 +1mms_1_C_1-58
190 +1mms_1_D_1-58
191 +1r2x_1_C_1-58
192 +1r2w_1_C_1-58
193 +1eg0_1_L_1-56
194 +1eg0_1_L_1-57
195 +6rxu_1_C2_588-2383
196 +6rxu_1_C2_583-2388
197 +6rxu_1_C2_588-2386
198 +5oql_1_2_588-2383
199 +5oql_1_2_583-2388
200 +5oql_1_2_588-2386
201 +6rxv_1_C2_588-2383
202 +6rxv_1_C2_583-2388
203 +6rxv_1_C2_588-2386
204 +6rxz_1_C2_588-2383
205 +6rxz_1_C2_583-2388
206 +6rxz_1_C2_588-2386
207 +6rxy_1_C2_588-2383
208 +6rxy_1_C2_583-2388
209 +6rxy_1_C2_588-2386
210 +6rxt_1_C2_588-2383
211 +6rxt_1_C2_583-2388
212 +6rxt_1_C2_588-2386
213 +4d61_1_j_1-199
214 +4d5n_1_X_1-199
215 +3k0j_1_F_2-86
216 +4jxz_1_B_1-72
217 +4jyz_1_B_1-72
218 +3wqy_1_C_1-72
219 +3wqz_1_C_1-72
220 +1zc8_1_A_1-59
221 +1mvr_1_D_1-59
222 +4c9d_1_D_29-1
223 +4c9d_1_C_29-1
224 +4xej_1_BIRE_4-193
225 +4xej_1_AIRE_4-193
226 +1zn1_1_B_1-59
227 +1n78_1_C_1-72
228 +1n78_1_D_1-72
229 +2dxi_1_C_1-72
230 +1n77_1_C_1-72
231 +2dxi_1_D_1-72
232 +2cv2_1_C_1-72
233 +1n77_1_D_1-72
234 +2cv2_1_D_1-72
235 +2cv1_1_D_1-72
236 +2cv1_1_C_1-72
237 +2cv0_1_C_1-72
238 +2cv0_1_D_1-72
239 +1g59_1_B_1-72
240 +1g59_1_D_1-72
241 +1emi_1_B_1-108
242 +3iy9_1_A_498-1027
243 +3j0o_1_2_1-112
244 +3j0o_1_2_1-111
245 +3j0l_1_2_1-112
246 +3j0l_1_2_1-111
247 +3j0p_1_2_1-112
248 +3j0p_1_2_1-111
249 +3j0q_1_2_1-112
250 +3j0q_1_2_1-111
251 +3ep2_1_B_1-50
252 +3eq3_1_B_1-50
253 +3eq4_1_B_1-50
254 +3j0d_1_A_1-50
255 +1gax_1_D_1-72
256 +1gax_1_C_1-72
257 +1ivs_1_C_1-72
258 +1ivs_1_D_1-72
259 +5it9_1_i_1-191
260 +3pgw_1_R_1-164
261 +3pgw_1_N_1-164
262 +3cw1_1_x_1-138
263 +3cw1_1_w_1-138
264 +3cw1_1_V_1-138
265 +3cw1_1_v_1-138
266 +2ftc_1_R_792-1568
267 +2ftc_1_R_81-1466
268 +2ftc_1_R_1-1568
269 +3j2c_1_O_1-144
270 +3j2c_1_M_1-462
271 +2dlc_1_Y_1-73
272 +2iy3_1_B_9-105
273 +2zue_1_B_1-75
274 +2zuf_1_B_1-75
275 +2d6f_1_F_1-72
276 +2d6f_1_E_1-72
277 +3jcr_1_N_1-106
278 +3jcr_1_N_1-188
279 +2vaz_1_A_64-177
280 +5ool_1_A_747-1472
281 +5ool_1_A_1-1454
282 +5ool_1_A_771-1559
283 +3j9m_1_A_747-1472
284 +3j9m_1_A_1-1454
285 +3j9m_1_A_771-1559
286 +5oom_1_A_747-1472
287 +5oom_1_A_1-1454
288 +5oom_1_A_771-1559
289 +3j7y_1_A_747-1472
290 +3j7y_1_A_1-1454
291 +3j7y_1_A_771-1559
292 +6nu2_1_A_747-1472
293 +6nu2_1_A_1-1381
294 +6nu2_1_A_738-1472
295 +6nu3_1_A_747-1472
296 +6nu3_1_A_1-1381
297 +6nu3_1_A_738-1472
298 +5aka_1_7_1-74
299 +1f7u_1_B_1-73
300 +1f7v_1_B_1-73
301 +2om7_1_I_1-58
302 +2om7_1_J_1-102
303 +1ysh_1_B_1-101
304 +3iwn_1_B_1-91
305 +3ndb_1_M_65-109
306 +4xco_1_M_46-90
307 +4xco_1_M_1-136
308 +4xco_1_E_46-90
309 +4xco_1_E_1-136
310 +1z43_1_A_50-94
311 +1z43_1_A_1-136
312 +2v3c_1_N_46-90
313 +2v3c_1_N_1-136
314 +2v3c_1_M_46-90
315 +2v3c_1_M_1-136
316 +1lng_1_B_48-92
317 +1lng_1_B_1-136
318 +3j5s_1_B_1-101
319 +3j5s_1_B_1-99
320 +3j5s_1_A_1-360
321 +3j45_1_5_1-108
322 +3j46_1_4_2-109
323 +3jcr_1_M_1-141
324 +3jcr_1_M_1-107
325 +3jcr_1_M_1-188
326 +4v5z_1_B0_1-2902
327 +4v5z_1_B0_1-2840
328 +4v5z_1_B0_1-2899
329 +3deg_1_G_1-70
330 +5d8h_1_A_1-74
331 +5g2x_1_A_595-692
332 +5g2y_1_A_595-692
333 +3j0o_1_h_1-111
334 +3j0l_1_h_1-111
335 +3j0p_1_h_1-111
336 +3j0q_1_h_1-111
337 +3iy8_1_A_1-540
338 +1j1u_1_B_1-74
339 +4v5z_1_BY_2-113
340 +4v5z_1_BZ_1-70
341 +4cxg_1_1_1-135
342 +4cxh_1_1_1-135
343 +4cxg_1_2_1-50
344 +4cxh_1_2_1-50
345 +4v8y_1_CN_1-87
346 +1c0a_1_B_1-74
347 +1j2b_1_C_1-74
348 +1j2b_1_D_1-74
349 +6d90_1_4_1-187
350 +3j16_1_K_1-60
351 +1mvr_1_B_1-96
352 +4adx_1_0_132-2915
353 +4adx_1_0_1-2923
354 +4adx_1_9_1-121
355 +2azx_1_D_1-72
356 +2azx_1_C_1-72
357 +3eq4_1_Y_1-69
358 +2il9_1_A_2-142
359 +2il9_1_M_2-142
360 +4v92_1_AZ_1-184
361 +3v7e_1_D_1-125
362 +4v5z_1_AA_1-1563
363 +4v5z_1_AA_1-1562
......
This diff is collapsed. Click to expand it.