Louis BECQUEY

Sanitizing post computation checks

......@@ -1286,46 +1286,37 @@ class Pipeline:
conn = sqlite3.connect(runDir + "/results/RNANet.db")
# Assert every structure is used
r = sql_ask_database(conn, """SELECT structure_id FROM structure
LEFT JOIN chain ON structure.pdb_id = chain.structure_id
WHERE chain_id IS NULL;""")
r = sql_ask_database(conn, """SELECT DISTINCT pdb_id FROM structure WHERE pdb_id NOT IN (SELECT DISTINCT structure_id FROM chain);""")
if len(r) and r[0][0] is not None:
warn("Structures without referenced chains have been detected")
for x in r:
print(x)
warn("Structures without referenced chains have been detected. This happens if we have known issues, for example.")
print(" ".join([x[0] for x in r]))
# Assert every chain is attached to a structure
r = sql_ask_database(conn, """SELECT chain_id, structure_id FROM chain
LEFT JOIN structure ON chain.structure_id = structure.pdb_id
WHERE pdb_id IS NULL;""")
r = sql_ask_database(conn, """SELECT DISTINCT chain_id, structure_id FROM chain WHERE structure_id NOT IN (SELECT DISTINCT pdb_id FROM structure);""")
if len(r) and r[0][0] is not None:
warn("Chains without referenced structures have been detected")
for x in r:
print(x)
print(" ".join([x[1]+'-'+x[0] for x in r]))
if self.HOMOLOGY:
# check if chains have been re_mapped:
r = sql_ask_database(conn, """SELECT COUNT(chain.chain_id) as Count, rfam_acc
FROM chain LEFT JOIN re_mapping
ON chain.chain_id = re_mapping.chain_id
WHERE index_ali IS NULL GROUP BY rfam_acc;""")
r = sql_ask_database(conn, """SELECT COUNT(DISTINCT chain_id) AS Count, rfam_acc FROM chain
WHERE chain_id NOT IN (SELECT DISTINCT chain_id FROM re_mapping)
GROUP BY rfam_acc;""")
if len(r) and r[0][0] is not None:
warn("Structures were not remapped:")
warn("Chains were not remapped (This happens if we have known issues for example):")
for x in r:
print(str(x[0]) + " chains of family " + x[1])
# check if some columns are missing in the remappings:
r = sql_ask_database(conn, """SELECT c.chain_id, c.structure_id, c.chain_name, c.rfam_acc, re_mapping.index_chain, re_mapping.index_ali
FROM chain as c
NATURAL JOIN re_mapping
LEFT JOIN align_column
ON re_mapping.index_ali=align_column.index_ali AND c.rfam_acc=align_column.rfam_acc
WHERE freq_A IS NULL;""")
if len(r) and r[0][0] is not None:
warn("Structures were not remapped:")
for x in r:
print(x)
# # TODO : Optimize this (too slow)
# # check if some columns are missing in the remappings:
# r = sql_ask_database(conn, """SELECT c.chain_id, c.structure_id, c.chain_name, c.rfam_acc, r.index_chain, r.index_ali
# FROM chain as c
# NATURAL JOIN re_mapping as r
# WHERE index_ali NOT IN (SELECT index_ali FROM align_column WHERE rfam_acc = c.rfam_acc);""")
# if len(r) and r[0][0] is not None:
# warn("Missing positions in the re-mapping:")
# for x in r:
# print(x)
conn.close()
......@@ -1806,7 +1797,7 @@ def work_mmcif(pdb_id):
with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
sql_execute(conn, """INSERT OR REPLACE INTO structure (pdb_id, pdb_model, date, exp_method, resolution)
VALUES (?, ?, DATE(?), ?, ?);""", data = (pdb_id, 1, date, exp_meth, reso))
# run DSSR (you need to have it in your $PATH, follow x3dna installation instructions)
output = subprocess.run(["x3dna-dssr", f"-i={final_filepath}", "--json", "--auxfile=no"],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
......@@ -1964,11 +1955,12 @@ def work_realign(rfam_acc):
notify("Aligned new sequences together")
# And we merge the two alignments
p2= subprocess.run(["esl-alimerge", "-o", path_to_seq_data + f"realigned/{rfam_acc}++.stk", "--rna",
p2= subprocess.run(["esl-alimerge", "-o", path_to_seq_data + f"realigned/{rfam_acc}_merged.stk", "--rna",
path_to_seq_data + f"realigned/{rfam_acc}++.stk",
path_to_seq_data + f"realigned/{rfam_acc}_new.stk" ],
stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
stderr = p1.stderr.decode('utf-8') + p2.stderr.decode('utf-8')
subprocess.run(["mv", path_to_seq_data + f"realigned/{rfam_acc}_merged.stk", path_to_seq_data + f"realigned/{rfam_acc}++.stk"])
notify("Merged alignments into one")
# remove the partial files
......
2du3_1_D_1-71
2du4_1_C_1-71
2du5_1_D_1-71
2du6_1_D_1-71
1ml5_1_a_151-2903
1ml5_1_a_1-2914
1ml5_1_b_5-121
1ml5_1_A_2-1520
1ml5_1_A_7-1518
1ml5_1_A_7-1515
5b63_1_D_1-74
5b63_1_B_1-74
5yyn_1_D_1-74
5yyn_1_B_1-74
6prv_1_A_1-58
6prv_1_B_1-58
6prv_1_C_1-58
6prv_1_D_1-58
1hc8_1_C_1-58
1hc8_1_D_1-58
1y39_1_C_1-58
1y39_1_D_1-58
1qa6_1_C_1-58
1qa6_1_D_1-58
2go5_1_9_3-88
3ds7_1_B_1-67
6qzp_1_L8_1267-4755
6ek0_1_L8_1267-4755
6ole_1_D_1267-4755
6om0_1_D_1267-4755
6y2l_1_L8_1267-4755
6y0g_1_L8_1267-4755
6oli_1_D_1267-4755
6olg_1_A3_1267-4755
6y57_1_L8_1267-4755
5t2c_1_C_1267-4755
6om7_1_D_1267-4755
4ug0_1_L8_1267-4755
6olf_1_D_1267-4755
6ip5_1_1C_1267-4755
6ip8_1_1C_1267-4755
6olz_1_A3_1267-4755
5aj0_1_A3_1267-4755
5lks_1_L8_1267-4755
6ip6_1_1C_1267-4755
4v6x_1_A8_1267-4755
4woi_1_BB_3-118
4woi_1_CB_3-118
2rdo_1_A_3-118
4v48_1_A9_3-118
4v47_1_A9_3-118
1vy7_1_AY_1-73
1vy7_1_CY_1-73
4w2h_1_CY_1-73
2z9q_1_A_1-72
4v42_1_BB_5-121
1ls2_1_B_1-73
3u4m_1_B_1-80
4qvi_1_B_1-80
3umy_1_B_1-80
4qg3_1_B_1-80
3u56_1_B_1-80
3ep2_1_Y_1-72
3eq3_1_Y_1-72
4v48_1_A6_1-73
2qex_1_9_1-121
2otl_1_9_1-121
2otj_1_9_1-121
1kqs_1_9_1-121
3ow2_1_9_1-121
1vq8_1_9_1-121
1vqo_1_9_1-121
1vqk_1_9_1-121
1vq9_1_9_1-121
1vqp_1_9_1-121
1vq7_1_9_1-121
1vqn_1_9_1-121
1vq4_1_9_1-121
1vq6_1_9_1-121
1m90_1_B_1-121
1qvg_1_9_1-121
1vq5_1_9_1-121
1nji_1_B_1-121
3cxc_1_9_1-121
3cpw_1_9_1-121
1q81_1_B_1-121
1q86_1_B_1-121
1kc8_1_B_1-121
1k73_1_B_1-121
1q82_1_B_1-121
1k9m_1_B_1-121
1qvf_1_9_1-121
1m1k_1_B_1-121
1kd1_1_B_1-121
1k8a_1_B_1-121
1q7y_1_B_1-121
1n8r_1_B_1-121
1eg0_1_O_1-73
1qtq_1_B_1-72
4jxx_1_B_1-72
1zjw_1_B_1-72
1euy_1_B_1-71
1euq_1_B_1-69
2rd2_1_B_1-72
2re8_1_B_1-72
1gsg_1_T_1-72
1o0c_1_B_1-72
1o0b_1_B_1-72
1exd_1_B_1-70
1fg0_1_A_1-602
1ffz_1_A_1-602
3ktv_1_C_2-106
3ktv_1_A_2-106
3jcr_1_H_1-115
6d9j_1_4_1-187
4v42_1_BA_151-2903
4v42_1_BA_1-2914
3moj_1_A_1-73
4v48_1_BA_1-91
4v48_1_BA_6-1538
4v48_1_BA_1-1543
4v48_1_BA_6-1541
4v47_1_BA_1-91
4v47_1_BA_6-1537
4v47_1_BA_1-1542
4v47_1_BA_6-1540
5u4j_1_A_6-1522
5u4j_1_A_1-1528
5u4j_1_A_6-1460
5u4j_1_A_6-1457
2rdo_1_B_1-2903
2rdo_1_B_6-1522
2rdo_1_B_1-1528
2rdo_1_B_6-1460
2rdo_1_B_160-2893
2rdo_1_B_1-2904
2rdo_1_B_6-1457
4v48_1_A0_1-2903
4v48_1_A0_6-1522
4v48_1_A0_1-1528
4v48_1_A0_6-1460
4v48_1_A0_160-2893
4v48_1_A0_1-2904
4v48_1_A0_6-1457
4v47_1_A0_1-2903
4v47_1_A0_6-1522
4v47_1_A0_1-1528
4v47_1_A0_6-1460
4v47_1_A0_160-2893
4v47_1_A0_1-2904
4v47_1_A0_6-1457
1il2_1_C_1-72
1il2_1_D_1-72
1asz_1_S_1-72
1asy_1_S_1-72
1asz_1_R_1-72
1asy_1_R_1-72
2ob7_1_A_10-319
2om7_1_C_1-96
2om7_1_C_1-130
1x1l_1_A_1-130
1zc8_1_Z_1-91
1zc8_1_Z_1-130
2ob7_1_D_1-130
1wz2_1_D_1-85
1wz2_1_C_1-85
6b19_1_C_1-73
6b19_1_C_3-101
5ndk_1_1G_20-55
5ndk_1_13_20-55
5ndj_1_1G_20-55
5ndj_1_13_20-55
1jgq_1_A_7-1515
1jgq_1_A_2-1520
1jgq_1_A_7-1518
1jgq_1_A_20-55
4v42_1_AA_7-1515
4v42_1_AA_2-1520
4v42_1_AA_7-1518
4v42_1_AA_20-55
1jgo_1_A_7-1515
1jgo_1_A_2-1520
1jgo_1_A_7-1518
1jgo_1_A_20-55
1jgp_1_A_7-1515
1jgp_1_A_2-1520
1jgp_1_A_7-1518
1jgp_1_A_20-55
1mms_1_C_1-58
1mms_1_D_1-58
1r2x_1_C_1-58
1r2w_1_C_1-58
1eg0_1_L_1-56
1eg0_1_L_1-57
6rxu_1_C2_588-2383
6rxu_1_C2_583-2388
6rxu_1_C2_588-2386
5oql_1_2_588-2383
5oql_1_2_583-2388
5oql_1_2_588-2386
6rxv_1_C2_588-2383
6rxv_1_C2_583-2388
6rxv_1_C2_588-2386
6rxz_1_C2_588-2383
6rxz_1_C2_583-2388
6rxz_1_C2_588-2386
6rxy_1_C2_588-2383
6rxy_1_C2_583-2388
6rxy_1_C2_588-2386
6rxt_1_C2_588-2383
6rxt_1_C2_583-2388
6rxt_1_C2_588-2386
4d61_1_j_1-199
4d5n_1_X_1-199
3k0j_1_F_2-86
4jxz_1_B_1-72
4jyz_1_B_1-72
3wqy_1_C_1-72
3wqz_1_C_1-72
1zc8_1_A_1-59
1mvr_1_D_1-59
4c9d_1_D_29-1
4c9d_1_C_29-1
4xej_1_BIRE_4-193
4xej_1_AIRE_4-193
1zn1_1_B_1-59
1n78_1_C_1-72
1n78_1_D_1-72
2dxi_1_C_1-72
1n77_1_C_1-72
2dxi_1_D_1-72
2cv2_1_C_1-72
1n77_1_D_1-72
2cv2_1_D_1-72
2cv1_1_D_1-72
2cv1_1_C_1-72
2cv0_1_C_1-72
2cv0_1_D_1-72
1g59_1_B_1-72
1g59_1_D_1-72
1emi_1_B_1-108
3iy9_1_A_498-1027
3j0o_1_2_1-112
3j0o_1_2_1-111
3j0l_1_2_1-112
3j0l_1_2_1-111
3j0p_1_2_1-112
3j0p_1_2_1-111
3j0q_1_2_1-112
3j0q_1_2_1-111
3ep2_1_B_1-50
3eq3_1_B_1-50
3eq4_1_B_1-50
3j0d_1_A_1-50
1gax_1_D_1-72
1gax_1_C_1-72
1ivs_1_C_1-72
1ivs_1_D_1-72
5it9_1_i_1-191
3pgw_1_R_1-164
3pgw_1_N_1-164
3cw1_1_x_1-138
3cw1_1_w_1-138
3cw1_1_V_1-138
3cw1_1_v_1-138
2ftc_1_R_792-1568
2ftc_1_R_81-1466
2ftc_1_R_1-1568
3j2c_1_O_1-144
3j2c_1_M_1-462
2dlc_1_Y_1-73
2iy3_1_B_9-105
2zue_1_B_1-75
2zuf_1_B_1-75
2d6f_1_F_1-72
2d6f_1_E_1-72
3jcr_1_N_1-106
3jcr_1_N_1-188
2vaz_1_A_64-177
5ool_1_A_747-1472
5ool_1_A_1-1454
5ool_1_A_771-1559
3j9m_1_A_747-1472
3j9m_1_A_1-1454
3j9m_1_A_771-1559
5oom_1_A_747-1472
5oom_1_A_1-1454
5oom_1_A_771-1559
3j7y_1_A_747-1472
3j7y_1_A_1-1454
3j7y_1_A_771-1559
6nu2_1_A_747-1472
6nu2_1_A_1-1381
6nu2_1_A_738-1472
6nu3_1_A_747-1472
6nu3_1_A_1-1381
6nu3_1_A_738-1472
5aka_1_7_1-74
1f7u_1_B_1-73
1f7v_1_B_1-73
2om7_1_I_1-58
2om7_1_J_1-102
1ysh_1_B_1-101
3iwn_1_B_1-91
3ndb_1_M_65-109
4xco_1_M_46-90
4xco_1_M_1-136
4xco_1_E_46-90
4xco_1_E_1-136
1z43_1_A_50-94
1z43_1_A_1-136
2v3c_1_N_46-90
2v3c_1_N_1-136
2v3c_1_M_46-90
2v3c_1_M_1-136
1lng_1_B_48-92
1lng_1_B_1-136
3j5s_1_B_1-101
3j5s_1_B_1-99
3j5s_1_A_1-360
3j45_1_5_1-108
3j46_1_4_2-109
3jcr_1_M_1-141
3jcr_1_M_1-107
3jcr_1_M_1-188
4v5z_1_B0_1-2902
4v5z_1_B0_1-2840
4v5z_1_B0_1-2899
3deg_1_G_1-70
5d8h_1_A_1-74
5g2x_1_A_595-692
5g2y_1_A_595-692
3j0o_1_h_1-111
3j0l_1_h_1-111
3j0p_1_h_1-111
3j0q_1_h_1-111
3iy8_1_A_1-540
1j1u_1_B_1-74
4v5z_1_BY_2-113
4v5z_1_BZ_1-70
4cxg_1_1_1-135
4cxh_1_1_1-135
4cxg_1_2_1-50
4cxh_1_2_1-50
4v8y_1_CN_1-87
1c0a_1_B_1-74
1j2b_1_C_1-74
1j2b_1_D_1-74
6d90_1_4_1-187
3j16_1_K_1-60
1mvr_1_B_1-96
4adx_1_0_132-2915
4adx_1_0_1-2923
4adx_1_9_1-121
2azx_1_D_1-72
2azx_1_C_1-72
3eq4_1_Y_1-69
2il9_1_A_2-142
2il9_1_M_2-142
4v92_1_AZ_1-184
3v7e_1_D_1-125
4v5z_1_AA_1-1563
4v5z_1_AA_1-1562
......
This diff is collapsed. Click to expand it.