Louis BECQUEY

Revision 1 for Bioinformatics completed

......@@ -12,4 +12,5 @@ esl*
# environment stuff
.vscode/
*.pyc
\ No newline at end of file
*.pyc
__pycache__/
\ No newline at end of file
......
......@@ -94,6 +94,8 @@ The detailed list of options is below:
-h [ --help ] Print this help message
--version Print the program version
-f [ --full-inference ] Infer new 3D->family mappings even if Rfam already provides some. Yields more copies of chains
mapped to different families.
-r 4.0 [ --resolution=4.0 ] Maximum 3D structure resolution to consider an RNA chain.
-s Run statistics computations after completion
--extract Extract the portions of 3D RNA chains to individual mmCIF files.
......@@ -105,7 +107,7 @@ The detailed list of options is below:
RNAcifs/ Full structures containing RNA, in mmCIF format
rna_mapped_to_Rfam/ Extracted 'pure' RNA chains
datapoints/ Final results in CSV file format.
--seq-folder=… Path to a folder to store the sequence and alignment files.
--seq-folder=… Path to a folder to store the sequence and alignment files. Subfolders will be:
rfam_sequences/fasta/ Compressed hits to Rfam families
realigned/ Sequences, covariance models, and alignments by family
--no-homology Do not try to compute PSSMs and do not align sequences.
......@@ -117,11 +119,12 @@ The detailed list of options is below:
--update-homologous Re-download Rfam and SILVA databases, realign all families, and recompute all CSV files
--from-scratch Delete database, local 3D and sequence files, and known issues, and recompute.
--archive Create a tar.gz archive of the datapoints text files, and update the link to the latest archive
--no-logs Do not save per-chain logs of the numbering modifications
```
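For reference, with the placeholder paths used in the examples below, the two data folders end up organised roughly as follows (an illustrative layout based on the subfolders listed above, not an exhaustive one):
```
~/Data/RNA/3D/
    RNAcifs/                  # full mmCIF structures containing RNA
    rna_mapped_to_Rfam/       # extracted 'pure' RNA chains
    datapoints/               # final results in CSV format
~/Data/RNA/sequences/
    rfam_sequences/fasta/     # compressed hits to Rfam families
    realigned/                # sequences, covariance models, and alignments by family
```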
Typical usage:
```
nohup bash -c 'time ~/Projects/RNANet/RNAnet.py --3d-folder ~/Data/RNA/3D/ --seq-folder ~/Data/RNA/sequences -s --archive' &
nohup bash -c 'time ~/Projects/RNANet/RNAnet.py --3d-folder ~/Data/RNA/3D/ --seq-folder ~/Data/RNA/sequences -s' &
```
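To restrict a run to high-resolution structures and infer extra 3D->family mappings on top of those provided by Rfam, the same documented flags can be combined, for instance (illustrative command, same placeholder paths as above):
```
nohup bash -c 'time ~/Projects/RNANet/RNAnet.py -r 3.0 -f --3d-folder ~/Data/RNA/3D/ --seq-folder ~/Data/RNA/sequences -s --archive' &
```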
## Post-computation task: estimate quality
......
This diff could not be displayed because it is too large.
1ml5_1_a_1-2914
1ml5_1_a_151-2903
1ml5_1_A_7-1515
1ml5_1_A_2-1520
1ml5_1_A_7-1518
1ml5_1_b_5-121
1eg0_1_O_1-73
2rdo_1_A_3-118
4v48_1_A9_3-118
4v47_1_A9_3-118
6zmi_1_L8_1267-4755
6zm7_1_L8_1267-4755
6y6x_1_L8_1267-4755
6z6n_1_L8_1267-4755
6qzp_1_L8_1267-4755
6zme_1_L8_1267-4755
6z6l_1_L8_1267-4755
6ek0_1_L8_1267-4755
6zmo_1_L8_1267-4755
6z6m_1_L8_1267-4755
6ole_1_D_1267-4755
6om0_1_D_1267-4755
6y2l_1_L8_1267-4755
6lqm_1_8_1267-4755
6y0g_1_L8_1267-4755
6lu8_1_8_1267-4755
6lsr_1_8_1267-4755
6lss_1_8_1267-4755
6oli_1_D_1267-4755
6olg_1_A3_1267-4755
6y57_1_L8_1267-4755
5t2c_1_C_1267-4755
6om7_1_D_1267-4755
4ug0_1_L8_1267-4755
6olf_1_D_1267-4755
6ip5_1_1C_1267-4755
6ip8_1_1C_1267-4755
6olz_1_A3_1267-4755
5aj0_1_A3_1267-4755
5lks_1_L8_1267-4755
6ip6_1_1C_1267-4755
4v6x_1_A8_1267-4755
1vy7_1_AY_1-73
1vy7_1_CY_1-73
4w2h_1_CY_1-73
2z9q_1_A_1-72
1jgq_1_A_2-1520
4v42_1_AA_2-1520
1jgo_1_A_2-1520
1jgp_1_A_2-1520
1ml5_1_A_2-1520
4v42_1_BA_1-2914
1ml5_1_a_1-2914
4v42_1_BB_5-121
1ml5_1_b_5-121
2rdo_1_B_1-2904
4v48_1_A0_1-2904
4v47_1_A0_1-2904
4v48_1_BA_1-1543
4v47_1_BA_1-1542
1ls2_1_B_1-73
3ep2_1_Y_1-72
3eq3_1_Y_1-72
4v48_1_A6_1-73
1eg0_1_O_1-73
2z9q_1_A_1-72
1gsg_1_T_1-72
3jcr_1_H_1-115
4v42_1_BA_1-2914
4v42_1_BA_151-2903
4v48_1_BA_1-91
4v48_1_BA_6-1541
4v48_1_BA_1-1543
4v48_1_BA_6-1538
4v47_1_BA_1-91
4v47_1_BA_6-1540
4v47_1_BA_1-1542
4v47_1_BA_6-1537
2rdo_1_B_1-2903
2rdo_1_B_6-1460
2rdo_1_B_1-1528
2rdo_1_B_6-1457
2rdo_1_B_160-2893
2rdo_1_B_1-2904
2rdo_1_B_6-1522
4v48_1_A0_1-2903
4v48_1_A0_6-1460
4v48_1_A0_1-1528
4v48_1_A0_6-1457
4v48_1_A0_160-2893
4v48_1_A0_1-2904
4v48_1_A0_6-1522
4v47_1_A0_1-2903
4v47_1_A0_6-1460
4v47_1_A0_1-1528
4v47_1_A0_6-1457
4v47_1_A0_160-2893
4v47_1_A0_1-2904
4v47_1_A0_6-1522
1x1l_1_A_1-132
1zc8_1_Z_1-93
2ob7_1_D_1-132
2ob7_1_A_10-319
1x1l_1_A_1-130
1zc8_1_Z_1-130
1zc8_1_Z_1-91
2ob7_1_D_1-130
6rxu_1_C2_588-2386
6rxu_1_C2_583-2388
6rxu_1_C2_588-2383
5oql_1_2_588-2386
5oql_1_2_583-2388
5oql_1_2_588-2383
6rxv_1_C2_588-2386
6rxv_1_C2_583-2388
6rxv_1_C2_588-2383
6rxz_1_C2_588-2386
6rxz_1_C2_583-2388
6rxz_1_C2_588-2383
6rxy_1_C2_588-2386
6rxy_1_C2_583-2388
6rxy_1_C2_588-2383
6rxt_1_C2_588-2386
6rxt_1_C2_583-2388
6rxt_1_C2_588-2383
1r2x_1_C_1-58
1r2w_1_C_1-58
1eg0_1_L_1-57
1eg0_1_L_1-56
1jgq_1_A_7-1518
1jgq_1_A_20-55
1jgq_1_A_2-1520
1jgq_1_A_7-1515
4v42_1_AA_7-1518
4v42_1_AA_20-55
4v42_1_AA_2-1520
4v42_1_AA_7-1515
1jgo_1_A_7-1518
1jgo_1_A_20-55
1jgo_1_A_2-1520
1jgo_1_A_7-1515
1jgp_1_A_7-1518
1jgp_1_A_20-55
1jgp_1_A_2-1520
1jgp_1_A_7-1515
1zc8_1_A_1-59
1mvr_1_D_1-59
4c9d_1_D_29-1
4c9d_1_C_29-1
4adx_1_9_1-121
1mvr_1_D_1-61
4adx_1_9_1-123
1zn1_1_B_1-59
1emi_1_B_1-108
3iy9_1_A_498-1027
......@@ -143,25 +49,1558 @@
3cw1_1_V_1-138
3cw1_1_v_1-138
2iy3_1_B_9-105
3jcr_1_N_1-188
3jcr_1_N_1-106
3jcr_1_N_1-107
2vaz_1_A_64-177
2ftc_1_R_1-1568
2ftc_1_R_792-1568
2ftc_1_R_81-1466
3jcr_1_M_1-141
3jcr_1_M_1-188
3jcr_1_M_1-107
4v5z_1_B0_1-2899
4v5z_1_B0_1-2902
4v5z_1_B0_1-2840
5g2x_1_A_595-692
3iy8_1_A_1-540
4v5z_1_BY_2-113
4v5z_1_BZ_1-70
1mvr_1_B_1-96
4adx_1_0_1-2923
4adx_1_0_132-2915
4v5z_1_B1_2-125
1mvr_1_B_3-96
4adx_1_0_1-2925
3eq4_1_Y_1-69
6uz7_1_8_2140-2827
4v5z_1_AA_1-1563
4v5z_1_AA_1-1562
6cfj_1_1X
6cfj_1_2X
5hcq_1_1X
6cae_1_1X
5hcq_1_2X
5hcr_1_1X
4z8c_1_1X
5j4b_1_1X
5j4b_1_2X
4z8c_1_2X
6cae_1_2X
5j4c_1_1X
5w4k_1_1X
6of1_1_1X
5hcr_1_2X
5hd1_1_1X
5hcp_1_1X
6of1_1_2X
5hau_1_1W
5j4c_1_2X
5wis_1_1X
6xqd_1_1X
6nd5_1_1X
5w4k_1_2X
5hau_1_2W
6xqd_1_2X
4y4p_1_1X
6o97_1_1X
5hcp_1_2X
5doy_1_1X
4zer_1_1X
5wit_1_1X
5hd1_1_2X
6nd5_1_2X
4z3s_1_1X
7jql_1_1X
7jqm_1_1X
7jql_1_2X
5wis_1_2X
6nd6_1_1X
6o97_1_2X
4y4p_1_2X
7jqm_1_2X
4z3s_1_2X
4zer_1_2X
6uo1_1_2X
6uo1_1_1X
5doy_1_2X
5wit_1_2X
5f8k_1_1X
6nd6_1_2X
6xqe_1_1X
6xqe_1_2X
6n9e_1_1X
6n9e_1_2X
6n9f_1_1X
5f8k_1_2X
6n9f_1_2X
6xz7_1_F
6y69_1_W
5afi_1_V
5afi_1_W
6h4n_1_W
5wdt_1_V
5wfs_1_V
5wdt_1_W
5wfs_1_W
5we4_1_V
5we4_1_W
5uq8_1_Y
6c4i_1_Y
6c4i_1_X
5zeb_1_V
5zep_1_W
5lzd_1_V
5we6_1_V
5wfk_1_V
5wfk_1_W
5we6_1_W
5u4i_1_Y
5uq7_1_Y
5u4i_1_X
5lza_1_V
5wf0_1_V
5wf0_1_W
5zeu_1_V
5l3p_1_X
3jcj_1_V
6gxm_1_X
6gwt_1_X
6gxn_1_X
6gxo_1_X
3j9y_1_V
6o9k_1_Y
6o7k_1_V
5lzf_1_V
3jcn_1_V
5lzc_1_V
5u4j_1_X
5u4j_1_Z
5lzb_1_V
6h58_1_W
6h58_1_WW
1eg0_1_O
5j8b_1_X
4v7j_1_AV
4v7j_1_BV
4v7k_1_BV
4v7k_1_AV
4v7k_1_BW
4v7k_1_AW
4v7j_1_AW
4v7j_1_BW
4v4j_1_Z
6i0v_1_B
5k77_1_X
5k77_1_V
5k77_1_Y
5k77_1_W
5k77_1_Z
4pei_1_X
4pei_1_V
4pei_1_W
4pei_1_Z
4pei_1_Y
4a3c_1_P
4a3e_1_P
6lkq_1_U
7k00_1_B
6qdw_1_A
2rdo_1_A
4v48_1_A9
4v47_1_A9
6hcj_1_Q3
6hcq_1_Q3
5mmm_1_Z
4w2e_1_W
5j4b_1_1Y
6cfj_1_1W
5w4k_1_1Y
5wit_1_1W
6cfj_1_1Y
6cfj_1_2W
5j4c_1_1W
5wis_1_1Y
5j4c_1_1Y
6cfj_1_2Y
5wis_1_1W
5j4b_1_1W
5j4c_1_2W
5j4b_1_2W
5j4b_1_2Y
5j4c_1_2Y
5w4k_1_1W
6nd5_1_1Y
5wis_1_2Y
5wit_1_2W
5doy_1_1Y
5w4k_1_2Y
4y4p_1_1Y
4z3s_1_1Y
5doy_1_1W
5doy_1_2Y
6nd5_1_1W
4z3s_1_2Y
4z3s_1_1W
5w4k_1_2W
6nd5_1_2Y
4y4p_1_2Y
6uo1_1_2Y
6uo1_1_2W
4y4p_1_1W
4z3s_1_2W
6uo1_1_1Y
6uo1_1_1W
5wis_1_2W
5wit_1_1Y
6nd5_1_2W
4y4p_1_2W
5doy_1_2W
5wit_1_2Y
6ucq_1_1Y
4v4i_1_Z
6ucq_1_1X
6ucq_1_2Y
4w2e_1_X
6ucq_1_2X
6yss_1_W
5afi_1_Y
5uq8_1_Z
5wdt_1_Y
5wfs_1_Y
6ysr_1_W
5we4_1_Y
6yst_1_W
5uq7_1_Z
5we6_1_Y
5wfk_1_Y
5wf0_1_Y
6o9j_1_V
6ysu_1_W
3j46_1_A
5j8b_1_Y
5j8b_1_W
3bbv_1_Z
5aj0_1_BV
5aj0_1_BW
4wt8_1_AB
4wt8_1_BB
4v4j_1_Y
4v4i_1_Y
5uq8_1_X
5uq7_1_X
1jgq_1_A
4v42_1_AA
1jgo_1_A
1jgp_1_A
1ml5_1_A
4v4j_1_W
4v4i_1_W
4v42_1_BA
4wt8_1_CS
4wt8_1_DS
4v4j_1_X
4v4i_1_X
4v42_1_BB
6uu4_1_333
6uu0_1_333
6uuc_1_333
6uu2_1_333
6b6h_1_3
6pb4_1_3
6d30_1_C
6j7z_1_C
3er9_1_D
5kal_1_Y
4nia_1_3
5kal_1_Z
4nia_1_7
4nia_1_4
5new_1_C
4nia_1_U
4nia_1_6
4oq9_1_7
4nia_1_1
4oq9_1_4
4nia_1_8
4oq9_1_8
4nia_1_5
2vrt_1_E
4nia_1_W
4oq9_1_6
4oq8_1_D
4nia_1_Z
4oq9_1_W
4oq9_1_5
4nia_1_2
2vrt_1_F
4oq9_1_U
4oq9_1_Z
4oq9_1_2
4oq9_1_3
1ddl_1_E
4oq9_1_1
6rt5_1_A
6rt5_1_E
4qu6_1_B
6lkq_1_T
6qdw_1_B
3jbv_1_B
3jbu_1_B
2rdo_1_B
4v48_1_A0
4v47_1_A0
6do8_1_B
6dpi_1_B
6dp9_1_B
6dpb_1_B
6dmn_1_B
6dpp_1_B
6dpk_1_B
6dpd_1_B
6dot_1_B
6dok_1_B
6dp8_1_B
6dpl_1_B
6dpg_1_B
6dou_1_B
6dpc_1_B
6do9_1_B
6dmv_1_B
6dp4_1_B
6dpn_1_B
6doj_1_B
6dph_1_B
6dos_1_B
6doo_1_B
6dp6_1_B
6dox_1_B
6dp5_1_B
6dol_1_B
6dp1_1_B
6doz_1_B
6dp7_1_B
6doq_1_B
6dpa_1_B
6dom_1_B
6dog_1_B
6dop_1_B
6doh_1_B
6doa_1_B
6don_1_B
6dov_1_B
6dpo_1_B
6dod_1_B
6dob_1_B
6dow_1_B
6dpm_1_B
6dpf_1_B
6dp3_1_B
6dp2_1_B
6dpe_1_B
6dpj_1_B
6dor_1_B
6dof_1_B
6dp0_1_B
6doi_1_B
6doc_1_B
6doe_1_B
6n6g_1_D
6lkq_1_S
5h5u_1_H
5lze_1_Y
5lze_1_V
5lze_1_X
3jcj_1_G
6o7k_1_G
4v48_1_BA
4v47_1_BA
4b3r_1_W
4b3t_1_W
4b3s_1_W
5o2r_1_X
5kcs_1_1X
6fti_1_U
6fti_1_W
6ftj_1_U
6ftj_1_W
6ftg_1_U
6ftg_1_W
6ole_1_T
6om0_1_T
6oli_1_T
6om7_1_T
6olf_1_T
6w6l_1_T
6x1b_1_D
6x1b_1_F
5f6c_1_C
6i0t_1_B
1b2m_1_C
1b2m_1_D
1b2m_1_E
2uxc_1_Y
4a3g_1_P
4a3j_1_P
7k00_1_5
5mmi_1_Z
3j9m_1_U
6nu2_1_U
6nu3_1_U
5c0y_1_C
6n6f_1_D
4ohy_1_B
4oi1_1_B
4oi0_1_B
6raz_1_Y
5ipl_1_3
6utw_1_333
5ipm_1_3
5ipn_1_3
4ylo_1_3
4yln_1_6
4ylo_1_6
4yln_1_3
4yln_1_9
5lzf_1_Y
1n32_1_Z
5zsl_1_D
5zsd_1_C
5zsd_1_D
5zsl_1_E
4nku_1_D
4nku_1_H
1cwp_1_E
6qik_1_Y
6rzz_1_Y
6ri5_1_Y
6qt0_1_Y
6qtz_1_Y
6t83_1_1B
6t83_1_3B
6t83_1_AA
6t83_1_CA
6s05_1_Y
5jcs_1_X
5fl8_1_X
3erc_1_G
6of1_1_1W
6cae_1_1Y
6o97_1_1W
6of1_1_1Y
6of1_1_2W
6o97_1_1Y
6nd6_1_1Y
6cae_1_1W
6of1_1_2Y
6cae_1_2Y
6nd6_1_1W
6cae_1_2W
6o97_1_2Y
6nd6_1_2Y
6o97_1_2W
6nd6_1_2W
6xz7_1_G
6gz5_1_BW
6gz3_1_BW
1ls2_1_B
3ep2_1_Y
3eq3_1_Y
4v48_1_A6
2z9q_1_A
4hot_1_X
6d2z_1_C
4tu0_1_F
4tu0_1_G
6r9o_1_B
6is0_1_C
5lzc_1_X
5lzb_1_X
5lzd_1_Y
5lzc_1_Y
5lzb_1_Y
1gsg_1_T
6zvi_1_D
6sv4_1_NB
6sv4_1_NC
6i7o_1_NB
5y88_1_X
3j6x_1_IR
3j6y_1_IR
6tb3_1_N
6tnu_1_N
2uxb_1_X
2x1f_1_B
2x1a_1_B
3eq3_1_D
3ep2_1_D
1eg0_1_M
3eq4_1_D
5o1y_1_B
3jcr_1_H
6dzi_1_H
5zeu_1_A
6mpi_1_W
5mfx_1_B
5w0m_1_J
5bud_1_E
5w0m_1_I
5w0m_1_H
4j7m_1_B
5bud_1_D
6a4e_1_B
6a4e_1_D
6hxx_1_AA
6hxx_1_AB
6hxx_1_AC
6hxx_1_AD
6hxx_1_AE
6hxx_1_AF
6hxx_1_AG
6hxx_1_AH
6hxx_1_AI
6hxx_1_AJ
6hxx_1_AK
6hxx_1_AL
6hxx_1_AM
6hxx_1_AN
6hxx_1_AO
6hxx_1_AP
6hxx_1_AQ
6hxx_1_AR
6hxx_1_AS
6hxx_1_AT
6hxx_1_AU
6hxx_1_AV
6hxx_1_AW
6hxx_1_AX
6hxx_1_AY
6hxx_1_AZ
6hxx_1_BA
6hxx_1_BB
6hxx_1_BC
6hxx_1_BD
6hxx_1_BE
6hxx_1_BF
6hxx_1_BG
6hxx_1_BH
6hxx_1_BI
5odv_1_A
5odv_1_B
5odv_1_C
5odv_1_D
5odv_1_E
5odv_1_F
5odv_1_G
5odv_1_H
5odv_1_I
5odv_1_J
5odv_1_K
5odv_1_L
5odv_1_M
5odv_1_N
5odv_1_O
5odv_1_P
5odv_1_Q
5odv_1_R
5odv_1_S
5odv_1_T
5odv_1_U
5odv_1_V
5odv_1_W
5odv_1_X
6t34_1_A
6t34_1_B
6t34_1_C
6t34_1_D
6t34_1_E
6t34_1_F
6t34_1_G
6t34_1_H
6t34_1_I
6t34_1_J
6t34_1_K
6t34_1_L
6t34_1_M
6t34_1_N
6t34_1_O
6t34_1_P
6t34_1_Q
6t34_1_R
6t34_1_S
6ip8_1_ZY
6ip5_1_ZY
6ip5_1_ZU
6ip6_1_ZY
6ip8_1_ZZ
6ip6_1_ZZ
6uu3_1_333
6uu1_1_333
1pn8_1_D
3er8_1_H
3er8_1_G
3er8_1_F
5o3j_1_B
4dr7_1_B
1i5l_1_Y
1i5l_1_U
4dr6_1_B
6i2n_1_U
4v68_1_A0
6vyu_1_Y
6vyw_1_Y
6vz7_1_Y
6vz5_1_Y
6vz3_1_Y
6vyy_1_Y
6vyx_1_Y
6vyz_1_Y
6vz2_1_Y
1mvr_1_1
6vyt_1_Y
1cgm_1_I
3jb7_1_T
3jb7_1_M
3j0o_1_D
3j0l_1_D
3j0q_1_D
3j0p_1_D
5elt_1_F
5elt_1_E
2tmv_1_R
5a79_1_R
5a7a_1_R
2om3_1_R
2xea_1_R
4wtl_1_T
4wtl_1_P
1xnq_1_W
1x18_1_C
1x18_1_B
1x18_1_D
1vq6_1_4
4am3_1_D
4am3_1_H
4am3_1_I
4lj0_1_C
4lj0_1_D
4lj0_1_E
5lzy_1_HH
4wtj_1_T
4wtj_1_P
4xbf_1_D
6ow3_1_I
6ovy_1_I
6oy6_1_I
6n6d_1_D
6n6k_1_C
6n6k_1_D
3rtj_1_D
1apg_1_D
6ty9_1_M
6tz1_1_N
4bbl_1_Y
4bbl_1_Z
6sce_1_B
6scf_1_I
6scf_1_K
6yud_1_K
6yud_1_O
6scf_1_M
6yud_1_P
6scf_1_L
6yud_1_M
6yud_1_Q
6o6x_1_D
4ba2_1_R
6o6x_1_C
6o7b_1_C
6o6v_1_C
6r7b_1_D
6r9r_1_D
6ov0_1_E
6ov0_1_H
6ov0_1_G
6o6v_1_D
6ov0_1_F
6o7b_1_D
5e02_1_C
6r9r_1_E
6r7b_1_E
6o7i_1_I
6o7h_1_K
7jyy_1_F
7jyy_1_E
7jz0_1_F
7jz0_1_E
6rt6_1_A
6rt6_1_E
1y1y_1_P
5zuu_1_I
5zuu_1_G
4peh_1_W
4peh_1_V
4peh_1_X
4peh_1_Y
4peh_1_Z
6mkn_1_W
4cxg_1_C
4cxh_1_C
1x1l_1_A
1zc8_1_Z
2ob7_1_D
2ob7_1_A
4eya_1_E
4eya_1_F
4eya_1_Q
4eya_1_R
2r1g_1_B
4ht9_1_E
1cvj_1_M
6z1p_1_AB
6z1p_1_AA
4ii9_1_C
5mq0_1_3
5uk4_1_X
5uk4_1_V
5uk4_1_W
5uk4_1_U
5f6c_1_E
4rcj_1_B
1xnr_1_W
6e0o_1_C
6o75_1_D
6o75_1_C
6e0o_1_B
3j06_1_R
1r2x_1_C
1r2w_1_C
1eg0_1_L
4eya_1_G
4eya_1_H
4eya_1_S
4eya_1_T
4dr4_1_V
1ibl_1_Z
1ibm_1_Z
4dr5_1_V
4d61_1_J
1trj_1_B
1trj_1_C
6q8y_1_N
6sv4_1_N
6i7o_1_N
5k8h_1_A
5z4a_1_B
3jbu_1_V
1h2c_1_R
1h2d_1_S
1h2d_1_R
6szs_1_X
5mgp_1_X
6enu_1_X
6enf_1_X
6enj_1_X
1pvo_1_L
1pvo_1_G
1pvo_1_H
1pvo_1_J
1pvo_1_K
2ht1_1_K
2ht1_1_J
6eri_1_AX
1zc8_1_A
1zc8_1_C
1zc8_1_B
1zc8_1_G
1zc8_1_I
1zc8_1_H
1zc8_1_J
4v8z_1_CX
6kqe_1_I
5uh8_1_I
5vi5_1_Q
4xln_1_T
4xlr_1_T
4xln_1_Q
5i2d_1_K
5i2d_1_V
4xlr_1_Q
6sty_1_C
6sty_1_F
2xs5_1_D
3ok4_1_N
3ok4_1_L
3ok4_1_Z
3ok4_1_4
3ok4_1_V
3ok4_1_X
3ok4_1_P
3ok4_1_H
3ok4_1_J
3ok4_1_R
3ok4_1_T
3ok4_1_2
6n6h_1_D
5wnt_1_B
3b0u_1_B
3b0u_1_A
4x9e_1_G
4x9e_1_H
6z1p_1_BB
6z1p_1_BA
2uxd_1_X
4qvd_1_H
4v7e_1_AB
3ol9_1_D
3ol9_1_H
3ol9_1_L
3ol9_1_P
3olb_1_L
3olb_1_P
3olb_1_D
3olb_1_H
3ol6_1_D
3ol6_1_H
3ol6_1_L
3ol6_1_P
3ol8_1_D
3ol8_1_H
3ol7_1_L
3ol7_1_P
3ol7_1_D
3ol7_1_H
3ol8_1_L
3ol8_1_P
1qzc_1_C
1qzc_1_A
6ole_1_V
6om0_1_V
6oli_1_V
6om7_1_V
6w6l_1_V
6olf_1_V
1mvr_1_D
4wtm_1_T
4wtm_1_P
5x70_1_E
5x70_1_G
6gz5_1_BV
6gz4_1_BV
6gz3_1_BV
6fti_1_Q
4v7e_1_AE
4v7e_1_AD
4x62_1_B
4x64_1_B
4x65_1_B
1xmq_1_W
4x66_1_B
3t1h_1_W
3t1y_1_W
1xmo_1_W
4adx_1_9
6kr6_1_B
1zn1_1_B
6z8k_1_X
1cvj_1_Q
4csf_1_U
4csf_1_Q
4csf_1_G
4csf_1_M
4csf_1_K
4csf_1_A
4csf_1_I
4csf_1_S
4csf_1_C
4csf_1_W
4csf_1_O
4csf_1_E
1cvj_1_N
1cvj_1_O
1cvj_1_S
1cvj_1_P
1cvj_1_T
1cvj_1_R
6th6_1_AA
6skg_1_AA
6skf_1_AA
6q8y_1_M
6i7o_1_M
6zmw_1_W
6ybv_1_W
2fz2_1_D
2xpj_1_D
2vrt_1_H
2vrt_1_G
1emi_1_B
6r9m_1_B
4nia_1_C
4nia_1_A
4nia_1_H
4nia_1_N
4nia_1_G
4nia_1_D
4nia_1_B
4nia_1_I
4nia_1_E
4nia_1_M
4oq9_1_I
4oq9_1_G
4oq9_1_C
4oq9_1_H
4oq9_1_N
4oq9_1_A
4oq9_1_D
4oq9_1_E
4oq9_1_M
4oq9_1_B
5uhc_1_I
1uvn_1_F
1uvn_1_B
1uvn_1_D
3iy9_1_A
4wtk_1_T
4wtk_1_P
1vqn_1_4
4oav_1_C
4oav_1_A
3ep2_1_E
3eq3_1_E
3eq4_1_E
3ep2_1_A
3eq3_1_A
3eq4_1_A
3ep2_1_C
3eq3_1_C
3eq4_1_C
3ep2_1_B
3eq3_1_B
3eq4_1_B
4i67_1_B
3pgw_1_R
3pgw_1_N
3cw1_1_X
3cw1_1_W
3cw1_1_V
5it9_1_I
6k32_1_T
6k32_1_P
5mmj_1_A
5x8r_1_A
3j2k_1_3
3j2k_1_2
3j2k_1_1
3j2k_1_0
3j2k_1_4
3nvk_1_G
3nvk_1_S
2iy3_1_B
1cwp_1_F
5z4j_1_B
5gmf_1_E
5gmf_1_H
6e4p_1_J
5gmf_1_F
5gmf_1_G
5gmg_1_D
5gmg_1_C
6e4p_1_K
3ie1_1_E
3ie1_1_H
3ie1_1_F
4dr7_1_V
3ie1_1_G
3s4g_1_C
3s4g_1_B
2qqp_1_R
2zde_1_E
2zde_1_F
2zde_1_H
2zde_1_G
1nb7_1_E
1nb7_1_F
4hos_1_X
3p6y_1_T
3p6y_1_V
3p6y_1_U
3p6y_1_Q
3p6y_1_W
5dto_1_B
4cxh_1_X
1uvj_1_F
1uvj_1_D
1uvj_1_E
6kqd_1_I
6kqd_1_S
5uh5_1_I
1ytu_1_F
1ytu_1_D
4kzz_1_J
5t2c_1_AN
4v5z_1_BF
3j6b_1_E
4v4f_1_B6
4v4f_1_A5
4v4f_1_A3
4v4f_1_B0
4v4f_1_B9
4v4f_1_A2
4v4f_1_A8
4v4f_1_A1
4v4f_1_A9
4v4f_1_BZ
4v4f_1_B8
4v4f_1_B7
4v4f_1_B5
4v4f_1_A0
4v4f_1_A7
4v4f_1_A4
4v4f_1_AZ
4v4f_1_B3
4v4f_1_B1
4v4f_1_B4
4v4f_1_A6
4v4f_1_B2
5flx_1_Z
5zsb_1_C
5zsb_1_D
5zsn_1_D
5zsn_1_E
3jcr_1_N
6gfw_1_R
2vaz_1_A
1qzc_1_B
1mvr_1_C
4v5z_1_BP
6n6e_1_D
4g7o_1_I
4g7o_1_S
5x22_1_S
5x22_1_I
5x21_1_I
5uh6_1_I
6l74_1_I
5uh9_1_I
2ftc_1_R
6sag_1_R
4udv_1_R
2r1g_1_E
5zsc_1_D
5zsc_1_C
6woy_1_I
6wox_1_I
6evj_1_N
6evj_1_M
4gkk_1_W
4v9e_1_AG
4v9e_1_BM
4v9e_1_AM
4v9e_1_AA
4v9e_1_BA
4v9e_1_BG
5lzs_1_II
6fqr_1_C
6ha1_1_X
5kcr_1_1X
2r1g_1_X
3m7n_1_Z
3m85_1_X
3m85_1_Z
3m85_1_Y
1e8s_1_C
5wnp_1_B
5wnv_1_B
5yts_1_B
1utd_1_6
1utd_1_Z
1utd_1_4
1utd_1_7
1utd_1_9
1utd_1_5
1utd_1_3
1utd_1_2
1utd_1_8
1utd_1_1
6n6i_1_C
6n6i_1_D
6n6a_1_D
6ij2_1_F
6ij2_1_G
6ij2_1_H
6ij2_1_E
3u2e_1_D
3u2e_1_C
5uef_1_C
5uef_1_D
4x4u_1_H
4afy_1_D
6oy5_1_I
6owl_1_B
6owl_1_C
4afy_1_C
4lq3_1_R
6s0m_1_C
6gx6_1_B
4k4s_1_D
4k4s_1_H
4k4t_1_H
4k4t_1_D
1zn1_1_C
1zn0_1_C
1xpu_1_G
1xpu_1_L
1xpr_1_L
1xpu_1_H
1xpo_1_K
1xpo_1_J
1xpu_1_J
1xpo_1_H
1xpr_1_J
1xpu_1_K
1xpr_1_K
1xpo_1_M
1xpo_1_L
1xpu_1_M
1xpr_1_M
1xpo_1_G
1xpr_1_H
1xpr_1_G
6gc5_1_F
6gc5_1_H
6gc5_1_G
4v7e_1_AA
4v7e_1_AC
1n1h_1_B
4ohz_1_B
6t83_1_6B
4gv6_1_C
4gv6_1_B
4gv3_1_C
4gv3_1_B
4gv9_1_E
6i7o_1_L
2a8v_1_D
6qx3_1_G
2xnr_1_C
4gkj_1_W
4v5z_1_BC
4v5z_1_BB
4v5z_1_BH
3j0o_1_F
3j0l_1_F
3j0p_1_F
3j0q_1_F
3j0o_1_B
3j0l_1_B
3j0o_1_C
3j0l_1_C
3j0q_1_C
3j0p_1_C
3j0o_1_A
3j0l_1_A
3j0q_1_A
3j0p_1_A
1cwp_1_D
4v5z_1_BJ
5sze_1_C
6wre_1_D
6i0u_1_B
5zsa_1_C
5zsa_1_D
1n34_1_Z
3pf5_1_S
6ppn_1_A
6ppn_1_I
6qdw_1_V
5hk0_1_F
4qm6_1_D
4qm6_1_C
4jzu_1_C
4jzv_1_C
5ytv_1_B
4k4z_1_P
4k4z_1_D
4k4x_1_L
4k4z_1_L
4k4x_1_D
4k4z_1_H
4k4x_1_H
4k4x_1_P
1t1m_1_A
1t1m_1_B
4a3b_1_P
4a3m_1_P
6u6y_1_E
6u6y_1_G
6u6y_1_F
6u6y_1_H
6qik_1_X
6rzz_1_X
6ri5_1_X
6qt0_1_X
6qtz_1_X
6s05_1_X
6t83_1_BB
6t83_1_4B
5fl8_1_Z
5jcs_1_Z
5mrc_1_BB
5mre_1_BB
5mrf_1_BB
6gz4_1_BW
3j46_1_P
3jcr_1_M
4e6b_1_A
4e6b_1_B
6a6l_1_D
4v5z_1_BS
4v8t_1_1
1uvi_1_D
1uvi_1_F
1uvi_1_E
4m7d_1_P
4k4u_1_D
4k4u_1_H
6rt7_1_E
6rt7_1_A
2voo_1_C
2voo_1_D
5k78_1_X
5k78_1_Y
4ylo_1_9
4kzy_1_I
4kzz_1_I
4kzx_1_I
5vyc_1_I2
5vyc_1_I3
5vyc_1_I5
5vyc_1_I1
5vyc_1_I6
5vyc_1_I4
6ip8_1_2M
6ip5_1_2M
6ip6_1_2M
6qcs_1_M
486d_1_G
2r1g_1_C
486d_1_F
4v5z_1_B0
4nia_1_O
4nia_1_J
4nia_1_K
4nia_1_L
4nia_1_F
4oq9_1_K
4oq9_1_O
4oq9_1_J
4oq9_1_F
4oq9_1_L
5tbw_1_SR
6hhq_1_SR
6zvi_1_H
6sv4_1_2B
6sv4_1_2C
6t83_1_2B
6t83_1_A
6i7o_1_2B
6r9q_1_B
6v3a_1_SN1
6v3b_1_SN1
6v39_1_SN1
6v3e_1_SN1
1pn7_1_C
1mj1_1_Q
1mj1_1_R
4dr6_1_V
6kql_1_I
4eya_1_M
4eya_1_N
4eya_1_A
4eya_1_B
2wj8_1_D
2wj8_1_I
2wj8_1_L
2wj8_1_F
2wj8_1_C
2wj8_1_Q
2wj8_1_J
2wj8_1_P
2wj8_1_K
2wj8_1_E
2wj8_1_T
2wj8_1_B
2wj8_1_O
2wj8_1_N
2wj8_1_A
2wj8_1_H
2wj8_1_R
2wj8_1_M
2wj8_1_S
2wj8_1_G
4e6b_1_E
4e6b_1_F
6p71_1_I
3pdm_1_R
5det_1_P
5els_1_I
4n2s_1_B
4yoe_1_E
3j0o_1_H
3j0l_1_H
3j0p_1_H
3j0q_1_H
5gxi_1_B
3iy8_1_A
6tnu_1_M
5mc6_1_M
5mc6_1_N
4eya_1_O
4eya_1_P
4eya_1_C
4eya_1_D
6htq_1_V
6htq_1_W
6htq_1_U
6uu6_1_333
6v3a_1_V
6v39_1_V
5a0v_1_F
3avt_1_T
6d1v_1_C
4s2x_1_B
4s2y_1_B
5wnu_1_B
1zc8_1_F
1vtm_1_R
4v5z_1_BA
4v5z_1_BE
4v5z_1_BD
4v5z_1_BG
4v5z_1_BI
4v5z_1_BK
4v5z_1_BM
4v5z_1_BL
4v5z_1_BV
4v5z_1_BO
4v5z_1_BN
4v5z_1_BQ
4v5z_1_BR
4v5z_1_BT
4v5z_1_BU
4v5z_1_BW
4v5z_1_BY
4v5z_1_BX
4v5z_1_BZ
6u9x_1_H
6u9x_1_K
5elk_1_R
6okk_1_G
4cxg_1_A
4cxh_1_A
6bk8_1_I
4cxg_1_B
4cxh_1_B
4v5z_1_B1
5z4d_1_B
6o78_1_E
6ha8_1_X
1m8w_1_E
1m8w_1_F
5udi_1_B
5udl_1_B
5udk_1_B
5udj_1_B
5w5i_1_B
5w5i_1_D
5w5h_1_B
5w5h_1_D
4eya_1_K
4eya_1_L
4eya_1_I
4eya_1_J
4g9z_1_E
4g9z_1_F
3nma_1_B
3nma_1_C
6een_1_G
6een_1_I
6een_1_H
4wti_1_T
4wti_1_P
5l3p_1_Y
4hor_1_X
3rzo_1_R
2f4v_1_Z
1qln_1_R
2xs7_1_B
6zvi_1_E
6sv4_1_MC
6sv4_1_MB
6i7o_1_MB
6ogy_1_M
6ogy_1_N
6uej_1_B
1x18_1_A
5ytx_1_B
6o8w_1_U
4g0a_1_H
6r9p_1_B
3koa_1_C
4n48_1_D
4n48_1_G
6kug_1_B
6ktc_1_V
6ole_1_U
6om0_1_U
6olg_1_BV
6oli_1_U
6om7_1_U
6w6l_1_U
6olz_1_BV
6olf_1_U
5lzd_1_X
6m7k_1_B
3cd6_1_4
3cma_1_5
6n9e_1_2W
1vqo_1_4
1qvg_1_3
3cme_1_5
5lzd_1_W
5lze_1_W
5lzc_1_W
5lzb_1_W
3wzi_1_C
1mvr_1_E
1mvr_1_B
1mvr_1_A
4adx_1_0
4adx_1_8
1n33_1_Z
6dti_1_W
3d2s_1_F
3d2s_1_H
5mrc_1_AA
5mre_1_AA
5mrf_1_AA
5fl8_1_Y
5jcs_1_Y
2r1g_1_A
2r1g_1_D
2r1g_1_F
3eq4_1_Y
4wkr_1_C
4v99_1_EC
4v99_1_AC
4v99_1_BH
4v99_1_CH
4v99_1_AM
4v99_1_DC
4v99_1_JW
4v99_1_EH
4v99_1_BW
4v99_1_FW
4v99_1_AW
4v99_1_BC
4v99_1_BM
4v99_1_IC
4v99_1_EM
4v99_1_ER
4v99_1_IW
4v99_1_JH
4v99_1_JR
4v99_1_AH
4v99_1_GR
4v99_1_IR
4v99_1_BR
4v99_1_CW
4v99_1_HR
4v99_1_FH
4v99_1_HC
4v99_1_DW
4v99_1_GC
4v99_1_JC
4v99_1_DM
4v99_1_EW
4v99_1_AR
4v99_1_CR
4v99_1_JM
4v99_1_CC
4v99_1_IH
4v99_1_FR
4v99_1_CM
4v99_1_IM
4v99_1_FM
4v99_1_FC
4v99_1_GH
4v99_1_HM
4v99_1_HH
4v99_1_DR
4v99_1_HW
4v99_1_GW
4v99_1_DH
4v99_1_GM
6rt4_1_D
6rt4_1_C
6zvh_1_X
4dwa_1_D
6n6c_1_D
6n6j_1_C
6n6j_1_D
6p7q_1_E
6p7q_1_F
6p7q_1_D
6rcl_1_C
5jju_1_C
4ejt_1_G
5ceu_1_C
5ceu_1_D
6lkq_1_W
3qsu_1_P
3qsu_1_R
1n38_1_B
4qvc_1_G
6q1h_1_D
6q1h_1_H
6p7p_1_F
6p7p_1_E
6p7p_1_D
6vm6_1_J
6vm6_1_G
6wan_1_K
6wan_1_H
6wan_1_G
6wan_1_L
6wan_1_I
6ywo_1_F
6wan_1_J
4oau_1_A
6ywo_1_E
6ywo_1_K
6vm6_1_I
6vm6_1_H
6ywo_1_I
2a1r_1_C
2a1r_1_D
3gpq_1_E
3gpq_1_F
6o79_1_C
6vm6_1_K
6hyu_1_D
1laj_1_R
6ybv_1_K
6mpf_1_W
6spc_1_A
6spe_1_A
6fti_1_V
6ftj_1_V
6ftg_1_V
4g0a_1_G
4g0a_1_F
4g0a_1_E
2b2d_1_S
5hkc_1_C
1rmv_1_B
4qu7_1_X
4qu7_1_V
4qu7_1_U
4v5z_1_AH
4v5z_1_AA
4v5z_1_AB
4v5z_1_AC
4v5z_1_AD
4v5z_1_AE
4v5z_1_AF
4v5z_1_AG
6pmi_1_3
6pmj_1_3
5hjz_1_C
......
This diff could not be displayed because it is too large.
......@@ -11,7 +11,7 @@
# - Use a specialised database (SILVA): better alignments (we guess?), but two kinds of jobs
# - Use cmalign --small everywhere (homogeneity)
# Moreover, --small requires --nonbanded --cyk, which means the output alignment is the optimally scored one.
# To date, we trust Infernal as the best tool to realign RNA. Is it ?
# To date, we trust Infernal as the best tool to realign ncRNA. Is it ?
# Contact: louis.becquey@univ-evry.fr (PhD student), fariza.tahi@univ-evry.fr (PI)
......@@ -28,7 +28,7 @@ pd.set_option('display.max_rows', None)
LSU_set = ["RF00002", "RF02540", "RF02541", "RF02543", "RF02546"] # From Rfam CLAN 00112
SSU_set = ["RF00177", "RF02542", "RF02545", "RF01959", "RF01960"] # From Rfam CLAN 00111
with sqlite3.connect("results/RNANet.db") as conn:
with sqlite3.connect(os.getcwd()+"/results/RNANet.db") as conn:
df = pd.read_sql("SELECT rfam_acc, max_len, nb_total_homol, comput_time, comput_peak_mem FROM family;", conn)
to_remove = [ f for f in df.rfam_acc if f in LSU_set+SSU_set ]
......@@ -74,7 +74,7 @@ ax.set_ylabel("Maximum length of sequences ")
ax.set_zlabel("Computation time (s)")
plt.subplots_adjust(wspace=0.4)
plt.savefig("results/cmalign_jobs_performance.png")
plt.savefig(os.getcwd()+"/results/cmalign_jobs_performance.png")
# # ========================================================
# # Linear Regression of max_mem as function of max_length
......
......@@ -3,7 +3,6 @@
# This file computes additional statistics over the produced dataset.
# Run this file if you want the base counts, pair-type counts, identity percents, etc
# in the database.
# This should be run from the folder where the file is (to access the database with path "results/RNANet.db")
import getopt, os, pickle, sqlite3, shlex, subprocess, sys
import numpy as np
......@@ -22,34 +21,35 @@ from multiprocessing import Pool, Manager
from os import path
from tqdm import tqdm
from collections import Counter
from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker
from setproctitle import setproctitle
from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_worker, trace_unhandled_exceptions
path_to_3D_data = "tobedefinedbyoptions"
path_to_seq_data = "tobedefinedbyoptions"
runDir = os.getcwd()
res_thr = 20.0 # default: all structures
LSU_set = ("RF00002", "RF02540", "RF02541", "RF02543", "RF02546") # From Rfam CLAN 00112
SSU_set = ("RF00177", "RF02542", "RF02545", "RF01959", "RF01960") # From Rfam CLAN 00111
def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
@trace_unhandled_exceptions
def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=2.0):
"""
Plot the joint distribution of pseudotorsion angles, in a Ramachandran-style graph.
See Wadley & Pyle (2007)
See Wadley & Pyle (2007).
Only unique unmapped chains with a resolution below the res argument are considered.
Arguments:
show: True or False, call plt.show() at this end or not
filter_helical: None, "form", "zone", or "both"
None: do not remove helical nucleotide
"form": remove nucleotides if they belong to a A, B or Z form stem
"zone": remove nucleotides falling in an arbitrary zone (see zone argument)
"both": remove nucleotides fulfilling one or both of the above conditions
carbon: 1 or 4, use C4' (eta and theta) or C1' (eta_prime and theta_prime)
sd_range: tuple, set values below avg + sd_range[0] * stdev to 0,
and values above avg + sd_range[1] * stdev to avg + sd_range[1] * stdev.
This removes noise and cuts too high peaks, to clearly see the clusters.
carbon: 1 or 4, use C4' (eta and theta) or C1' (eta_prime and theta_prime)
show: True or False, call plt.show() at this end or not
sd_range: tuple, set values below avg + sd_range[0] * stdev to 0,
and values above avg + sd_range[1] * stdev to avg + sd_range[1] * stdev.
This removes noise and cuts too high peaks, to clearly see the clusters.
res: Maximal resolution value (in Angströms) accepted for a structure, so that
its nucleotides are considered.
"""
os.makedirs("results/figures/wadley_plots/", exist_ok=True)
os.makedirs(runDir + "/results/figures/wadley_plots/", exist_ok=True)
if carbon == 4:
angle = "eta"
......@@ -63,30 +63,32 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
exit("You overestimate my capabilities !")
if not path.isfile(f"data/wadley_kernel_{angle}_{res}A.npz"):
if not path.isfile(runDir + f"/data/wadley_kernel_{angle}_{res}A.npz"):
# Get a worker number to position the progress bar
global idxQueue
thr_idx = idxQueue.get()
setproctitle(f"RNANet statistics.py Worker {thr_idx+1} reproduce_wadley_results(carbon={carbon})")
pbar = tqdm(total=2, desc=f"Worker {thr_idx+1}: eta/theta C{carbon} kernels", position=thr_idx+1, leave=False)
# Extract the angle values of c2'-endo and c3'-endo nucleotides
with sqlite3.connect("results/RNANet.db") as conn:
with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
df = pd.read_sql(f"""SELECT {angle}, th{angle}
FROM nucleotide JOIN (
SELECT chain_id FROM chain JOIN structure
WHERE structure.resolution <= {res}
) AS c
FROM (
SELECT chain_id FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
WHERE chain.rfam_acc = 'unmappd' AND structure.resolution <= {res} AND issue = 0
) AS c NATURAL JOIN nucleotide
WHERE puckering="C2'-endo"
AND {angle} IS NOT NULL
AND th{angle} IS NOT NULL;""", conn)
c2_endo_etas = df[angle].values.tolist()
c2_endo_thetas = df["th"+angle].values.tolist()
df = pd.read_sql(f"""SELECT {angle}, th{angle}
FROM nucleotide JOIN (
SELECT chain_id FROM chain JOIN structure
WHERE structure.resolution <= {res}
) AS c
FROM (
SELECT chain_id FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
WHERE chain.rfam_acc = 'unmappd' AND structure.resolution <= {res} AND issue = 0
) AS c NATURAL JOIN nucleotide
WHERE form = '.'
AND puckering="C3'-endo"
AND {angle} IS NOT NULL
......@@ -111,14 +113,16 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
pbar.update(1)
# Save the data to an archive for later use without the need to recompute
np.savez(f"data/wadley_kernel_{angle}_{res}A.npz",
np.savez(runDir + f"/data/wadley_kernel_{angle}_{res}A.npz",
c3_endo_e=c3_endo_etas, c3_endo_t=c3_endo_thetas,
c2_endo_e=c2_endo_etas, c2_endo_t=c2_endo_thetas,
kernel_c3=f_c3, kernel_c2=f_c2)
pbar.close()
idxQueue.put(thr_idx)
else:
f = np.load(f"data/wadley_kernel_{angle}_{res}A.npz")
setproctitle(f"RNANet statistics.py reproduce_wadley_results(carbon={carbon})")
f = np.load(runDir + f"/data/wadley_kernel_{angle}_{res}A.npz")
c2_endo_etas = f["c2_endo_e"]
c3_endo_etas = f["c3_endo_e"]
c2_endo_thetas = f["c2_endo_t"]
......@@ -148,7 +152,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
f_low_thr = f.mean() + sd_range[0]*f.std()
f_cut = np.where(f > f_sup_thr, f_sup_thr, f)
f_cut = np.where(f_cut < f_low_thr, 0, f_cut)
levels = [f.mean()+f.std(), f.mean()+2*f.std(), f.mean()+4*f.std()]
levels = [ f.mean()+f.std(), f.mean()+2*f.std(), f.mean()+4*f.std()]
# histogram:
fig = plt.figure()
......@@ -157,7 +161,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
ax.bar3d(xpos.ravel(), ypos.ravel(), 0.0, 0.09, 0.09, hist_cut.ravel(), color=color_values, zorder="max")
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
fig.savefig(f"results/figures/wadley_plots/wadley_hist_{angle}_{l}_{res}A.png")
fig.savefig(runDir + f"/results/figures/wadley_plots/wadley_hist_{angle}_{l}_{res}A.png")
if show:
fig.show()
plt.close()
......@@ -168,7 +172,7 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
ax.plot_surface(xx, yy, f_cut, cmap=cm.get_cmap("coolwarm"), linewidth=0, antialiased=True)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
fig.savefig(f"results/figures/wadley_plots/wadley_distrib_{angle}_{l}_{res}A.png")
fig.savefig(runDir + f"/results/figures/wadley_plots/wadley_distrib_{angle}_{l}_{res}A.png")
if show:
fig.show()
plt.close()
......@@ -177,10 +181,10 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
fig = plt.figure(figsize=(5,5))
ax = fig.gca()
ax.scatter(x, y, s=1, alpha=0.1)
ax.contourf(xx, yy, f_cut, alpha=0.5, cmap=cm.get_cmap("coolwarm"), levels=levels, extend="max")
ax.contourf(xx, yy, f, alpha=0.5, cmap=cm.get_cmap("coolwarm"), levels=levels, extend="max")
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
fig.savefig(f"results/figures/wadley_plots/wadley_{angle}_{l}_{res}A.png")
fig.savefig(runDir + f"/results/figures/wadley_plots/wadley_{angle}_{l}_{res}A.png")
if show:
fig.show()
plt.close()
......@@ -188,9 +192,12 @@ def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=4.0):
def stats_len():
"""Plots statistics on chain lengths in RNA families.
Uses all chains mapped to a family including copies, inferred or not.
REQUIRES tables chain, nucleotide up to date.
"""
setproctitle(f"RNANet statistics.py stats_len({res_thr})")
# Get a worker number to position the progress bar
global idxQueue
......@@ -214,7 +221,7 @@ def stats_len():
cols = []
lengths = []
for i,f in enumerate(tqdm(fam_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: Average chain lengths", leave=False)):
for f in tqdm(fam_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: Average chain lengths", leave=False):
# Define a color for that family in the plot
if f in LSU_set:
......@@ -229,7 +236,7 @@ def stats_len():
cols.append("grey")
# Get the lengths of chains
with sqlite3.connect("results/RNANet.db") as conn:
with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
l = [ x[0] for x in sql_ask_database(conn, f"""SELECT COUNT(index_chain)
FROM (
SELECT chain_id
......@@ -239,8 +246,6 @@ def stats_len():
GROUP BY chain_id;""", warn_every=0) ]
lengths.append(l) # list of chain lengths from the family
# notify(f"[{i+1}/{len(fam_list)}] Computed {f} chains lengths")
# Plot the figure
fig = plt.figure(figsize=(10,3))
ax = fig.gca()
......@@ -267,7 +272,7 @@ def stats_len():
ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.5))
# Save the figure
fig.savefig(f"results/figures/lengths_{res_thr}A.png")
fig.savefig(runDir + f"/results/figures/lengths_{res_thr}A.png")
idxQueue.put(thr_idx) # replace the thread index in the queue
# notify("Computed sequence length statistics and saved the figure.")
......@@ -285,6 +290,7 @@ def format_percentage(tot, x):
def stats_freq():
"""Computes base frequencies in all RNA families.
Uses all chains mapped to a family including copies, inferred or not.
Outputs results/frequencies.csv
REQUIRES tables chain, nucleotide up to date."""
......@@ -293,17 +299,18 @@ def stats_freq():
global idxQueue
thr_idx = idxQueue.get()
setproctitle(f"RNANet statistics.py Worker {thr_idx+1} stats_freq()")
# Initialize a Counter object for each family
freqs = {}
for f in fam_list:
freqs[f] = Counter()
# List all nt_names happening within a RNA family and store the counts in the Counter
for i,f in enumerate(tqdm(fam_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: Base frequencies", leave=False)):
with sqlite3.connect("results/RNANet.db") as conn:
for f in tqdm(fam_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: Base frequencies", leave=False):
with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
counts = dict(sql_ask_database(conn, f"SELECT nt_name, COUNT(nt_name) FROM (SELECT chain_id from chain WHERE rfam_acc='{f}') NATURAL JOIN nucleotide GROUP BY nt_name;", warn_every=0))
freqs[f].update(counts)
# notify(f"[{i+1}/{len(fam_list)}] Computed {f} nucleotide frequencies.")
# Create a pandas DataFrame, and save it to CSV.
df = pd.DataFrame()
......@@ -311,7 +318,7 @@ def stats_freq():
tot = sum(freqs[f].values())
df = pd.concat([ df, pd.DataFrame([[ format_percentage(tot, x) for x in freqs[f].values() ]], columns=list(freqs[f]), index=[f]) ])
df = df.fillna(0)
df.to_csv("results/frequencies.csv")
df.to_csv(runDir + "/results/frequencies.csv")
idxQueue.put(thr_idx) # replace the thread index in the queue
# notify("Saved nucleotide frequencies to CSV file.")
......@@ -327,11 +334,13 @@ def parallel_stats_pairs(f):
global idxQueue
thr_idx = idxQueue.get()
setproctitle(f"RNANet statistics.py Worker {thr_idx+1} p_stats_pairs({f})")
chain_id_list = mappings_list[f]
data = []
sqldata = []
for cid in tqdm(chain_id_list, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} basepair types", leave=False):
with sqlite3.connect("results/RNANet.db") as conn:
with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
# Get comma separated lists of basepairs per nucleotide
interactions = pd.DataFrame(
sql_ask_database(conn,
......@@ -398,7 +407,7 @@ def parallel_stats_pairs(f):
data.append(expanded_list)
# Update the database
with sqlite3.connect("results/RNANet.db", isolation_level=None) as conn:
with sqlite3.connect(runDir + "/results/RNANet.db", isolation_level=None) as conn:
conn.execute('pragma journal_mode=wal') # Allow multiple other readers to ask things while we execute this writing query
sql_execute(conn, """UPDATE chain SET pair_count_cWW = ?, pair_count_cWH = ?, pair_count_cWS = ?, pair_count_cHH = ?,
pair_count_cHS = ?, pair_count_cSS = ?, pair_count_tWW = ?, pair_count_tWH = ?, pair_count_tWS = ?,
......@@ -416,8 +425,8 @@ def parallel_stats_pairs(f):
# Create an output DataFrame
f_df = pd.DataFrame([[ x for x in cnt.values() ]], columns=list(cnt), index=[f])
f_df.to_csv(f"data/{f}_counts.csv")
expanded_list.to_csv(f"data/{f}_pairs.csv")
f_df.to_csv(runDir + f"/data/{f}_counts.csv")
expanded_list.to_csv(runDir + f"/data/{f}_pairs.csv")
idxQueue.put(thr_idx) # replace the thread index in the queue
......@@ -430,28 +439,34 @@ def to_dist_matrix(f):
global idxQueue
thr_idx = idxQueue.get()
# notify(f"Computing {f} distance matrix from alignment...")
command = f"esl-alipid --rna --noheader --informat stockholm {f}_3d_only.stk"
setproctitle(f"RNANet statistics.py Worker {thr_idx+1} to_dist_matrix({f})")
# Prepare a file
with open(path_to_seq_data+f"/realigned/{f}++.afa") as al_file:
al = AlignIO.read(al_file, "fasta")
names = [ x.id for x in al if '[' in x.id ]
al = al[-len(names):]
with open(f + "_3d_only.stk", "w") as only_3d:
only_3d.write(al.format("stockholm"))
with open(path_to_seq_data+f"/realigned/{f}_3d_only_tmp.stk", "w") as only_3d:
try:
only_3d.write(al.format("stockholm"))
except ValueError as e:
warn(e)
del al
subprocess.run(["esl-reformat", "--informat", "stockholm", "--mingap", "-o", path_to_seq_data+f"/realigned/{f}_3d_only.stk", "stockholm", path_to_seq_data+f"/realigned/{f}_3d_only_tmp.stk"])
# Prepare the job
process = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE)
process = subprocess.Popen(shlex.split(f"esl-alipid --rna --noheader --informat stockholm {path_to_seq_data}realigned/{f}_3d_only.stk"),
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
id_matrix = np.zeros((len(names), len(names)))
pbar = tqdm(total = len(names)*(len(names)-1)*0.5, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} idty matrix", leave=False)
while process.poll() is None:
output = process.stdout.readline()
cnt = 0
while not cnt or process.poll() is None:
output = process.stdout.read()
if output:
lines = output.strip().split(b'\n')
for l in lines:
cnt += 1
line = l.split()
s1 = line[0].decode('utf-8')
s2 = line[1].decode('utf-8')
......@@ -460,9 +475,14 @@ def to_dist_matrix(f):
id2 = names.index(s2)
id_matrix[id1, id2] = float(score)
pbar.update(1)
if cnt != len(names)*(len(names)-1)*0.5:
warn(f"{f} got {cnt} updates out of {len(names)*(len(names)-1)*0.5}")
if process.poll() != 0:
l = process.stderr.read().strip().split(b'\n')
warn("\n".join([ line.decode('utf-8') for line in l ]))
pbar.close()
subprocess.run(["rm", "-f", f + "_3d_only.stk"])
subprocess.run(["rm", "-f", f + "_3d_only_tmp.stk"])
np.save("data/"+f+".npy", id_matrix)
idxQueue.put(thr_idx) # replace the thread index in the queue
return 0
......@@ -471,21 +491,26 @@ def seq_idty():
"""Computes identity matrices for each of the RNA families.
REQUIRES temporary results files in data/*.npy
REQUIRES tables chain, family un to date."""
REQUIRES tables chain, family up to date."""
# load distance matrices
fams_to_plot = [ f for f in famlist if f not in ignored ]
fam_arrays = []
for f in famlist:
for f in fams_to_plot:
if path.isfile("data/"+f+".npy"):
fam_arrays.append(np.load("data/"+f+".npy"))
fam_arrays.append(np.load("data/"+f+".npy") / 100.0) # normalize percentages in [0,1]
else:
fam_arrays.append([])
warn("data/"+f+".npy not found !")
fam_arrays.append(np.array([]))
# Update database with identity percentages
conn = sqlite3.connect("results/RNANet.db")
for f, D in zip(famlist, fam_arrays):
conn = sqlite3.connect(runDir + "/results/RNANet.db")
for f, D in zip(fams_to_plot, fam_arrays):
if not len(D): continue
a = 1.0 - np.average(D + D.T) # Get symmetric matrix instead of lower triangle + convert from distance matrix to identity matrix
if D.shape[0] > 1:
a = np.sum(D) * 2 / D.shape[0] / (D.shape[0] - 1) # SUM(D) / (n(n-1)/2)
else:
a = D[0][0]
conn.execute(f"UPDATE family SET idty_percent = {round(float(a),2)} WHERE rfam_acc = '{f}';")
conn.commit()
conn.close()
......@@ -495,10 +520,11 @@ def seq_idty():
axs = axs.ravel()
[axi.set_axis_off() for axi in axs]
im = "" # Just to declare the variable, it will be set in the loop
for f, D, ax in zip(famlist, fam_arrays, axs):
if not len(D): continue
for f, D, ax in zip(fams_to_plot, fam_arrays, axs):
D = D + D.T # Copy the lower triangle to upper, to get a symmetrical matrix
if D.shape[0] > 2: # Cluster only if there are more than 2 sequences to organize
D = D + D.T # Copy the lower triangle to upper, to get a symmetrical matrix
D = 1.0 - D
np.fill_diagonal(D, 0.0)
condensedD = squareform(D)
# Compute basic dendrogram by Ward's method
......@@ -507,15 +533,20 @@ def seq_idty():
# Reorganize rows and cols
idx1 = Z['leaves']
D = D[idx1,:]
D = D[idx1[::-1],:]
D = D[:,idx1[::-1]]
im = ax.matshow(1.0 - D, vmin=0, vmax=1, origin='lower') # convert to identity matrix 1 - D from distance matrix D
ax.set_title(f + "\n(" + str(len(mappings_list[f]))+ " chains)", fontsize=10)
D = 1.0 - D
elif D.shape[0] == 2:
np.fill_diagonal(D, 1.0) # the diagonal has been ignored until now
ax.text(np.floor(D.shape[0]/2.0)-(0.5 if not D.shape[0]%2 else 0), -0.5, f + "\n(" + str(D.shape[0]) + " chains)",
fontsize=9, horizontalalignment = 'center', verticalalignment='bottom')
im = ax.matshow(D, vmin=0, vmax=1)
fig.tight_layout()
fig.subplots_adjust(wspace=0.1, hspace=0.3)
fig.colorbar(im, ax=axs[-1], shrink=0.8)
fig.savefig(f"results/figures/distances.png")
notify("Computed all identity matrices and saved the figure.")
fig.subplots_adjust(hspace=0.3, wspace=0.1)
fig.colorbar(im, ax=axs[-4], shrink=0.8)
fig.savefig(runDir + f"/results/figures/distances.png")
print("> Computed all identity matrices and saved the figure.", flush=True)
def stats_pairs():
"""Counts occurrences of intra-chain base-pair types in RNA families
......@@ -523,6 +554,8 @@ def stats_pairs():
Creates a temporary results file in data/pair_counts.csv, and a results file in results/pairings.csv.
REQUIRES tables chain, nucleotide up-to-date."""
setproctitle(f"RNANet statistics.py stats_pairs()")
def line_format(family_data):
return family_data.apply(partial(format_percentage, sum(family_data)))
......@@ -530,12 +563,12 @@ def stats_pairs():
results = []
allpairs = []
for f in fam_list:
newpairs = pd.read_csv(f"data/{f}_pairs.csv", index_col=0)
fam_df = pd.read_csv(f"data/{f}_counts.csv", index_col=0)
newpairs = pd.read_csv(runDir + f"/data/{f}_pairs.csv", index_col=0)
fam_df = pd.read_csv(runDir + f"/data/{f}_counts.csv", index_col=0)
results.append(fam_df)
allpairs.append(newpairs)
subprocess.run(["rm", "-f", f"data/{f}_pairs.csv"])
subprocess.run(["rm", "-f", f"data/{f}_counts.csv"])
subprocess.run(["rm", "-f", runDir + f"/data/{f}_pairs.csv"])
subprocess.run(["rm", "-f", runDir + f"/data/{f}_counts.csv"])
all_pairs = pd.concat(allpairs)
df = pd.concat(results).fillna(0)
df.to_csv("data/pair_counts.csv")
......@@ -573,14 +606,14 @@ def stats_pairs():
crosstab = crosstab[["AU", "GC", "Wobble", "Other"]]
# Save to CSV
df.to_csv("results/pair_types.csv")
df.to_csv(runDir + "/results/pair_types.csv")
# Plot barplot of overall types
ax = crosstab.plot(figsize=(8,5), kind='bar', stacked=True, log=False, fontsize=13)
ax.set_ylabel("Number of observations (millions)", fontsize=13)
ax.set_xlabel(None)
plt.subplots_adjust(left=0.1, bottom=0.16, top=0.95, right=0.99)
plt.savefig("results/figures/pairings.png")
plt.savefig(runDir + "/results/figures/pairings.png")
notify("Computed nucleotide statistics and saved CSV and PNG file.")
......@@ -588,8 +621,10 @@ def per_chain_stats():
"""Computes per-chain frequencies and base-pair type counts.
REQUIRES tables chain, nucleotide up to date. """
setproctitle(f"RNANet statistics.py per_chain_stats()")
with sqlite3.connect("results/RNANet.db", isolation_level=None) as conn:
with sqlite3.connect(runDir + "/results/RNANet.db", isolation_level=None) as conn:
# Compute per-chain nucleotide frequencies
df = pd.read_sql("SELECT SUM(is_A) as A, SUM(is_C) AS C, SUM(is_G) AS G, SUM(is_U) AS U, SUM(is_other) AS O, chain_id FROM nucleotide GROUP BY chain_id;", conn)
df["total"] = pd.Series(df.A + df.C + df.G + df.U + df.O, dtype=np.float64)
......@@ -600,35 +635,143 @@ def per_chain_stats():
conn.execute('pragma journal_mode=wal')
sql_execute(conn, "UPDATE chain SET chain_freq_A = ?, chain_freq_C = ?, chain_freq_G = ?, chain_freq_U = ?, chain_freq_other = ? WHERE chain_id= ?;",
many=True, data=list(df.to_records(index=False)), warn_every=10)
notify("Updated the database with per-chain base frequencies")
print("> Updated the database with per-chain base frequencies", flush=True)
def general_stats():
"""
Number of structures as function of the resolution threshold
Number of Rfam families as function of the resolution threshold
"""
with sqlite3.connect("results/RNANet.db") as conn:
df_unique = pd.read_sql(f"""SELECT distinct pdb_id, chain_name, exp_method, resolution
FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
WHERE rfam_acc = 'unmappd' AND ISSUE=0;""", conn)
df_mapped_unique = pd.read_sql(f"""SELECT distinct pdb_id, chain_name, exp_method, resolution
FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
WHERE rfam_acc != 'unmappd' AND ISSUE=0;""", conn)
df_mapped_copies = pd.read_sql(f"""SELECT pdb_id, chain_name, inferred, rfam_acc, pdb_start, pdb_end, exp_method, resolution
FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
WHERE rfam_acc != 'unmappd' AND ISSUE=0;""", conn)
df_inferred_only_unique = pd.read_sql(f"""SELECT DISTINCT pdb_id, c.chain_name, exp_method, resolution
FROM (SELECT inferred, rfam_acc, pdb_start, pdb_end, chain.structure_id, chain.chain_name, r.redundancy, r.inf_redundancy
FROM chain
JOIN (SELECT structure_id, chain_name, COUNT(distinct rfam_acc) AS redundancy, SUM(inferred) AS inf_redundancy
FROM chain
WHERE rfam_acc != 'unmappd' AND issue=0
GROUP BY structure_id, chain_name
) AS r ON chain.structure_id=r.structure_id AND chain.chain_name = r.chain_name
WHERE r.redundancy=r.inf_redundancy AND rfam_acc != 'unmappd' and issue=0
) AS c
JOIN structure ON c.structure_id=structure.pdb_id;""", conn)
print("> found", len(df_inferred_only_unique.index), "chains which are mapped only by inference using BGSU NR Lists.")
setproctitle(f"RNANet statistics.py general_stats()")
reqs = [
# unique unmapped chains with no issues
""" SELECT distinct pdb_id, chain_name, exp_method, resolution
FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
WHERE rfam_acc = 'unmappd' AND ISSUE=0;""",
# unique mapped chains with no issues
""" SELECT distinct pdb_id, chain_name, exp_method, resolution
FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
WHERE rfam_acc != 'unmappd' AND ISSUE=0;""",
# mapped chains with no issues
""" SELECT pdb_id, chain_name, inferred, rfam_acc, pdb_start, pdb_end, exp_method, resolution
FROM chain JOIN structure ON chain.structure_id = structure.pdb_id
WHERE rfam_acc != 'unmappd' AND ISSUE=0;""",
# mapped chains with no issues that are all inferred
""" SELECT DISTINCT pdb_id, c.chain_name, exp_method, resolution
FROM (
SELECT inferred, rfam_acc, pdb_start, pdb_end, chain.structure_id, chain.chain_name, r.redundancy, r.inf_redundancy
FROM chain
JOIN (SELECT structure_id, chain_name, COUNT(distinct rfam_acc) AS redundancy, SUM(inferred) AS inf_redundancy
FROM chain
WHERE rfam_acc != 'unmappd' AND issue=0
GROUP BY structure_id, chain_name
) AS r ON chain.structure_id=r.structure_id AND chain.chain_name = r.chain_name
WHERE r.redundancy=r.inf_redundancy AND rfam_acc != 'unmappd' and issue=0
) AS c
JOIN structure ON c.structure_id=structure.pdb_id;""",
# Number of mapped chains (not inferred)
"""SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 0);""",
# Number of unique mapped chains (not inferred)
"""SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 0);""",
# Number of mapped chains (inferred)
"""SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 1);""",
# Number of unique mapped chains (inferred)
"""SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 1);""",
# Number of mapped chains inferred once
"""SELECT count(*) FROM (
SELECT structure_id, chain_name, COUNT(DISTINCT rfam_acc) as c
FROM chain where rfam_acc!='unmappd' and inferred=1
GROUP BY structure_id, chain_name
) WHERE c=1;""",
# Number of mapped chains inferred twice
"""select count(*) from (
select structure_id, chain_name, count(distinct rfam_acc) as c
from chain where rfam_acc!='unmappd' and inferred=1
group by structure_id, chain_name
) where c=2;""",
# Number of mapped chains inferred 3 times or more
"""select count(*) from (
select structure_id, chain_name, count(distinct rfam_acc) as c
from chain where rfam_acc!='unmappd' and inferred=1
group by structure_id, chain_name
) where c>2;""",
# Number of chains mapped both with and without inference
""" SELECT COUNT(*) FROM (
SELECT structure_id, chain_name, sum(inferred) AS s, COUNT(rfam_acc) AS c
FROM chain
WHERE rfam_acc!='unmappd'
GROUP BY structure_id, chain_name
)
WHERE s < c AND s > 0;""",
# Number of mapped chains (total)
"""SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd');""",
# Number of unique mapped chains
"""SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd');""",
# Number of unmapped chains
"""SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc = 'unmappd');""",
# Number of mapped chains without issues (not inferred)
"""SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 0 AND issue = 0);""",
# Number of unique mapped chains without issues (not inferred)
"""SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 0 AND issue = 0);""",
# Number of mapped chains without issues (inferred)
"""SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 1 AND issue=0);""",
# Number of unique mapped chains without issues (inferred)
"""SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND inferred = 1 AND issue=0);""",
# Number of mapped chains without issues (total)
"""SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND issue=0);""",
# Number of unique mapped chains without issues
"""SELECT count(*) FROM (SELECT DISTINCT structure_id, chain_name FROM chain WHERE rfam_acc != 'unmappd' AND issue=0);""",
# Number of unmapped chains without issues
"""SELECT count(*) FROM (SELECT structure_id, chain_name FROM chain WHERE rfam_acc = 'unmappd' AND issue=0);"""
]
answers = []
with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
for r in reqs:
answers.append(pd.read_sql(r, conn))
df_unique = answers[0]
df_mapped_unique = answers[1]
df_mapped_copies = answers[2]
df_inferred_only_unique = answers[3]
print()
print("> found", answers[4].iloc[0][0], f"chains ({answers[5].iloc[0][0]} unique chains) that are mapped thanks to Rfam. Removing chains with issues, only {answers[15].iloc[0][0]} ({answers[16].iloc[0][0]} unique)")
if answers[4].iloc[0][0] != answers[5].iloc[0][0]:
print("\t> This happens because different parts of the same chain can be mapped to different families.")
print("> found", answers[6].iloc[0][0], f"chains ({answers[7].iloc[0][0]} unique chains) that are mapped by inference. Removing chains with issues, only {answers[17].iloc[0][0]} ({answers[18].iloc[0][0]} unique).")
print("\t> ", answers[8].iloc[0][0], "chains are mapped only once,")
print("\t> ", answers[9].iloc[0][0], "are mapped to 2 families,")
print("\t> ", answers[10].iloc[0][0], "are mapped to 3 or more.")
print("> Among them,", answers[11].iloc[0][0], "chains are mapped both with families found on Rfam and by inference.")
if answers[11].iloc[0][0]:
print("\t> this is normal if you used option -f (--full-inference). Otherwise, there might be a problem.")
print("> TOTAL:", answers[12].iloc[0][0], f"chains ({answers[13].iloc[0][0]} unique chains) mapped to a family. Removing chains with issues, only {answers[19].iloc[0][0]} ({answers[20].iloc[0][0]} unique).")
print("> TOTAL:", answers[14].iloc[0][0], f"unmapped chains. Removing chains with issues, {answers[21].iloc[0][0]}.")
if answers[14].iloc[0][0]:
print("\t> this is normal if you used option --no-homology. Otherwise, there might be a problem.")
print()
##########################################
# plot N = f(resolution, exp_method)
......@@ -642,7 +785,7 @@ def general_stats():
df_inferred_only_unique.sort_values('resolution', inplace=True, ignore_index=True)
df_mapped_copies.sort_values('resolution', inplace=True, ignore_index=True)
max_res = max(df_unique.resolution)
max_structs = len(df_mapped_copies.index.tolist())
max_structs = max(len(df_mapped_copies.index), len(df_unique.index))
colors = np.linspace(0,1,1+len(methods))
plt.xticks( np.arange(0, max_res+2, 2.0).tolist(), np.arange(0, max_res+2, 2.0).tolist() )
......@@ -654,7 +797,7 @@ def general_stats():
axs[0][0].set_ylabel("ALL", fontsize=14)
axs[0][0].set_title("Number of unique RNA chains", fontsize=14)
axs[0][0].set_ylim((0, max_structs * 1.05))
axs[0][0].legend(loc="best", fontsize=14)
axs[0][0].legend(loc="lower right", fontsize=14)
axs[0][1].grid(axis='y', ls='dotted', lw=1)
axs[0][1].set_yticklabels([])
......@@ -663,9 +806,9 @@ def general_stats():
axs[0][1].hist(df_inferred_only_unique.resolution, bins=np.arange(0, max_res, 0.5), fc=(0.2, 0, colors[0], 0.5), cumulative=True, label='only by inference')
axs[0][1].text(0.95*max_res, 0.95*len(df_mapped_unique.resolution), "%d " % len(df_mapped_unique.resolution),
horizontalalignment='right', verticalalignment='top', fontsize=14)
axs[0][1].set_title("Number of unique RNA chains\nmapped to $\geq 1$ family", fontsize=14)
axs[0][1].set_title(r"Number of unique RNA chains\nmapped to $\geq 1$ family", fontsize=14)
axs[0][1].set_ylim((0, max_structs * 1.05))
axs[0][1].legend(loc="best", fontsize=14)
axs[0][1].legend(loc="upper left", fontsize=14)
axs[0][2].grid(axis='y', ls='dotted', lw=1)
axs[0][2].set_yticklabels([])
......@@ -675,7 +818,7 @@ def general_stats():
axs[0][2].text(0.95*max_res, 0.95*len(df_mapped_copies.resolution), "%d " % len(df_mapped_copies.resolution),
horizontalalignment='right', verticalalignment='top', fontsize=14)
axs[0][2].set_title("Number of RNA chains mapped to a\nfamily (with copies)", fontsize=14)
axs[0][2].legend(loc="right", fontsize=14)
axs[0][2].legend(loc="upper left", fontsize=14)
axs[0][2].set_ylim((0, max_structs * 1.05))
for i,m in enumerate(methods):
......@@ -683,7 +826,7 @@ def general_stats():
df_mapped_unique_m = df_mapped_unique[df_mapped_unique.exp_method == m]
df_inferred_only_unique_m = df_inferred_only_unique[df_inferred_only_unique.exp_method == m]
df_mapped_copies_m = df_mapped_copies[ df_mapped_copies.exp_method == m]
max_structs = len(df_mapped_copies_m.resolution.tolist())
max_structs = max(len(df_mapped_copies_m.index), len(df_unique_m.index))
print("> found", max_structs, "structures with method", m, flush=True)
axs[1+i][0].grid(axis='y', ls='dotted', lw=1)
......@@ -693,7 +836,7 @@ def general_stats():
horizontalalignment='right', verticalalignment='top', fontsize=14)
axs[1+i][0].set_ylim((0, max_structs * 1.05))
axs[1+i][0].set_ylabel(m, fontsize=14)
axs[1+i][0].legend(loc="best", fontsize=14)
axs[1+i][0].legend(loc="lower right", fontsize=14)
axs[1+i][1].grid(axis='y', ls='dotted', lw=1)
axs[1+i][1].set_yticklabels([])
......@@ -703,7 +846,7 @@ def general_stats():
axs[1+i][1].text(0.95*max_res, 0.95*len(df_mapped_unique_m.resolution), "%d " % len(df_mapped_unique_m.resolution),
horizontalalignment='right', verticalalignment='top', fontsize=14)
axs[1+i][1].set_ylim((0, max_structs * 1.05))
axs[1+i][1].legend(loc="best", fontsize=14)
axs[1+i][1].legend(loc="upper left", fontsize=14)
axs[1+i][2].grid(axis='y', ls='dotted', lw=1)
axs[1+i][2].set_yticklabels([])
......@@ -713,7 +856,7 @@ def general_stats():
axs[1+i][2].text(0.95*max_res, 0.95*len(df_mapped_copies_m.resolution), "%d " % len(df_mapped_copies_m.resolution),
horizontalalignment='right', verticalalignment='top', fontsize=14)
axs[1+i][2].set_ylim((0, max_structs * 1.05))
axs[1+i][2].legend(loc="right", fontsize=14)
axs[1+i][2].legend(loc="upper left", fontsize=14)
axs[-1][0].set_xlabel("Structure resolution\n(Angströms, lower is better)", fontsize=14)
axs[-1][1].set_xlabel("Structure resolution\n(Angströms, lower is better)", fontsize=14)
......@@ -722,7 +865,7 @@ def general_stats():
fig.suptitle("Number of RNA chains by experimental method and resolution", fontsize=16)
fig.subplots_adjust(left=0.07, right=0.98, wspace=0.05,
hspace=0.05, bottom=0.05, top=0.92)
fig.savefig("results/figures/resolutions.png")
fig.savefig(runDir + "/results/figures/resolutions.png")
plt.close()
##########################################
......@@ -765,7 +908,7 @@ def general_stats():
fig.suptitle("Number of RNA families used by experimental method and resolution", fontsize=16)
fig.subplots_adjust(left=0.05, right=0.98, wspace=0.05,
hspace=0.05, bottom=0.12, top=0.84)
fig.savefig("results/figures/Nfamilies.png")
fig.savefig(runDir + "/results/figures/Nfamilies.png")
plt.close()
def log_to_pbar(pbar):
......@@ -776,8 +919,10 @@ def log_to_pbar(pbar):
if __name__ == "__main__":
# parse options
DELETE_OLD_DATA = False
DO_WADLEY_ANALYSIS = False
try:
opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "resolution=", "3d-folder=", "seq-folder=" ])
opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "resolution=", "3d-folder=", "seq-folder=" ])
except getopt.GetoptError as err:
print(err)
sys.exit(2)
......@@ -795,6 +940,7 @@ if __name__ == "__main__":
"\n\t\t\t\t\tdatapoints/\t\tFinal results in CSV file format.")
print("--seq-folder=…\t\t\tPath to a folder containing the sequence and alignment files. Required subfolder:"
"\n\t\t\t\t\trealigned/\t\tSequences, covariance models, and alignments by family")
print("--from-scratch\t\t\tDo not use precomputed results from past runs, recompute everything")
sys.exit()
elif opt == '--version':
print("RNANet statistics 1.1 beta")
......@@ -810,25 +956,37 @@ if __name__ == "__main__":
path_to_seq_data = path.abspath(arg)
if path_to_seq_data[-1] != '/':
path_to_seq_data += '/'
elif opt=='--from-scratch':
DELETE_OLD_DATA = True
DO_WADLEY_ANALYSIS = True
subprocess.run(["rm","-f", "data/wadley_kernel_eta.npz", "data/wadley_kernel_eta_prime.npz", "data/pair_counts.csv"])
elif opt=='--wadley':
DO_WADLEY_ANALYSIS = True
# Load mappings
print("Loading mappings list...")
with sqlite3.connect("results/RNANet.db") as conn:
with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
fam_list = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from family ORDER BY rfam_acc ASC;") ]
mappings_list = {}
for k in fam_list:
mappings_list[k] = [ x[0] for x in sql_ask_database(conn, f"SELECT chain_id from chain WHERE rfam_acc='{k}' and issue=0;") ]
mappings_list[k] = [ x[0] for x in sql_ask_database(conn, f"SELECT chain_id from chain JOIN structure ON chain.structure_id=structure.pdb_id WHERE rfam_acc='{k}' AND issue=0 AND resolution <= {res_thr};") ]
# List the families for which we will compute sequence identity matrices
with sqlite3.connect("results/RNANet.db") as conn:
famlist = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain GROUP BY rfam_acc) WHERE n_chains > 0 ORDER BY rfam_acc ASC;") ]
ignored = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain GROUP BY rfam_acc) WHERE n_chains < 2 ORDER BY rfam_acc ASC;") ]
with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
famlist = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain WHERE issue = 0 GROUP BY rfam_acc) WHERE n_chains > 0 ORDER BY rfam_acc ASC;") ]
ignored = [ x[0] for x in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain WHERE issue = 0 GROUP BY rfam_acc) WHERE n_chains < 3 ORDER BY rfam_acc ASC;") ]
n_unmapped_chains = sql_ask_database(conn, "SELECT COUNT(*) FROM chain WHERE rfam_acc='unmappd' AND issue=0;")[0][0]
if len(ignored):
print(f"Idty matrices: Ignoring {len(ignored)} families with fewer than 3 chains:", " ".join(ignored)+'\n')
if DELETE_OLD_DATA:
for f in fam_list:
subprocess.run(["rm","-f", runDir + f"/data/{f}.npy", runDir + f"/data/{f}_pairs.csv", runDir + f"/data/{f}_counts.csv"])
# Prepare the multiprocessing execution environment
nworkers = max(read_cpu_number()-1, 32)
nworkers = min(read_cpu_number()-1, 32)
thr_idx_mgr = Manager()
idxQueue = thr_idx_mgr.Queue()
for i in range(nworkers):
......@@ -836,14 +994,15 @@ if __name__ == "__main__":
# Define the tasks
joblist = []
# joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), 4.0))) # res threshold is 4.0 Angstroms by default
# joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), 4.0))) #
if n_unmapped_chains and DO_WADLEY_ANALYSIS:
joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), 20.0))) # res threshold 20.0 Angstroms here, i.e. all structures
joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), 20.0))) #
joblist.append(Job(function=stats_len)) # Computes figures
# joblist.append(Job(function=stats_freq)) # updates the database
# for f in famlist:
# joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database
# if f not in ignored:
# joblist.append(Job(function=to_dist_matrix, args=(f,))) # updates the database
joblist.append(Job(function=stats_freq)) # updates the database
for f in famlist:
joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database
if f not in ignored:
joblist.append(Job(function=to_dist_matrix, args=(f,))) # updates the database
p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers)
pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, leave=True)
......@@ -867,7 +1026,8 @@ if __name__ == "__main__":
print()
# finish the work after the parallel portions
# per_chain_stats()
# seq_idty()
# stats_pairs()
general_stats()
per_chain_stats()
seq_idty()
stats_pairs()
if n_unmapped_chains:
general_stats()
......
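Once a RNAnet.py run has completed, the statistics can be recomputed with the option set parsed above. A minimal sketch, assuming the script is launched as statistics.py from the run directory (the one containing results/RNANet.db), with the same placeholder data paths as in the README:
```
cd ~/Projects/RNANet
python3 statistics.py -r 20.0 --3d-folder ~/Data/RNA/3D/ --seq-folder ~/Data/RNA/sequences/ --wadley
```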