Louis BECQUEY

v pre-1.6b

############################################################################################
v 1.6 beta, August 2021
Aglaé Tabot joins the development team. Khodor Hannoush leaves.
FEATURE CHANGES
- Distinct options --cmalign-opts and --cmalign-rrna-opts make it possible to adapt the parameters for the LSU and SSU families.
The LSU and SSU are now aligned with Infernal options '--cpu 10 --mxsize 8192 --mxtau 0.1', which is slow,
requires up to 100 GB of RAM, and yields a suboptimal alignment (tau=0.1 is quite bad), but is homogeneous with the other families.
- The LSU and SSU therefore have defined cm_coords fields, and therefore distance matrices can be computed.
- We now provide for download the renumbered (standardised) 3D MMCIF files, the nucleotides being numbered by their "index_chain" in the database.
- We now provide for download the sequences of the 3D chains aligned by Rfam family (without Rfam sequences, which have been removed).
- statistics.py now computes histograms and a density estimation with Gaussian mixture models for a large set of geometric parameters,
measured on the unmapped data at a given resolution threshold. The parameters include:
* All atom bonded distances and torsion angles
* Distances, flat angles and torsion angles in the Pyle/VFold model
* Distances, flat angles and torsion angles in the HiRE-RNA model
* Sequence-dependent geometric parameters of the basepairs for all non-canonical basepairs in the HiRE-RNA model.
The data is saved as JSON files of parameters, and numerous figures are produced to illustrate the distributions.
The number of Gaussians to use in the GMMs is hard-coded in geometric_stats.py after our first estimation. If you do not want to trust this estimation,
you can ignore it with option --rescan-nmodes. An exploration of the number of Gaussians from 1 to 8 will be performed, and the best GMM will be kept.
BUG CORRECTIONS
- New code file geometric_stats.py
- New automation script that starts from scratch
- Many small fixes
- Performance tweaks
TECHNICAL CHANGES
- Switched to DSSR Pro.
- Switched to esl-alimerge instead of cmalign --merge to merge alignments.
############################################################################################
v 1.5 beta, April 2021
FEATURE CHANGES
......
MIT License
Copyright (c) 2019 Louis Becquey
Copyright (c) 2019-2021 IBISC, Université Paris Saclay
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
......
......@@ -57,13 +57,14 @@ def trace_unhandled_exceptions(func):
return func(*args, **kwargs)
except:
s = traceback.format_exc()
with open(runDir + "/errors.txt", "a") as f:
f.write("Exception in "+func.__name__+"\n")
f.write(s)
f.write("\n\n")
warn('Exception in '+func.__name__, error=True)
print(s)
if not "KeyboardInterrupt" in s:
with open(runDir + "/errors.txt", "a") as f:
f.write("Exception in "+func.__name__+"\n")
f.write(s)
f.write("\n\n")
warn('Exception in '+func.__name__, error=True)
print(s)
return wrapped_func
pd.set_option('display.max_rows', None)
......@@ -261,7 +262,7 @@ class Chain:
new_s.add(new_model)
# renumber this structure (portion of the original) with the index_chain and save it in a cif file
t=pdb.Structure.Structure(new_s.get_id())
t = pdb.Structure.Structure(new_s.get_id())
for model in new_s:
new_model_t=pdb.Model.Model(model.get_id())
for chain in model:
......@@ -288,7 +289,7 @@ class Chain:
# particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G)
if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' or resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' :
res=chain[(' ', resseq, icode_res)]
else : #modified nucleotides (e.g. chain 5l4o_1_A)
else : # modified nucleotides (e.g. chain 5l4o_1_A)
het='H_' + nt
res=chain[(het, resseq, icode_res)]
res_id=res.get_id()
......@@ -1599,7 +1600,7 @@ class Pipeline:
try:
fam_pbar = tqdm(total=len(self.fam_list), desc="RNA families", position=0, leave=True)
# Apply work_pssm_remap to each RNA family
for i, _ in enumerate(p.imap_unordered(work_pssm_remap, self.fam_list, chunksize=1)):
for i, _ in enumerate(p.imap_unordered(partial(work_pssm_remap, useSina=pp.USESINA), self.fam_list, chunksize=1)):
# Every time the iteration finishes on a family, update the global progress bar over the RNA families
fam_pbar.update(1)
fam_pbar.close()
......@@ -1654,7 +1655,7 @@ class Pipeline:
p = Pool(initializer=init_with_tqdm, initargs=(tqdm.get_lock(),), processes=3)
try:
pbar = tqdm(total=len(self.loaded_chains), desc="Saving chains to CSV", position=0, leave=True)
for _, _2 in enumerate(p.imap_unordered(work_save, self.loaded_chains)):
for _, _2 in enumerate(p.imap_unordered(partial(work_save, homology=pp.HOMOLOGY), self.loaded_chains)):
pbar.update(1)
pbar.close()
p.close()
......@@ -1700,18 +1701,28 @@ class Pipeline:
if self.ARCHIVE:
os.makedirs(runDir + "/archive", exist_ok=True)
datestr = time.strftime('%Y%m%d')
# The text files
subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_datapoints_latest.tar.gz"])
subprocess.run(["tar", "-C", path_to_3D_data + "/datapoints", "-czf", runDir + f"/archive/RNANET_datapoints_{datestr}.tar.gz", "."])
subprocess.run(["ln", "-s", runDir + f"/archive/RNANET_datapoints_{datestr}.tar.gz", runDir + f"/archive/RNANET_datapoints_latest.tar.gz"])
# The alignments
if self.HOMOLOGY:
# gather the alignments
os.makedirs(path_to_seq_data + "realigned/3d_only", exist_ok=True)
for f in os.listdir(path_to_seq_data + "realigned"):
if "3d_only.afa" in f:
subprocess.run(["cp", path_to_seq_data + "realigned/" + f, path_to_seq_data + "realigned/3d_only"])
subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_alignments_latest.tar.gz"])
subprocess.run(["tar", "-C", path_to_seq_data + "realigned/3d_only" , "-czf", runDir + f"/archive/RNANET_alignments_latest.tar.gz", "."])
subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_3dOnlyAlignments_latest.tar.gz"])
subprocess.run(["tar", "-C", path_to_seq_data + "realigned/3d_only" , "-czf", runDir + f"/archive/RNANET_3dOnlyAlignments_latest.tar.gz", "."])
# The 3D files
if os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"):
subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_MMCIFmappedToRfam_latest.tar.gz"])
subprocess.run(["tar", "-C", path_to_3D_data + "rna_mapped_to_Rfam" , "-czf", runDir + f"/archive/RNANET_MMCIFmappedToRfam_latest.tar.gz", "."])
if os.path.isdir(path_to_3D_data + "rna_only"):
subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_MMCIFall_latest.tar.gz"])
subprocess.run(["tar", "-C", path_to_3D_data + "rna_only" , "-czf", runDir + f"/archive/RNANET_MMCIFall_latest.tar.gz", "."])
def sanitize_database(self):
"""Searches for issues in the database and correct them"""
......@@ -1813,7 +1824,7 @@ def warn(message, error=False):
"""
# Cut if too long
if len(message) > 66:
x = message.find(' ', 50, 66)
x = message.find(' ', 40, 66)
if x != -1:
warn(message[:x], error=error)
warn(message[x+1:], error=error)
......@@ -2809,7 +2820,7 @@ def work_save_pydca(f,alignment):
warn(e)
@trace_unhandled_exceptions
def work_pssm_remap(f):
def work_pssm_remap(f, useSina=False):
"""Computes Position-Specific-Scoring-Matrices given the multiple sequence alignment of the RNA family.
This also remaps the 3D object sequence with the aligned sequence in the MSA.
If asked, the 3D object sequence is completed by the consensus nucleotide when one of them is missing.
......@@ -2991,7 +3002,7 @@ def work_pssm_remap(f):
setproctitle(f"RNAnet.py work_pssm_remap({f}) insert/match states")
# Get back the information of match/insertion states from the STK file
if (not use_sina) or (f not in SSU_set and f not in LSU_set):
if (not useSina) or (f not in SSU_set and f not in LSU_set):
alignstk = AlignIO.read(path_to_seq_data + "realigned/" + f + "++.stk", "stockholm")
consensus_2d = alignstk.column_annotations["secondary_structure"]
del alignstk
......@@ -3037,8 +3048,6 @@ def work_pssm_remap(f):
gap_percent, consensus, cons_sec_struct)
VALUES (?, 0, 0, NULL, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, '-', NULL);""", data=(f,))
# Save the number of "used columns" to table family ( = the length of the alignment if it was composed only of the RNANet chains)
sql_execute(conn, f"UPDATE family SET ali_filtered_len = ? WHERE rfam_acc = ?;", data=(len(columns_to_save), f))
conn.close()
......@@ -3171,15 +3180,9 @@ if __name__ == "__main__":
print(f"Among errors, {len(no_nts_set)} structures seem to contain RNA chains without defined nucleotides:", no_nts_set, flush=True)
if len(weird_mappings):
print(f"{len(weird_mappings)} mappings to Rfam were taken as absolute positions instead of residue numbers:", weird_mappings, flush=True)
if pp.SELECT_ONLY is None:
if pp.HOMOLOGY and pp.SELECT_ONLY is None:
pp.checkpoint_save_chains()
if not pp.HOMOLOGY:
# Save chains to file
for c in pp.loaded_chains:
work_save(c, homology=False)
print("Completed.")
# At this point, structure, chain and nucleotide tables of the database are up to date.
# (Modulo some statistics computed by statistics.py)
......@@ -3187,33 +3190,34 @@ if __name__ == "__main__":
# Homology information
# ===========================================================================
if pp.SELECT_ONLY is None:
# If your job failed, you can comment all the "3D information" part and start from here.
pp.checkpoint_load_chains()
if pp.HOMOLOGY:
if pp.SELECT_ONLY is None:
# If your job failed, you can comment all the "3D information" part and start from here.
pp.checkpoint_load_chains()
# Get the list of Rfam families found in the update
rfam_acc_to_download = {}
for c in pp.loaded_chains:
if c.mapping.rfam_acc not in rfam_acc_to_download.keys():
rfam_acc_to_download[c.mapping.rfam_acc] = [c]
else:
rfam_acc_to_download[c.mapping.rfam_acc].append(c)
# Get the list of Rfam families found in the update
rfam_acc_to_download = {}
for c in pp.loaded_chains:
if c.mapping.rfam_acc not in rfam_acc_to_download.keys():
rfam_acc_to_download[c.mapping.rfam_acc] = [c]
else:
rfam_acc_to_download[c.mapping.rfam_acc].append(c)
print(f"> Identified {len(rfam_acc_to_download.keys())} families to update and re-align with the crystals' sequences")
pp.fam_list = sorted(rfam_acc_to_download.keys())
print(f"> Identified {len(rfam_acc_to_download.keys())} families to update and re-align with the crystals' sequences")
pp.fam_list = sorted(rfam_acc_to_download.keys())
if len(pp.fam_list):
pp.prepare_sequences()
pp.realign()
if len(pp.fam_list):
pp.prepare_sequences()
pp.realign()
# At this point, the family table is almost up to date
# (lacking idty_percent and ali_filtered_length, both set in statistics.py)
# At this point, the family table is almost up to date
# (lacking idty_percent and ali_filtered_length, both set in statistics.py)
thr_idx_mgr = Manager()
idxQueue = thr_idx_mgr.Queue()
thr_idx_mgr = Manager()
idxQueue = thr_idx_mgr.Queue()
pp.remap()
pp.extractCMs()
pp.remap()
pp.extractCMs()
# At this point, the align_column and re_mapping tables are up-to-date.
......
......@@ -35,7 +35,7 @@ nohup bash -c 'time docker run --rm -v /path/to/3D/data/folder:/3D -v /path/to/s
# Method 2 : Classical command line installation (Linux only)
You need to install the dependencies:
- DSSR 1.9.9 or newer, you need to register to the X3DNA forum [here](http://forum.x3dna.org/site-announcements/download-instructions/) and then download the DSSR binary [on that page](http://forum.x3dna.org/downloads/3dna-download/). Make sure to have the `x3dna-dssr` binary in your $PATH variable so that RNANet.py finds it.
- DSSR 1.9.9 or newer: you need to register and request a DSSR (academic) license [on this page](http://innovation.columbia.edu/technologies/CU20391). Make sure to have the `x3dna-dssr` binary in your $PATH variable so that RNANet.py finds it.
- Infernal 1.1.4 or newer, to download at [Eddylab](http://eddylab.org/infernal/), several options are available depending on your preferences. Make sure to have the `cmalign`, `cmfetch`, `cmbuild`, `esl-alimanip`, `esl-alimerge`, `esl-alipid` and `esl-reformat` binaries in your $PATH variable, so that RNANet.py can find them.
- SINA (if you plan to use it), follow [these instructions](https://sina.readthedocs.io/en/latest/install.html) for example. Make sure to have the `sina` binary in your $PATH.
- Sqlite 3, available under the name *sqlite* in every distro's package manager,
......
This diff could not be displayed because it is too large.
7nqh_1_BA_1-1457
6cfj_1_1X
6cfj_1_2X
5hcq_1_1X
6cae_1_1X
5hcq_1_2X
5hcr_1_1X
4z8c_1_1X
5j4b_1_1X
6xhy_1_1X
6xhy_1_2X
5j4b_1_2X
4z8c_1_2X
6cae_1_2X
5j4c_1_1X
5w4k_1_1X
6of1_1_1X
6xhw_1_1X
5hcr_1_2X
5hd1_1_1X
5hcp_1_1X
6of1_1_2X
5hau_1_1W
5j4c_1_2X
5wis_1_1X
6xhv_1_1X
6xqd_1_1X
6nd5_1_1X
5w4k_1_2X
6xhw_1_2X
5hau_1_2W
6xqd_1_2X
6xhv_1_2X
4y4p_1_1X
6o97_1_1X
5hcp_1_2X
5doy_1_1X
4zer_1_1X
5wit_1_1X
5hd1_1_2X
6nd5_1_2X
4z3s_1_1X
7jql_1_1X
7jqm_1_1X
7jql_1_2X
5wis_1_2X
6nd6_1_1X
6o97_1_2X
4y4p_1_2X
7jqm_1_2X
4z3s_1_2X
4zer_1_2X
6uo1_1_2X
6uo1_1_1X
5doy_1_2X
5wit_1_2X
5f8k_1_1X
6nd6_1_2X
6xqe_1_1X
6xqe_1_2X
6n9e_1_1X
6n9e_1_2X
6n9f_1_1X
5f8k_1_2X
6n9f_1_2X
6xz7_1_F
6xzb_1_F2
6xza_1_F2
6y69_1_W
5afi_1_V
5afi_1_W
6h4n_1_W
5wdt_1_V
5wfs_1_V
5wdt_1_W
5wfs_1_W
5we4_1_V
5we4_1_W
5uq8_1_Y
6c4i_1_Y
6c4i_1_X
6yef_1_X
5zeb_1_V
5zep_1_W
5lzd_1_V
5we6_1_V
5wfk_1_V
5wfk_1_W
5we6_1_W
5u4i_1_Y
5uq7_1_Y
5u4i_1_X
5lza_1_V
5wf0_1_V
5wf0_1_W
5zeu_1_V
5l3p_1_X
3jcj_1_V
6gxm_1_X
6gwt_1_X
6gxn_1_X
6gxo_1_X
3j9y_1_V
6o9k_1_Y
6o7k_1_V
5lzf_1_V
3jcn_1_V
5lzc_1_V
5u4j_1_X
5u4j_1_Z
5lzb_1_V
6h58_1_W
6h58_1_WW
5j8b_1_X
4v7j_1_AV
4v7j_1_BV
4v7k_1_BV
4v7k_1_AV
4v7k_1_BW
4v7k_1_AW
4v7j_1_AW
4v7j_1_BW
4v4j_1_Z
6i0v_1_B
5k77_1_X
5k77_1_V
5k77_1_Y
5k77_1_W
5k77_1_Z
4pei_1_X
4pei_1_V
4pei_1_W
4pei_1_Z
4pei_1_Y
4a3c_1_P
4a3e_1_P
6lkq_1_U
7k00_1_B
6ys3_1_A
6qdw_1_A
6hcj_1_Q3
6hcq_1_Q3
6o8w_1_U
5mmm_1_Z
4w2e_1_W
5j4b_1_1Y
6cfj_1_1W
5w4k_1_1Y
6xhy_1_1W
5wit_1_1W
6cfj_1_1Y
6cfj_1_2W
5j4c_1_1W
5wis_1_1Y
5j4c_1_1Y
6xhw_1_1W
6cfj_1_2Y
5wis_1_1W
5j4b_1_1W
6xhv_1_1W
6xhy_1_2W
5j4c_1_2W
5j4b_1_2W
5j4b_1_2Y
5j4c_1_2Y
5w4k_1_1W
6nd5_1_1Y
6xhw_1_2W
5wis_1_2Y
5wit_1_2W
6xhv_1_2W
5doy_1_1Y
5w4k_1_2Y
4y4p_1_1Y
4z3s_1_1Y
5doy_1_1W
5doy_1_2Y
6nd5_1_1W
4z3s_1_2Y
4z3s_1_1W
5w4k_1_2W
6nd5_1_2Y
4y4p_1_2Y
6uo1_1_2Y
6uo1_1_2W
4y4p_1_1W
4z3s_1_2W
6uo1_1_1Y
6xhy_1_1Y
6uo1_1_1W
5wis_1_2W
5wit_1_1Y
6nd5_1_2W
4y4p_1_2W
5doy_1_2W
5wit_1_2Y
6xhv_1_1Y
6xhy_1_2Y
6xhw_1_1Y
6xhw_1_2Y
6ucq_1_1Y
6xhv_1_2Y
4v4i_1_Z
6ucq_1_1X
6ucq_1_2Y
4w2e_1_X
6ucq_1_2X
7n1p_1_DT
7n2u_1_DT
6yss_1_W
7n30_1_DT
7n31_1_DT
7n2c_1_DT
5afi_1_Y
5uq8_1_Z
5wdt_1_Y
5wfs_1_Y
6ysr_1_W
5we4_1_Y
6yst_1_W
5uq7_1_Z
5we6_1_Y
5wfk_1_Y
5wf0_1_Y
6o9j_1_V
6ysu_1_W
3j46_1_A
5j8b_1_Y
5j8b_1_W
3bbv_1_Z
5aj0_1_BV
5aj0_1_BW
4wt8_1_AB
4wt8_1_BB
4v4j_1_Y
4v4i_1_Y
5uq8_1_X
5uq7_1_X
4v4j_1_W
4v4i_1_W
4wt8_1_CS
4wt8_1_DS
4v4j_1_X
4v4i_1_X
6lkq_1_S
5h5u_1_H
7d6z_1_F
5lze_1_Y
5lze_1_V
5lze_1_X
3jcj_1_G
6o7k_1_G
6d30_1_C
6j7z_1_C
3er9_1_D
5kal_1_Y
4nia_1_3
5kal_1_Z
4nia_1_7
4nia_1_4
5new_1_C
4nia_1_U
4nia_1_6
4oq9_1_7
4nia_1_1
4oq9_1_4
4nia_1_8
4oq9_1_8
4nia_1_5
2vrt_1_E
4nia_1_W
4oq9_1_6
4oq8_1_D
4nia_1_Z
4oq9_1_W
4oq9_1_5
4nia_1_2
2vrt_1_F
4oq9_1_U
4oq9_1_Z
4oq9_1_2
4oq9_1_3
1ddl_1_E
4oq9_1_1
6rt5_1_A
6rt5_1_E
6lkq_1_T
6ys3_1_B
6qdw_1_B
3jbv_1_B
3jbu_1_B
6do8_1_B
6dpi_1_B
6dp9_1_B
6dpb_1_B
6dmn_1_B
6dpp_1_B
6dpk_1_B
6dpd_1_B
6dot_1_B
6dok_1_B
6dp8_1_B
6dpl_1_B
6dpg_1_B
6dou_1_B
6dpc_1_B
6do9_1_B
6dmv_1_B
6dp4_1_B
6dpn_1_B
6doj_1_B
6dph_1_B
6dos_1_B
6doo_1_B
6dp6_1_B
6dox_1_B
6dp5_1_B
6dol_1_B
6dp1_1_B
6doz_1_B
6dp7_1_B
6doq_1_B
6dpa_1_B
6dom_1_B
6dog_1_B
6dop_1_B
6doh_1_B
6doa_1_B
6don_1_B
6dov_1_B
6dpo_1_B
6dod_1_B
6dob_1_B
6dow_1_B
6dpm_1_B
6dpf_1_B
6dp3_1_B
6dp2_1_B
6dpe_1_B
6dpj_1_B
6dor_1_B
6dof_1_B
6dp0_1_B
6doi_1_B
6doc_1_B
6doe_1_B
6n6g_1_D
4b3r_1_W
4b3t_1_W
4b3s_1_W
7b5k_1_X
5o2r_1_X
5kcs_1_1X
7n1p_1_PT
7n2u_1_PT
7n30_1_PT
7n31_1_PT
7n2c_1_PT
6zvk_1_E2
6zvk_1_H2
7a01_1_E2
7a01_1_H2
6fti_1_U
6fti_1_W
6ftj_1_U
6ftj_1_W
6ftg_1_U
6ftg_1_W
6x1b_1_D
6x1b_1_F
5f6c_1_C
6i0t_1_B
1b2m_1_C
1b2m_1_D
1b2m_1_E
2uxc_1_Y
4a3g_1_P
4a3j_1_P
7k00_1_5
5mmi_1_Z
3j9m_1_U
7a5k_1_U3
6nu2_1_U
7a5g_1_U3
6nu3_1_U
5c0y_1_C
6n6f_1_D
4ohy_1_B
4oi1_1_B
4oi0_1_B
5ipl_1_3
6utw_1_333
5ipm_1_3
5ipn_1_3
4ylo_1_3
4yln_1_6
4ylo_1_6
4yln_1_3
4yln_1_9
5lzf_1_Y
1n32_1_Z
5zsl_1_D
5zsd_1_C
5zsd_1_D
5zsl_1_E
4nku_1_D
4nku_1_H
1cwp_1_E
6thn_1_A
6qik_1_Y
6rzz_1_Y
6ri5_1_Y
6qt0_1_Y
6qtz_1_Y
6t83_1_1B
6t83_1_3B
6t83_1_AA
6t83_1_CA
6s05_1_Y
5jcs_1_X
5fl8_1_X
6ole_1_V
6om0_1_V
6oli_1_V
6om7_1_V
6w6l_1_V
6olf_1_V
3erc_1_G
6of1_1_1W
6cae_1_1Y
6o97_1_1W
6of1_1_1Y
6of1_1_2W
6o97_1_1Y
6nd6_1_1Y
6cae_1_1W
6of1_1_2Y
6cae_1_2Y
6nd6_1_1W
6cae_1_2W
6o97_1_2Y
6nd6_1_2Y
6o97_1_2W
6nd6_1_2W
4wtm_1_T
4wtm_1_P
6gz4_1_BW
6xz7_1_G
6xzb_1_G2
6gz5_1_BW
6gz3_1_BW
4hot_1_X
6d2z_1_C
7eh0_1_I
4tu0_1_F
4tu0_1_G
6r9o_1_B
6is0_1_C
5lzc_1_X
5lzb_1_X
5lzd_1_Y
5lzc_1_Y
5lzb_1_Y
6zvi_1_E
6sv4_1_MC
6sv4_1_MB
7nrd_1_SM
6i7o_1_MB
6zvi_1_D
6sv4_1_NB
6sv4_1_NC
6i7o_1_NB
7nsq_1_V
6swa_1_Q
6swa_1_R
6ole_1_T
6om0_1_T
6oli_1_T
6om7_1_T
6olf_1_T
6w6l_1_T
6tnu_1_M
5mc6_1_M
7nrc_1_SM
6tb3_1_N
7b7d_1_SM
7b7d_1_SN
6tnu_1_N
7nrc_1_SN
7nrd_1_SN
6zot_1_C
4qu6_1_B
2uxb_1_X
2x1f_1_B
2x1a_1_B
5o1y_1_B
4kzy_1_I
4kzz_1_I
4kzx_1_I
6dzi_1_H
5zeu_1_A
6evj_1_N
6evj_1_M
6wub_1_A
6wua_1_A
6mpi_1_W
5mfx_1_B
5w0m_1_J
5bud_1_E
5w0m_1_I
5w0m_1_H
4j7m_1_B
5bud_1_D
6a4e_1_B
6a4e_1_D
6hxx_1_AA
6hxx_1_AB
6hxx_1_AC
6hxx_1_AD
6hxx_1_AE
6hxx_1_AF
6hxx_1_AG
6hxx_1_AH
6hxx_1_AI
6hxx_1_AJ
6hxx_1_AK
6hxx_1_AL
6hxx_1_AM
6hxx_1_AN
6hxx_1_AO
6hxx_1_AP
6hxx_1_AQ
6hxx_1_AR
6hxx_1_AS
6hxx_1_AT
6hxx_1_AU
6hxx_1_AV
6hxx_1_AW
6hxx_1_AX
6hxx_1_AY
6hxx_1_AZ
6hxx_1_BA
6hxx_1_BB
6hxx_1_BC
6hxx_1_BD
6hxx_1_BE
6hxx_1_BF
6hxx_1_BG
6hxx_1_BH
6hxx_1_BI
5odv_1_A
5odv_1_B
5odv_1_C
5odv_1_D
5odv_1_E
5odv_1_F
5odv_1_G
5odv_1_H
5odv_1_I
5odv_1_J
5odv_1_K
5odv_1_L
5odv_1_M
5odv_1_N
5odv_1_O
5odv_1_P
5odv_1_Q
5odv_1_R
5odv_1_S
5odv_1_T
5odv_1_U
5odv_1_V
5odv_1_W
5odv_1_X
6t34_1_A
6t34_1_B
6t34_1_C
6t34_1_D
6t34_1_E
6t34_1_F
6t34_1_G
6t34_1_H
6t34_1_I
6t34_1_J
6t34_1_K
6t34_1_L
6t34_1_M
6t34_1_N
6t34_1_O
6t34_1_P
6t34_1_Q
6t34_1_R
6t34_1_S
6ip8_1_ZY
6ip5_1_ZY
6ip5_1_ZU
6ip6_1_ZY
6ip8_1_ZZ
6ip6_1_ZZ
6uu3_1_333
6uu1_1_333
3er8_1_H
3er8_1_G
3er8_1_F
5o3j_1_B
4dr7_1_B
1i5l_1_Y
1i5l_1_U
4dr6_1_B
6i2n_1_U
4v68_1_A0
6vyu_1_Y
6vyw_1_Y
6vz7_1_Y
6vz5_1_Y
6vz3_1_Y
6vyy_1_Y
6vyx_1_Y
6vyz_1_Y
6vz2_1_Y
1mvr_1_1
6vyt_1_Y
1cgm_1_I
3jb7_1_T
3jb7_1_M
3j0o_1_D
3j0l_1_D
3j0q_1_D
3j0p_1_D
2tmv_1_R
5a79_1_R
5a7a_1_R
2om3_1_R
2xea_1_R
4v7e_1_AA
4v7e_1_AC
4wtl_1_T
4wtl_1_P
1xnq_1_W
7n2v_1_DT
4peh_1_Z
1vq6_1_4
4am3_1_D
4am3_1_H
4am3_1_I
4lj0_1_C
4lj0_1_D
4lj0_1_E
5lzy_1_HH
4wtj_1_T
4wtj_1_P
4xbf_1_D
6n6d_1_D
6n6k_1_C
6n6k_1_D
3rtj_1_D
6ty9_1_M
6tz1_1_N
6q1h_1_D
6q1h_1_H
6p7p_1_F
6p7p_1_E
6p7p_1_D
6vm6_1_J
6vm6_1_G
6wan_1_K
6wan_1_H
6wan_1_G
6wan_1_L
6wan_1_I
6ywo_1_F
6wan_1_J
4oau_1_A
6ywo_1_E
6ywo_1_K
6vm6_1_I
6vm6_1_H
6ywo_1_I
2a1r_1_C
6m6v_1_F
6m6v_1_E
2a1r_1_D
3gpq_1_E
3gpq_1_F
6o79_1_C
6vm6_1_K
6m6v_1_G
6hyu_1_D
1laj_1_R
6ybv_1_K
6sce_1_B
6xl1_1_C
6scf_1_I
6scf_1_K
6yud_1_K
6yud_1_O
6scf_1_M
6yud_1_P
6scf_1_L
6yud_1_M
6yud_1_Q
6w11_1_C
6o6x_1_D
4ba2_1_R
7bdv_1_F
7bdv_1_H
6o6x_1_C
7did_1_C
6o7b_1_C
6o6v_1_C
6wxx_1_Y
6wxx_1_X
6r7b_1_D
6r9r_1_D
6ov0_1_E
6ov0_1_H
6ov0_1_G
6o6v_1_D
6ov0_1_F
6o7b_1_D
5e02_1_C
6r9r_1_E
6r7b_1_E
6o7i_1_I
6o7h_1_K
7l6t_1_C
7jyy_1_F
7jyy_1_E
7jz0_1_F
7jz0_1_E
6rt6_1_A
6rt6_1_E
1y1y_1_P
5zuu_1_I
5zuu_1_G
7am2_1_R1
4peh_1_W
4peh_1_V
4peh_1_X
4peh_1_Y
7d8c_1_C
6mkn_1_W
7kl3_1_B
4cxg_1_C
4cxh_1_C
4eya_1_E
4eya_1_F
4eya_1_Q
4eya_1_R
4ht9_1_E
6z1p_1_AB
6z1p_1_AA
4ii9_1_C
5mq0_1_3
5uk4_1_X
5uk4_1_V
5uk4_1_W
5uk4_1_U
5f6c_1_E
7nwh_1_HH
4rcj_1_B
1xnr_1_W
6e0o_1_C
6o75_1_D
6o75_1_C
6e0o_1_B
3j06_1_R
4eya_1_G
4eya_1_H
4eya_1_S
4eya_1_T
4dr4_1_V
1ibl_1_Z
1ibm_1_Z
4dr5_1_V
4d61_1_J
7nwg_1_Q3
5tbw_1_SR
6hhq_1_SR
6zvi_1_H
6sv4_1_2B
6sv4_1_2C
6t83_1_2B
6t83_1_A
6i7o_1_2B
6q8y_1_N
6sv4_1_N
6i7o_1_N
6swa_1_S
5k8h_1_A
5z4a_1_B
3jbu_1_V
1h2c_1_R
1h2d_1_S
1h2d_1_R
6szs_1_X
5mgp_1_X
6enu_1_X
6enf_1_X
6enj_1_X
1pvo_1_L
1pvo_1_G
1pvo_1_H
1pvo_1_J
1pvo_1_K
2ht1_1_K
2ht1_1_J
5sze_1_C
6wre_1_D
6i0u_1_B
5zsa_1_C
5zsa_1_D
1n34_1_Z
3pf5_1_S
6ppn_1_A
6ppn_1_I
5flx_1_Z
6eri_1_AX
7k5l_1_R
7d80_1_Y
7du2_1_R
4v8z_1_CX
6kqe_1_I
5uh8_1_I
5vi5_1_Q
4xln_1_T
4xlr_1_T
4xln_1_Q
5i2d_1_K
5i2d_1_V
4xlr_1_Q
6sty_1_C
6sty_1_F
2xs5_1_D
3ok4_1_N
3ok4_1_L
3ok4_1_Z
3ok4_1_4
3ok4_1_V
3ok4_1_X
3ok4_1_P
3ok4_1_H
3ok4_1_J
3ok4_1_R
3ok4_1_T
3ok4_1_2
6n6h_1_D
5wnt_1_B
3b0u_1_B
3b0u_1_A
4x9e_1_G
4x9e_1_H
6z1p_1_BB
6z1p_1_BA
2uxd_1_X
6ywe_1_BB
3ol9_1_D
3ol9_1_H
3ol9_1_L
3ol9_1_P
3olb_1_L
3olb_1_P
3olb_1_D
3olb_1_H
3ol6_1_D
3ol6_1_H
3ol6_1_L
3ol6_1_P
3ol8_1_D
3ol8_1_H
3ol7_1_L
3ol7_1_P
3ol7_1_D
3ol7_1_H
3ol8_1_L
3ol8_1_P
6yrq_1_E
6yrq_1_H
6yrq_1_G
6yrq_1_F
6yrb_1_C
6yrb_1_D
6gz5_1_BV
6gz4_1_BV
6gz3_1_BV
6fti_1_Q
7njc_1_B
4v7e_1_AB
4v7e_1_AE
4v7e_1_AD
4x62_1_B
4x64_1_B
4x65_1_B
1xmq_1_W
4x66_1_B
3t1h_1_W
3t1y_1_W
1xmo_1_W
6kr6_1_B
6z8k_1_X
4csf_1_U
4csf_1_Q
4csf_1_G
4csf_1_M
4csf_1_K
4csf_1_A
4csf_1_I
4csf_1_S
4csf_1_C
4csf_1_W
4csf_1_O
4csf_1_E
6ywx_1_BB
6th6_1_AA
6skg_1_AA
6skf_1_AA
6q8y_1_M
6i7o_1_M
6zmw_1_W
6ybv_1_W
2fz2_1_D
2xpj_1_D
2vrt_1_H
2vrt_1_G
6r9m_1_B
4nia_1_C
4nia_1_A
4nia_1_H
4nia_1_N
4nia_1_G
4nia_1_D
4nia_1_B
4nia_1_I
4nia_1_E
4nia_1_M
4oq9_1_I
4oq9_1_G
4oq9_1_C
4oq9_1_H
4oq9_1_N
4oq9_1_A
4oq9_1_D
4oq9_1_E
4oq9_1_M
4oq9_1_B
5uhc_1_I
1uvn_1_F
1uvn_1_B
1uvn_1_D
4wtk_1_T
4wtk_1_P
1vqn_1_4
4oav_1_C
4oav_1_A
4i67_1_B
6k32_1_T
6k32_1_P
5mmj_1_A
5x8r_1_A
6yw5_1_AA
6ywe_1_AA
6ywy_1_AA
6ywx_1_AA
3nvk_1_G
3nvk_1_S
1cwp_1_D
1cwp_1_F
5z4j_1_B
5gmf_1_E
5gmf_1_H
6e4p_1_J
5gmf_1_F
5gmf_1_G
5gmg_1_D
5gmg_1_C
6e4p_1_K
3ie1_1_E
3ie1_1_H
3ie1_1_F
4dr7_1_V
3ie1_1_G
3s4g_1_C
3s4g_1_B
2qqp_1_R
1nb7_1_E
1nb7_1_F
4hos_1_X
3p6y_1_T
3p6y_1_V
3p6y_1_U
3p6y_1_Q
3p6y_1_W
5dto_1_B
4cxh_1_X
1uvj_1_F
1uvj_1_D
1uvj_1_E
6kqd_1_I
6kqd_1_S
5uh5_1_I
1ytu_1_F
1ytu_1_D
4kzz_1_J
7a09_1_F
5t2c_1_AN
3j6b_1_E
4v4f_1_B6
4v4f_1_A5
4v4f_1_A3
4v4f_1_B0
4v4f_1_B9
4v4f_1_A2
4v4f_1_A8
4v4f_1_A1
4v4f_1_A9
4v4f_1_BZ
4v4f_1_B8
4v4f_1_B7
4v4f_1_B5
4v4f_1_A0
4v4f_1_A7
4v4f_1_A4
4v4f_1_AZ
4v4f_1_B3
4v4f_1_B1
4v4f_1_B4
4v4f_1_A6
4v4f_1_B2
7m4y_1_V
7m4x_1_V
6v3a_1_V
6v39_1_V
5it9_1_I
7jqc_1_I
5zsb_1_C
5zsb_1_D
5zsn_1_D
5zsn_1_E
6gfw_1_R
6zm6_1_X
6zm5_1_X
6zm6_1_W
6zm5_1_W
6n6e_1_D
4g7o_1_I
4g7o_1_S
5x22_1_S
5x22_1_I
5x21_1_I
5uh6_1_I
6l74_1_I
5uh9_1_I
7a5j_1_X
6sag_1_R
4udv_1_R
5zsc_1_D
5zsc_1_C
6woy_1_I
6wox_1_I
4gkk_1_W
4v9e_1_AG
4v9e_1_BM
4v9e_1_AM
4v9e_1_AA
4v9e_1_BA
4v9e_1_BG
5lzs_1_II
6fqr_1_C
6ha1_1_X
5kcr_1_1X
6uu4_1_333
6uu0_1_333
6uuc_1_333
6uu2_1_333
6xl9_1_R
6b6h_1_3
6xh8_1_3
6pb4_1_3
3m7n_1_Z
3m85_1_X
3m85_1_Z
3m85_1_Y
5wnp_1_B
5wnv_1_B
5yts_1_B
1utd_1_6
1utd_1_Z
1utd_1_4
1utd_1_7
1utd_1_9
1utd_1_5
1utd_1_3
1utd_1_2
1utd_1_8
1utd_1_1
6n6i_1_C
6n6i_1_D
6n6a_1_D
6ij2_1_F
6ij2_1_G
6ij2_1_H
6ij2_1_E
3u2e_1_D
3u2e_1_C
7eh1_1_I
5uef_1_C
5uef_1_D
7eh2_1_R
7eh2_1_I
4x4u_1_H
4afy_1_D
6oy5_1_I
6owl_1_B
6owl_1_C
4afy_1_C
4lq3_1_R
6s0m_1_C
6ymw_1_C
7a5g_1_J
6gx6_1_B
4k4s_1_D
4k4s_1_H
4k4t_1_H
4k4t_1_D
1xpu_1_G
1xpu_1_L
1xpr_1_L
1xpu_1_H
1xpo_1_K
1xpo_1_J
1xpu_1_J
1xpo_1_H
1xpr_1_J
1xpu_1_K
1xpr_1_K
1xpo_1_M
1xpo_1_L
1xpu_1_M
1xpr_1_M
1xpo_1_G
1xpr_1_H
1xpr_1_G
5x70_1_E
5x70_1_G
6gc5_1_F
6gc5_1_H
6gc5_1_G
1n1h_1_B
7n2v_1_PT
4ohz_1_B
6t83_1_6B
4gv6_1_C
4gv6_1_B
4gv3_1_C
4gv3_1_B
4gv9_1_E
6i7o_1_L
2a8v_1_D
6qx3_1_G
2xnr_1_C
4gkj_1_W
5y88_1_X
3j0o_1_H
3j0l_1_H
3j0p_1_H
3j0q_1_H
3j0o_1_F
3j0l_1_F
3j0p_1_F
3j0q_1_F
3j0o_1_B
3j0l_1_B
3j0o_1_C
3j0l_1_C
3j0q_1_C
3j0p_1_C
3j0o_1_A
3j0l_1_A
3j0q_1_A
3j0p_1_A
6ys3_1_V
6qdw_1_V
5hk0_1_F
4qm6_1_D
4qm6_1_C
4jzu_1_C
4jzv_1_C
5ytv_1_B
4k4z_1_P
4k4z_1_D
4k4x_1_L
4k4z_1_L
4k4x_1_D
4k4z_1_H
4k4x_1_H
4k4x_1_P
4a3b_1_P
4a3m_1_P
6u6y_1_E
6u6y_1_G
6u6y_1_F
6u6y_1_H
6qik_1_X
6rzz_1_X
6ri5_1_X
6qt0_1_X
6qtz_1_X
6s05_1_X
6t83_1_BB
6t83_1_4B
5fl8_1_Z
5jcs_1_Z
5mrc_1_BB
5mre_1_BB
5mrf_1_BB
3j46_1_P
4e6b_1_A
4e6b_1_B
6a6l_1_D
1uvi_1_D
1uvi_1_F
1uvi_1_E
4m7d_1_P
4k4u_1_D
4k4u_1_H
6rt7_1_E
6rt7_1_A
2voo_1_C
2voo_1_D
5k78_1_X
5k78_1_Y
4ylo_1_9
5vyc_1_I2
5vyc_1_I3
5vyc_1_I5
5vyc_1_I1
5vyc_1_I6
5vyc_1_I4
6ip8_1_2M
6ip5_1_2M
6ip6_1_2M
6qcs_1_M
7b5k_1_Z
4nia_1_O
4nia_1_J
4nia_1_K
4nia_1_L
4nia_1_F
4oq9_1_K
4oq9_1_O
4oq9_1_J
4oq9_1_F
4oq9_1_L
6r9q_1_B
7m4u_1_A
6v3a_1_SN1
6v3b_1_SN1
6v39_1_SN1
6v3e_1_SN1
4dr6_1_V
6kql_1_I
4eya_1_M
4eya_1_N
4eya_1_A
4eya_1_B
2wj8_1_D
2wj8_1_I
2wj8_1_L
2wj8_1_F
2wj8_1_C
2wj8_1_Q
2wj8_1_J
2wj8_1_P
2wj8_1_K
2wj8_1_E
2wj8_1_T
2wj8_1_B
2wj8_1_O
2wj8_1_N
2wj8_1_A
2wj8_1_H
2wj8_1_R
2wj8_1_M
2wj8_1_S
2wj8_1_G
4e6b_1_E
4e6b_1_F
6p71_1_I
3pdm_1_R
5det_1_P
5els_1_I
4n2s_1_B
5fl8_1_Y
5jcs_1_Y
4yoe_1_E
6ow3_1_I
6ovy_1_I
6oy6_1_I
4qvd_1_H
5gxi_1_B
7n06_1_G
7n06_1_H
7n06_1_I
7n06_1_J
7n06_1_K
7n06_1_L
7n33_1_G
7n33_1_H
7n33_1_I
7n33_1_J
7n33_1_K
7n33_1_L
5mc6_1_N
4eya_1_O
4eya_1_P
4eya_1_C
4eya_1_D
6htq_1_V
6htq_1_W
6htq_1_U
6uu6_1_333
5a0v_1_F
3avt_1_T
6d1v_1_C
4s2x_1_B
4s2y_1_B
5wnu_1_B
1vtm_1_R
5elt_1_F
5elt_1_E
6xlj_1_R
6u9x_1_H
6u9x_1_K
5elk_1_R
6okk_1_G
4cxg_1_A
4cxh_1_A
6bk8_1_I
4cxg_1_B
4cxh_1_B
5z4d_1_B
6o78_1_E
6xa1_1_BV
6ha8_1_X
1m8w_1_E
1m8w_1_F
5udi_1_B
5udl_1_B
5udk_1_B
5udj_1_B
5w5i_1_B
5w5i_1_D
5w5h_1_B
5w5h_1_D
4eya_1_K
4eya_1_L
4eya_1_I
4eya_1_J
4g9z_1_E
4g9z_1_F
3nma_1_B
3nma_1_C
6een_1_G
6een_1_I
6een_1_H
4wti_1_T
4wti_1_P
5l3p_1_Y
4hor_1_X
3rzo_1_R
2f4v_1_Z
1qln_1_R
3cw1_1_X
3cw1_1_W
7b0y_1_A
6ogy_1_M
6ogy_1_N
6uej_1_B
6ywy_1_BB
5ytx_1_B
4g0a_1_H
6r9p_1_B
3koa_1_C
4n48_1_D
4n48_1_G
6kug_1_B
6ktc_1_V
6ole_1_U
6om0_1_U
6olg_1_BV
6oli_1_U
6om7_1_U
6w6l_1_U
6olz_1_BV
6olf_1_U
5lzd_1_X
6m7k_1_B
3cd6_1_4
3cma_1_5
6n9e_1_2W
1vqo_1_4
1qvg_1_3
3cme_1_5
5lzd_1_W
5lze_1_W
5lzc_1_W
5lzb_1_W
3wzi_1_C
1n33_1_Z
6dti_1_W
3d2s_1_F
3d2s_1_H
5mrc_1_AA
5mre_1_AA
5mrf_1_AA
7jhy_1_Z
4wkr_1_C
4v99_1_EC
4v99_1_AC
4v99_1_BH
4v99_1_CH
4v99_1_AM
4v99_1_DC
4v99_1_JW
4v99_1_EH
4v99_1_BW
4v99_1_FW
4v99_1_AW
4v99_1_BC
4v99_1_BM
4v99_1_IC
4v99_1_EM
4v99_1_ER
4v99_1_IW
4v99_1_JH
4v99_1_JR
4v99_1_AH
4v99_1_GR
4v99_1_IR
4v99_1_BR
4v99_1_CW
4v99_1_HR
4v99_1_FH
4v99_1_HC
4v99_1_DW
4v99_1_GC
4v99_1_JC
4v99_1_DM
4v99_1_EW
4v99_1_AR
4v99_1_CR
4v99_1_JM
4v99_1_CC
4v99_1_IH
4v99_1_FR
4v99_1_CM
4v99_1_IM
4v99_1_FM
4v99_1_FC
4v99_1_GH
4v99_1_HM
4v99_1_HH
4v99_1_DR
4v99_1_HW
4v99_1_GW
4v99_1_DH
4v99_1_GM
6rt4_1_D
6rt4_1_C
6zvh_1_X
4dwa_1_D
6n6c_1_D
6n6j_1_C
6n6j_1_D
6p7q_1_E
6p7q_1_F
6p7q_1_D
6rcl_1_C
5jju_1_C
4ejt_1_G
6lkq_1_W
3qsu_1_P
3qsu_1_R
2xs7_1_B
1n38_1_B
4qvc_1_G
6mpf_1_W
6spc_1_A
6spe_1_A
6zvk_1_D2
7a01_1_D2
6fti_1_V
6ftj_1_V
6ftg_1_V
4g0a_1_G
4g0a_1_F
4g0a_1_E
2b2d_1_S
5hkc_1_C
1rmv_1_B
4qu7_1_X
4qu7_1_V
4qu7_1_U
6pmi_1_3
6pmj_1_3
5hjz_1_C
6ydp_1_AA_1176-2737
6ydw_1_AA_1176-2737
7d1a_1_A_805-902
......@@ -10,18 +1514,18 @@
7o7z_1_AH_144-220
4c9d_1_D_29-1
4c9d_1_C_29-1
7aih_1_1_2984-3610
7aih_1_1_2400-2963
7ane_1_2_2489-3115
7aih_1_1_2984-3610
7ane_1_2_1904-2468
7ane_1_2_2489-3115
5g2x_1_A_595-692
7aor_1_2_2589-3210
7aor_1_2_2020-2579
7aor_1_2_2589-3210
7a5p_1_2_259-449
7aor_1_A_2589-3210
7aor_1_A_2020-2579
7am2_1_1_2491-3117
7aor_1_A_2589-3210
7am2_1_1_1904-2470
7ane_1_1_2489-3115
7am2_1_1_2491-3117
7ane_1_1_1904-2468
7ane_1_1_2489-3115
6uz7_1_8_2140-2825
......
This diff could not be displayed because it is too large.
......@@ -5,7 +5,7 @@ rm -rf latest_run.log errors.txt
# Run RNANet
bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ -r 20.0 --no-homology --redundant --extract' > latest_run.log 2>&1
bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ -r 20.0 --redundant --sina --extract -s --stats-opts="--wadley --distance-matrices" --archive' > latest_run.log 2>&1
bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ -r 20.0 --redundant --extract -s --stats-opts="-r 20.0 --wadley --hire-rna --distance-matrices" --archive' >> latest_run.log 2>&1
echo 'Compressing RNANet.db.gz...' >> latest_run.log
touch results/RNANet.db # update last modification date
gzip -k /home/lbecquey/Projects/RNANet/results/RNANet.db # compress it
......
# This is a script supposed to be run periodically as a cron job
# This one uses argument --from-scratch, so all is recomputed ! /!\
# Run it once or twice a year; otherwise, the faster update runs should be enough.
cd /home/lbecquey/Projects/RNANet
rm -rf latest_run.log errors.txt
# Run RNANet
bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ --from-scratch --ignore-issues -r 20.0 --no-homology --redundant --extract' > latest_run.log 2>&1
bash -c 'time python3.8 ./RNAnet.py --3d-folder /home/lbecquey/Data/RNA/3D/ --seq-folder /home/lbecquey/Data/RNA/sequences/ --from-scratch --ignore-issues -r 20.0 --redundant --extract -s --stats-opts="-r 20.0 --wadley --hire-rna --distance-matrices" --archive' >> latest_run.log 2>&1
echo 'Compressing RNANet.db.gz...' >> latest_run.log
touch results/RNANet.db # update last modification date
gzip -k /home/lbecquey/Projects/RNANet/results/RNANet.db # compress it
rm -f results/RNANet.db-wal results/RNANet.db-shm # SQLite temporary files
# Save the latest results
export DATE=`date +%Y%m%d`
echo "Creating new release in ./archive/ folder ($DATE)..." >> latest_run.log
cp /home/lbecquey/Projects/RNANet/results/summary.csv /home/lbecquey/Projects/RNANet/archive/summary_latest.csv
cp /home/lbecquey/Projects/RNANet/results/summary.csv "/home/lbecquey/Projects/RNANet/archive/summary_$DATE.csv"
cp /home/lbecquey/Projects/RNANet/results/families.csv /home/lbecquey/Projects/RNANet/archive/families_latest.csv
cp /home/lbecquey/Projects/RNANet/results/families.csv "/home/lbecquey/Projects/RNANet/archive/families_$DATE.csv"
cp /home/lbecquey/Projects/RNANet/results/frequencies.csv /home/lbecquey/Projects/RNANet/archive/frequencies_latest.csv
cp /home/lbecquey/Projects/RNANet/results/pair_types.csv /home/lbecquey/Projects/RNANet/archive/pair_types_latest.csv
mv /home/lbecquey/Projects/RNANet/results/RNANet.db.gz /home/lbecquey/Projects/RNANet/archive/
# Init Seafile synchronization between RNANet library and ./archive/ folder (just the first time !)
# seaf-cli sync -l 8e082c6e-b9ed-4b2f-9279-de2177134c57 -s https://entrepot.ibisc.univ-evry.fr -u l****.b*****y@univ-evry.fr -p ****************** -d archive/
# Sync in Seafile
seaf-cli start >> latest_run.log 2>&1
echo 'Waiting 10m for SeaFile synchronization...' >> latest_run.log
sleep 15m
echo `seaf-cli status` >> latest_run.log
seaf-cli stop >> latest_run.log 2>&1
echo 'We are '`date`', update completed.' >> latest_run.log
......@@ -36,6 +36,6 @@ for fam in families:
# Now re run RNANet normally.
command = ["python3.8", "./RNAnet.py", "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0",
"--redundant", "--sina", "--extract", "-s", "--stats-opts=\"--wadley --distance-matrices\""]
"--redundant", "--extract", "-s", "--stats-opts=\"-r 20.0 --wadley --hire-rna --distance-matrices\""]
print(' '.join(command))
subprocess.run(command)
\ No newline at end of file
......
......@@ -3,8 +3,9 @@ import subprocess, os, sys
# Put a list of problematic chains here, they will be properly deleted and recomputed
problems = [
    # One chain label per entry. Every entry needs its trailing comma:
    # adjacent string literals without a comma are silently concatenated
    # by Python into a single (non-existent) chain label, so the chains
    # would never be deleted and recomputed.
    "1k73_1_A",
    "1k73_1_B",
    "7nhm_1_A_1-2923",
    "4wfa_1_X_1-2923",
    "4wce_1_X_1-2923",
]
# provide the path to your data folders, the RNANet.db file, and the RNANet.py file as arguments to this script
......@@ -22,6 +23,7 @@ for p in problems:
# Remove the datapoints files and 3D files
subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_mapped_to_Rfam/{p}.cif"])
subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_only/{p}.cif"])
files = [ f for f in os.listdir(path_to_3D_data + "/datapoints") if p in f ]
for f in files:
subprocess.run(["rm", '-f', path_to_3D_data + f"/datapoints/{f}"])
......@@ -38,14 +40,14 @@ for p in problems:
print(' '.join(command))
subprocess.run(command)
command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--extract", "--only", p]
command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "--redundant", "-r", "20.0", "--extract", "--only", p]
else:
# Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys
command = ["sqlite3", path_to_db, f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and rfam_acc is null;"]
print(' '.join(command))
subprocess.run(command)
command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0", "--no-homology", "--extract", "--only", p]
command = ["python3.8", path_to_RNANet, "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "--redundant", "-r", "20.0", "--no-homology", "--extract", "--only", p]
# Re-run RNANet
os.chdir(os.path.dirname(os.path.realpath(path_to_db)) + '/../')
......
......@@ -7,7 +7,7 @@
# Run this file if you want the base counts, pair-type counts, identity percents, etc
# in the database.
import getopt, glob, json, os, sqlite3, shlex, subprocess, sys, warnings
import getopt, json, os, sqlite3, shlex, subprocess, sys, warnings
import numpy as np
import pandas as pd
import scipy.stats as st
......@@ -27,7 +27,6 @@ from tqdm import tqdm
from collections import Counter
from setproctitle import setproctitle
from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_with_tqdm, trace_unhandled_exceptions
from geometric_stats import *
np.set_printoptions(threshold=sys.maxsize, linewidth=np.inf, precision=8)
path_to_3D_data = "tobedefinedbyoptions"
......@@ -38,6 +37,8 @@ res_thr = 20.0 # default: all structures
LSU_set = ("RF00002", "RF02540", "RF02541", "RF02543", "RF02546") # From Rfam CLAN 00112
SSU_set = ("RF00177", "RF02542", "RF02545", "RF01959", "RF01960") # From Rfam CLAN 00111
from geometric_stats import * # after definition of the variables
@trace_unhandled_exceptions
def reproduce_wadley_results(carbon=4, show=False, sd_range=(1,4), res=2.0):
"""
......@@ -934,7 +935,7 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
coordinates = nt_3d_centers(filename, consider_all_atoms)
if not len(coordinates):
# there are no nucleotides in the file, or no C1' atoms for example.
warn("No C1' atoms in " + filename)
warn("No C1' atoms in " + filename.split('/')[-1] + ", ignoring")
return None, None, None
except FileNotFoundError:
return None, None, None
......@@ -951,8 +952,8 @@ def par_distance_matrix(filelist, f, label, cm_coords, consider_all_atoms, s):
try:
coordinates_with_gaps.append(coordinates[i - nb_gap])
except IndexError as e:
warn(f"{filename} : {s.seq} at position {i}, we get {e}.", error=True)
exit(0)
warn(f"{filename.split('/')[-1]} : {s.seq} at position {i}, we get {e}.", error=True)
return None, None, None
# Build the pairwise distances
d = np.zeros((len(s.seq), len(s.seq)), dtype=np.float32)
......@@ -1099,7 +1100,7 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False):
std = np.divide(std, counts, where=counts>0, out=np.full_like(std, np.NaN))
mask = np.invert(np.isnan(std))
value = std[mask] - np.power(avg[mask], 2)
if ((value[value<0] < -1e-2).any()):
if ((value[value < 0] < -1e-2).any()):
warn("Erasing very negative variance value !")
value[value<0] = 0.0 # floating point problems !
std[mask] = np.sqrt(value)
......@@ -1127,8 +1128,48 @@ def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False):
if not multithread:
idxQueue.put(thr_idx) # replace the thread index in the queue
setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
else:
# basically, for the rRNAs
# we delete the unique csv files for each chain, they weigh hundreds of gigabytes together
warn(f"Removing {f} ({label}) individual distance matrices, they weight too much. keeping the averages and standard deviations.")
for csv in glob.glob(runDir + '/results/distance_matrices/' + f + '_'+ label + "/*-" + f + ".csv"):
try:
os.remove(csv)
except FileNotFoundError:
pass
return 0
@trace_unhandled_exceptions
def measure_from_structure(f):
    """
    Do geometric measures required on a given filename

    f: name of the structure file. It is opened from path_to_3D_data + "rna_only/" + f,
       and the part of f before the first '.' is passed as the name to the measure functions.

    The all-atom measures are always run. The HiRE-RNA measures (including basepairs)
    are run only if DO_HIRE_RNA_MEASURES is set, and the Pyle measures only if
    DO_WADLEY_ANALYSIS is set.
    """

    name = f.split('.')[0]

    global idxQueue
    thr_idx = idxQueue.get()

    # Whatever happens during the measures, the index taken from the queue must be put back.
    # Otherwise every failing structure removes one index from the queue for good, and
    # later calls may block forever on idxQueue.get().
    try:
        setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measure_from_structure({f})")

        # Open the structure
        with warnings.catch_warnings():
            # Ignore the PDB problems. This mostly warns that some chain is discontinuous.
            warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.PDBConstructionWarning)
            warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.BiopythonWarning)
            parser = MMCIFParser()
            s = parser.get_structure(f, os.path.abspath(path_to_3D_data+ "rna_only/" + f))

        #pyle_measures(name, s, thr_idx)
        measures_aa(name, s, thr_idx)
        if DO_HIRE_RNA_MEASURES:
            measures_hrna(name, s, thr_idx)
            measures_hrna_basepairs(name, s, path_to_3D_data, thr_idx)
        if DO_WADLEY_ANALYSIS:
            measures_pyle(name, s, thr_idx)
    finally:
        idxQueue.put(thr_idx) # replace the thread index in the queue

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished")
def family_order(f):
# sort the RNA families so that the plots are readable
......@@ -1154,7 +1195,11 @@ def nt_3d_centers(cif_file, consider_all_atoms):
try:
structure = MMCIFParser().get_structure(cif_file, cif_file)
except Exception as e:
warn(f"{cif_file} : {e}", error=True)
warn(f"{cif_file.split('/')[-1]} : {e}", error=True)
with open(runDir + "/errors.txt", "a") as f:
f.write(f"Exception in nt_3d_centers({cif_file.split('/')[-1]})\n")
f.write(str(e))
f.write("\n\n")
return result
for model in structure:
for chain in model:
......@@ -1205,6 +1250,7 @@ def log_to_pbar(pbar):
pbar.update(1)
return update
@trace_unhandled_exceptions
def process_jobs(joblist):
"""
Starts a Pool to run the Job() objects in joblist.
......@@ -1302,7 +1348,7 @@ if __name__ == "__main__":
os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/angles/", exist_ok=True)
os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/torsions/", exist_ok=True)
os.makedirs(runDir + "/results/figures/GMM/HiRE-RNA/basepairs/", exist_ok=True)
elif opt == "rescan-nmodes":
elif opt == "--rescan-nmodes":
RESCAN_GMM_COMP_NUM = True
# Load mappings. famlist will contain only families with structures at this resolution threshold.
......@@ -1388,7 +1434,6 @@ if __name__ == "__main__":
if path.isfile(path_to_3D_data + "datapoints/" + f.split('.')[0]):
joblist.append(Job(function=measure_from_structure, args=(f,), how_many_in_parallel=nworkers)) # All-atom distances
process_jobs(joblist)
# Now process the memory-heavy tasks family by family
......@@ -1412,24 +1457,23 @@ if __name__ == "__main__":
general_stats()
os.makedirs(runDir+"/results/figures/GMM/", exist_ok=True)
os.makedirs(runDir+"/results/geometry/json/", exist_ok=True)
concat_dataframes(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv')
concat_dataframes(runDir + '/results/geometry/all-atoms/distances/', 'dist_atoms.csv', nworkers)
if DO_HIRE_RNA_MEASURES:
concat_dataframes(runDir + '/results/geometry/HiRE-RNA/distances/', 'distances_HiRERNA.csv')
concat_dataframes(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_HiRERNA.csv')
concat_dataframes(runDir + '/results/geometry/HiRE-RNA/torsions/', 'torsions_HiRERNA.csv')
concat_dataframes(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs_HiRERNA.csv')
concat_dataframes(runDir + '/results/geometry/HiRE-RNA/distances/', 'distances_HiRERNA.csv', nworkers)
concat_dataframes(runDir + '/results/geometry/HiRE-RNA/angles/', 'angles_HiRERNA.csv', nworkers)
concat_dataframes(runDir + '/results/geometry/HiRE-RNA/torsions/', 'torsions_HiRERNA.csv', nworkers)
concat_dataframes(runDir + '/results/geometry/HiRE-RNA/basepairs/', 'basepairs_HiRERNA.csv', nworkers)
if DO_WADLEY_ANALYSIS:
concat_dataframes(runDir + '/results/geometry/Pyle/distances/', 'distances_pyle.csv')
concat_dataframes(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv')
concat_dataframes(runDir + '/results/geometry/Pyle/distances/', 'distances_pyle.csv', nworkers)
concat_dataframes(runDir + '/results/geometry/Pyle/angles/', 'flat_angles_pyle.csv', nworkers)
joblist = []
joblist.append(Job(function=gmm_aa_dists, args=(RESCAN_GMM_COMP_NUM)))
joblist.append(Job(function=gmm_aa_torsions, args=(RESCAN_GMM_COMP_NUM)))
joblist.append(Job(function=gmm_aa_dists, args=(RESCAN_GMM_COMP_NUM,)))
joblist.append(Job(function=gmm_aa_torsions, args=(RESCAN_GMM_COMP_NUM, res_thr)))
if DO_HIRE_RNA_MEASURES:
joblist.append(Job(function=gmm_hrna, args=(RESCAN_GMM_COMP_NUM)))
joblist.append(Job(function=gmm_hrna_basepairs, args=(RESCAN_GMM_COMP_NUM)))
joblist.append(Job(function=gmm_hrna, args=(RESCAN_GMM_COMP_NUM,)))
joblist.append(Job(function=gmm_hrna_basepairs, args=(RESCAN_GMM_COMP_NUM,)))
if DO_WADLEY_ANALYSIS:
joblist.append(Job(function=gmm_pyle, args=(RESCAN_GMM_COMP_NUM)))
if len(joblist):
process_jobs(joblist)
joblist.append(Job(function=gmm_pyle, args=(RESCAN_GMM_COMP_NUM, res_thr)))
process_jobs(joblist)
merge_jsons()
......