@@ -249,7 +249,9 @@ To help you design your own requests, here follows a description of the database
*`nb_homologs`: The number of known homologous hits downloaded from Rfam and used to compute nucleotide frequencies
*`nb_3d_chains`: The number of 3D RNA chains mapped to the family (from Rfam-PDB mappings, or inferred using the redundancy list)
*`nb_total_homol`: Sum of the two previous fields, i.e. the number of sequences in the multiple sequence alignment used to compute nucleotide frequencies
*`max_len`: The longest RNA sequence among the homologs (in bases)
*`max_len`: The longest RNA sequence among the homologs (in bases, unaligned)
*`ali_len`: The length of the aligned sequences (in alignment columns, i.e. bases plus gaps)
*`ali_filtered_len`: The length of the alignment after filtering it to keep only the RNANet chains (those which have a 3D structure) and removing the gap-only columns
*`comput_time`: Time required to compute the family's multiple sequence alignment, in seconds
*`comput_peak_mem`: RAM (or swap) required to compute the family's multiple sequence alignment, in megabytes
*`idty_percent`: Average percent identity over pairs of 3D chain sequences from the family
db_id=sql_ask_database(conn,f"SELECT chain_id FROM chain WHERE structure_id = {s.id.split('[')[0]} AND chain_name = {s.id.split('-')[1]} AND rfam_acc = {f};")
iflen(db_id):
db_id=db_id[0][0]
else:
idx=chains_ids.index(s.id)
conn.close()
warn(f"Bizarre... sequence {s.id} is not found in the database ! Cannot remap it ! Ignoring...")
pbar.update(1)
continue
seq_to_align=''.join([x[0]forxinsql_ask_database(conn,f"SELECT nt_align_code FROM nucleotide WHERE chain_id = {db_id} ORDER BY index_chain ASC;")])
db_id=sql_ask_database(conn,f"SELECT chain_id FROM chain WHERE structure_id = {s.id.split('[')[0]} AND chain_name = {s.id.split('-')[1]} AND rfam_acc = {f};")
iflen(db_id):
db_id=db_id[0][0]
else:
idx=chains_ids.index(s.id)
gaps+=list_of_chains[idx].replace_gaps(conn)
exceptValueError:
pass# We already printed a warning just above
pbar.update(1)
continue
seq=''.join([x[0]forxinsql_ask_database(conn,f"SELECT nt_code FROM nucleotide WHERE chain_id = {db_id} ORDER BY index_chain ASC;")])
full_length=len(seq)
# detect gaps
c_seq=list(seq)# contains "ACGUNacgu-"
letters=['A','C','G','U','N']
homology_data=sql_ask_database(conn,f"""SELECT freq_A, freq_C, freq_G, freq_U, freq_other FROM
(SELECT chain_id, rfam_acc FROM chain WHERE chain_id={db_id})
NATURAL JOIN re_mapping
NATURAL JOIN align_column;
""")
ifhomology_dataisNoneornotlen(homology_data):
withopen(runDir+"/errors.txt","a")aserrf:
errf.write(f"No homology data found in the database for {s.id} ! Not replacing gaps.\n")
continue
eliflen(homology_data)!=full_length:
withopen(runDir+"/errors.txt","a")aserrf:
errf.write(f"Found {len(homology_data)} nucleotides for {s.id} of length {full_length} ! Not replacing gaps.\n")