# Ask the database for (nt_name, count) rows covering every nucleotide that
# belongs to a chain mapped to family `f`, and turn those rows into a dict.
# NOTE(review): `f` is interpolated straight into the SQL text; presumably it
# is an rfam_acc value taken from the database itself -- confirm.
nt_count_query = f"SELECT nt_name, COUNT(nt_name) FROM (SELECT chain_id from chain WHERE rfam_acc='{f}') NATURAL JOIN nucleotide GROUP BY nt_name;"
counts = dict(sql_ask_database(conn, nt_count_query, warn_every=0))

# Merge this family's counts into its entry of `freqs` through `update`.
freqs[f].update(counts)

# Create a pandas DataFrame, and save it to CSV.
# (Only the empty frame is created here; filling and saving are not visible
# in this part of the file.)
df = pd.DataFrame()
forfintqdm(fam_list,position=thr_idx+1,desc=f"Worker {thr_idx+1}: Base frequencies",leave=False):
forfintqdm(famlist,position=thr_idx+1,desc=f"Worker {thr_idx+1}: Base frequencies",unit="family",leave=False):
# Accessions of every row of the `family` table, sorted ascending.
fam_list = [row[0] for row in sql_ask_database(conn, "SELECT rfam_acc from family ORDER BY rfam_acc ASC;")]

# Map each family accession to the chain ids that are mapped to it, have
# issue=0, and come from a structure whose resolution is at most `res_thr`.
# NOTE(review): `k` and `res_thr` are interpolated directly into the SQL
# text; `k` comes from the query above, `res_thr` is assumed numeric -- confirm.
mappings_list = {}
for k in fam_list:
    mappings_list[k] = [row[0] for row in sql_ask_database(conn, f"SELECT chain_id from chain JOIN structure ON chain.structure_id=structure.pdb_id WHERE rfam_acc='{k}' AND issue=0 AND resolution <= {res_thr};")]
# List the families for which we will compute sequence identity matrices:
# those with at least one chain with issue = 0, sorted by accession.
famlist = [row[0] for row in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain WHERE issue = 0 GROUP BY rfam_acc) WHERE n_chains > 0 ORDER BY rfam_acc ASC;")]

# Families with fewer than 3 chains with issue = 0, sorted by accession.
# NOTE(review): the thresholds overlap -- a family with 1 or 2 such chains is
# in both `famlist` and `ignored`. Confirm this is intended.
ignored = [row[0] for row in sql_ask_database(conn, "SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain WHERE issue = 0 GROUP BY rfam_acc) WHERE n_chains < 3 ORDER BY rfam_acc ASC;")]

# Number of chains with issue=0 whose rfam_acc is the literal 'unmappd'
# (first column of the first row returned by the COUNT(*) query).
n_unmapped_chains = sql_ask_database(conn, "SELECT COUNT(*) FROM chain WHERE rfam_acc='unmappd' AND issue=0;")[0][0]
families=pd.read_sql(f"""SELECT rfam_acc, count(*) as n_chains
FROM chain JOIN structure
ON chain.structure_id = structure.pdb_id
WHERE issue = 0 AND resolution <= {res_thr} AND rfam_acc != 'unmappd'