self.db_chain_id=sql_ask_database(conn,f"SELECT (chain_id) FROM chain WHERE structure_id='{self.pdb_id}' AND chain_name='{self.pdb_chain_id}' AND rfam_acc='{self.rfam_fam}';")[0][0]
self.db_chain_id=sql_ask_database(conn,f"""SELECT (chain_id) FROM chain
WHERE structure_id='{self.pdb_id}'
AND chain_name='{self.pdb_chain_id}'
AND rfam_acc='{self.rfam_fam}';""")[0][0]
else:
sql_execute(conn,"INSERT INTO chain (structure_id, chain_name, issue) VALUES (?, ?, ?) ON CONFLICT(structure_id, chain_name) DO UPDATE SET issue=excluded.issue;",data=(str(self.pdb_id),int(self.pdb_chain_id),int(self.delete_me)))
self.db_chain_id=sql_ask_database(conn,f"SELECT (chain_id) FROM chain WHERE structure_id='{self.pdb_id}' AND chain_name='{self.pdb_chain_id}' AND rfam_acc IS NULL;")[0][0]
df=pd.read_sql(f"""SELECT {angle}, th{angle} FROM nucleotide WHERE puckering="C2'-endo" AND {angle} IS NOT NULL AND th{angle} IS NOT NULL;""",conn)
c2_endo_etas=df[angle].values.tolist()
c2_endo_thetas=df["th"+angle].values.tolist()
df=pd.read_sql(f"""SELECT {angle}, th{angle} FROM nucleotide WHERE form = '.' AND puckering="C3'-endo" AND {angle} IS NOT NULL AND th{angle} IS NOT NULL;""",conn)
c3_endo_etas=df[angle].values.tolist()
c3_endo_thetas=df["th"+angle].values.tolist()
conn.close()
# Extract the angle values of c2'-endo and c3'-endo nucleotides
withsqlite3.connect("results/RNANet.db")asconn:
df=pd.read_sql(f"""SELECT {angle}, th{angle} FROM nucleotide WHERE puckering="C2'-endo" AND {angle} IS NOT NULL AND th{angle} IS NOT NULL;""",conn)
c2_endo_etas=df[angle].values.tolist()
c2_endo_thetas=df["th"+angle].values.tolist()
df=pd.read_sql(f"""SELECT {angle}, th{angle} FROM nucleotide WHERE form = '.' AND puckering="C3'-endo" AND {angle} IS NOT NULL AND th{angle} IS NOT NULL;""",conn)
c3_endo_etas=df[angle].values.tolist()
c3_endo_thetas=df["th"+angle].values.tolist()
# Create arrays with (x,y) coordinates of the points
famlist=[x[0]forxinsql_ask_database(conn,"SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain GROUP BY rfam_acc) WHERE n_chains > 1 ORDER BY rfam_acc ASC;")]
ignored=[x[0]forxinsql_ask_database(conn,"SELECT rfam_acc from (SELECT rfam_acc, COUNT(chain_id) as n_chains FROM family NATURAL JOIN chain GROUP BY rfam_acc) WHERE n_chains < 2 ORDER BY rfam_acc ASC;")]
iflen(ignored):
print("Idty matrices: Ignoring families with only one chain:"," ".join(ignored)+'\n')
print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:"," ".join(ignored)+'\n')
# compute distance matrices (or ignore if data/RF0****.npy exists)
p=Pool(processes=8)
...
...
@@ -476,7 +481,7 @@ def seq_idty():
conn.close()
# Plots plots plots
fig,axs=plt.subplots(5,13,figsize=(15,9))
fig,axs=plt.subplots(4,17,figsize=(17,5.75))
axs=axs.ravel()
[axi.set_axis_off()foraxiinaxs]
im=""# Just to declare the variable, it will be set in the loop
...
...
@@ -495,7 +500,7 @@ def seq_idty():
D=D[idx1,:]
D=D[:,idx1[::-1]]
im=ax.matshow(1.0-D,vmin=0,vmax=1,origin='lower')# convert to identity matrix 1 - D from distance matrix D