Aglaé TABOT

last modification for renumbering (issues with OP2)

......@@ -321,8 +321,8 @@ class Chain:
self.file = path_to_3D_data+"rna_mapped_to_Rfam/"+self.chain_label+".cif"
else:
status = f"Extract {self.pdb_id}-{self.pdb_chain_id}"
self.file = path_to_3D_data+"renumbered_rna_only/"+self.chain_label+".cif"
#self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif"
self.file = path_to_3D_data+"rna_only/"+self.chain_label+".cif"
# Check if file exists, if yes, abort (do not recompute)
if os.path.exists(self.file):
......@@ -405,7 +405,7 @@ class Chain:
nt=nums.at[i, "nt_name"]
# particular case 6n5s_1_A, residue 201 in the original cif file (resname = G and HETATM = H_G)
if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' and resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' :
if nt == 'A' or (nt == 'G' and (self.chain_label != '6n5s_1_A' or resseq != 201)) or nt == 'C' or nt == 'U' or nt in ['DG', 'DU', 'DC', 'DA', 'DI', 'DT' ] or nt == 'N' or nt == 'I' :
res=chain[(' ', resseq, icode_res)]
else : #modified nucleotides (e.g. chain 5l4o_1_A)
het='H_' + nt
......@@ -1521,14 +1521,10 @@ class Pipeline:
if self.HOMOLOGY and not os.path.isdir(path_to_3D_data + "rna_mapped_to_Rfam"):
# for the portions mapped to Rfam
os.makedirs(path_to_3D_data + "rna_mapped_to_Rfam")
'''
if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "rna_only"):
# extract chains of pure RNA
os.makedirs(path_to_3D_data + "rna_only")
'''
if (not self.HOMOLOGY) and not os.path.isdir(path_to_3D_data + "renumbered_rna_only"):
# extract chains of pure RNA
os.makedirs(path_to_3D_data + "renumbered_rna_only")
# define and run jobs
joblist = []
......
#/usr/bin/python3
import json
import os
import numpy as np
runDir = os.getcwd()
def get_best(i):
weights = [ float(x.strip("[]")) for x in i["weights"] ]
means = [ float(x.strip("[]")) for x in i["means"] ]
s = sorted(zip(weights, means), reverse=True)
return s[0][1]
def get_k(lw, bp):
if lw == "cWW":
if bp in ["GC", "CG"]:
return 3.9
if bp in ["AU", "UA"]:
return 3.3
if bp in ["GU", "UG"]:
return 3.15
return 2.4
if lw == "tWW":
return 2.4
return 0.8
if __name__ == "__main__":
print("processing HRNA jsons...")
lws = []
for c in "ct":
for nt1 in "WHS":
for nt2 in "WHS":
lws.append(c+nt1+nt2)
bps = []
for nt1 in "ACGU":
for nt2 in "ACGU":
bps.append(nt1+nt2)
fullresults = dict()
fullresults["A"] = dict()
fullresults["C"] = dict()
fullresults["G"] = dict()
fullresults["U"] = dict()
counts = dict()
for lw in lws:
counts[lw] = 0
for bp in bps:
fullresults[bp[0]][bp[1]] = []
# open json file
with open(runDir + f"/results/geometry/json/hirerna_{bp}_basepairs.json", "rb") as f:
data = json.load(f)
# consider each BP type
for lw in lws:
this = dict()
# gather params
distance = 0
a1 = 0
a2 = 0
for i in data:
if i["measure"] == f"Distance between {lw} {bp} tips":
distance = np.round(get_best(i), 2)
if i["measure"] == f"{lw}_{bp}_alpha_1":
a1 = np.round(np.pi/180.0*get_best(i), 2)
if i["measure"] == f"{lw}_{bp}_alpha_2":
a2 = np.round(np.pi/180.0*get_best(i), 2)
if distance == 0 and a1 == 0 and a2 == 0:
# not found
continue
counts[lw] += 1
# create entry
this["rho"] = distance
this["a1"] = a1
this["a2"] = a2
this["k"] = get_k(lw, bp)
this["canonical"] = 1.0 if lw=="cWW" and bp in ["GC", "CG", "GU", "UG", "AU", "UA"] else 0.0
this["LW"] = lw
# store entry
fullresults[bp[0]][bp[1]].append(this)
with open(runDir + "/results/geometry/json/hirerna_basepairs_processed.json", "w") as f:
json.dump(fullresults, f, indent=4)
This diff is collapsed. Click to expand it.