# run_dssr_RNA_puzzles.py
# This script is used to run DSSR on the 3D structures of RNA-puzzles,
# and then to extract the values of torsion angles and pseudotorsions
import json
import os
import subprocess
import warnings
import pandas as pd
from multiprocessing import Pool
# Every input and output path of this script is resolved relative to the
# directory the script is launched from.
runDir = os.getcwd()

# Folder that receives one DSSR JSON annotation per structure.
os.makedirs(f"{runDir}/RNA-puzzles/annotations/", exist_ok=True)
def annotate(fichier):
    """Run DSSR on one mmCIF structure and save its output as a JSON file.

    The puzzle identifier is the part of the file name located before the
    first dot. The annotation is written to
    ``<runDir>/RNA-puzzles/annotations/<puz_id>.json``.

    Args:
        fichier: Path to the structure file, with ``/`` as path separator.

    Returns:
        1 if DSSR printed an "exception" message on stderr, None otherwise
        (including when the file is skipped).
    """
    name = fichier.split('/')[-1]
    parts = name.split('.')
    puz_id = parts[0]

    # Only files whose first extension is "cif" are processed. Checking the
    # length first avoids an IndexError on names without any dot.
    if len(parts) < 2 or parts[1] != 'cif':
        return

    # Skip structures that were already annotated by a previous run.
    annotation_path = runDir + "/RNA-puzzles/annotations/" + puz_id + ".json"
    if os.path.isfile(annotation_path):
        return

    # run DSSR (you need to have it in your $PATH, follow x3dna installation instructions)
    # NOTE(review): with "-o=<puz_id>.json" DSSR presumably writes its JSON to
    # that file in the current directory instead of stdout -- confirm that
    # stdout really holds the annotation saved below.
    output = subprocess.run(["x3dna-dssr", f"-i={fichier}", "--json", "--auxfile=no", f"-o={puz_id}.json"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout = output.stdout.decode('utf-8')
    stderr = output.stderr.decode('utf-8')

    if "exception" in stderr:
        # DSSR is unable to parse the chain.
        warnings.warn(f"Exception while running DSSR, ignoring {puz_id}.")
        return 1

    # Save what DSSR printed on stdout; the context manager closes the file
    # even if the write fails.
    with open(annotation_path, "w") as json_file:
        json_file.write(stdout)
# Annotate every entry of the rna_predict folder with DSSR.
# NOTE(review): this loop runs at import time; under the multiprocessing
# "spawn" start method worker processes re-import the main module and would
# run it again -- confirm the script is only used where that is acceptable.
for f in os.listdir(f"{runDir}/RNA-puzzles/rna_predict"):
    annotate(f"{runDir}/RNA-puzzles/rna_predict/{f}")

# Output folders for the torsion and pseudotorsion tables.
os.makedirs(f"{runDir}/RNA-puzzles/torsions/", exist_ok=True)
os.makedirs(f"{runDir}/RNA-puzzles/pseudotorsions/", exist_ok=True)
def extract_3d_data(f):
    """Export the torsions and pseudotorsions of one DSSR JSON file to CSV.

    The identifier is the part of ``f`` located before the first dot; the
    file actually read is ``<runDir>/RNA-puzzles/rna_predict/<id>.json``.
    Two CSV files are written, under ``<runDir>/RNA-puzzles/torsions/`` and
    ``<runDir>/RNA-puzzles/pseudotorsions/``.

    Args:
        f: Name of a file of the rna_predict folder (a name, not a path).

    Returns:
        None in every case. Files whose first extension is not "json", and
        files that cannot be decoded as JSON, are skipped silently.

    Raises:
        KeyError: If the JSON has no "nts" entry or lacks an expected column.
    """
    parts = f.split('.')
    # Checking the length first avoids an IndexError on names without a dot,
    # which would otherwise abort a whole Pool.map() call.
    if len(parts) < 2 or parts[1] != 'json':
        return
    pdb_id = parts[0]

    try:
        with open(runDir + "/RNA-puzzles/rna_predict/" + pdb_id + ".json", 'r') as json_file:
            json_object = json.load(json_file)
    except json.decoder.JSONDecodeError:
        # Not valid JSON: skip this file.
        return None

    # Create the Pandas DataFrame for the nucleotides of the right chain
    nts = json_object["nts"]  # sub-json-object
    df = pd.DataFrame(nts)

    # Selecting the full column list first makes a missing DSSR field fail
    # with a KeyError instead of producing a partial table.
    cols_we_keep = ["index_chain", "nt_resnum", "nt_name", "nt_code", "nt_id", "dbn", "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "epsilon_zeta", "bb_type", "chi", "glyco_bond", "form", "ssZp", "Dp", "eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base", "v0", "v1", "v2", "v3", "v4", "amplitude", "phase_angle", "puckering"]
    df = df[cols_we_keep]

    torsions = df[["index_chain", "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "chi"]]
    pseudotorsions = df[["eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base"]]

    # Write under runDir (where the output folders are created) rather than
    # under a machine-specific absolute path.
    torsions.to_csv(runDir + '/RNA-puzzles/torsions/torsions ' + pdb_id + '.csv')
    pseudotorsions.to_csv(runDir + '/RNA-puzzles/pseudotorsions/pseudotorsions ' + pdb_id + '.csv')
# Names of all entries of the rna_predict folder, listed once at import time;
# main() maps extract_3d_data over this list.
l=os.listdir(runDir + "/RNA-puzzles/rna_predict/")
def main():
    """Run extract_3d_data on every listed file using a pool of 30 processes."""
    with Pool(processes=30) as workers:
        # map() blocks until every file has been handled; the per-file return
        # values are not used.
        workers.map(extract_3d_data, list(l))


if __name__ == '__main__':
    main()