statistical potentials

Aglaé TABOT
Commit 9dedcd5bed5902745e06023f9620d2b837bbdaab 9dedcd5b 1 parent a54959cc
Showing 3 changed files with 148 additions and 0 deletions
scripts/rna_puzzles_mmcif.py
scripts/run_dssr_RNA_puzzles.py
statistical_potential.py
--- a/scripts/rna_puzzles_mmcif.py 0 → 100644
View file @9dedcd5
+++ b/scripts/rna_puzzles_mmcif.py 0 → 100644
View file @9dedcd5
+ # Before executing this script, you have to download the RNA-puzzles 3D structure files.
+ # This script converts the RNA-puzzles 3D structure files from PDB format to mmCIF format.
+ # After that, we can perform the geometric measurements on these files by changing the file paths in geometric_stats.py.
+ 
+ import Bio
+ from Bio.PDB import PDBParser
+ from Bio.PDB import MMCIFParser
+ import os
+ from os import path
+ import warnings
+ import numpy as np
+ import pandas as pd
+ from Bio.PDB.vectors import Vector, calc_angle, calc_dihedral
+ from statistics import get_euclidian_distance, get_flat_angle, get_torsion_angle, pos_b1, pos_b2
+ from multiprocessing import Pool
+ 
+ # Every path used by this script is built from the directory it is launched from.
+ runDir = os.getcwd()
+ 
+ # Create the output folders under results_decoy/geometry/HiRE-RNA/ (no error if they already exist).
+ for measure in ("distances/", "angles/", "torsions/"):
+     os.makedirs(runDir+"/results_decoy/geometry/HiRE-RNA/" + measure, exist_ok=True)
+ 
+ def split_res_name(f):
+     """
+     Convert one RNA-puzzles PDB file to mmCIF, with whitespace-stripped residue names.
+ 
+     f: name (not path) of a file in RNA-puzzles/raw_dataset_and_for_assessment/rna_predict/.
+ 
+     The structure is written to RNA-puzzles/rna_predict/<name>.cif, where <name> is the part
+     of f before its first dot. Nothing is done if that file already exists.
+     Only the first model of the parsed structure is kept. Returns None.
+     """
+     name = f.split('.')[0]
+     cif_path = runDir + "/RNA-puzzles/rna_predict/" + name + ".cif"
+ 
+     # Already converted by a previous run: nothing to do.
+     if path.isfile(cif_path):
+         return
+ 
+     with warnings.catch_warnings():
+         # Ignore the PDB problems. This mostly warns that some chain is discontinuous.
+         warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.PDBConstructionWarning)
+         warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.BiopythonWarning)
+         parser = PDBParser()
+         s = parser.get_structure(name, os.path.abspath(runDir + "/RNA-puzzles/raw_dataset_and_for_assessment/rna_predict/" + f))
+ 
+     # Rebuild the structure / model / chain / residue hierarchy so that the residues can be renamed.
+     new_s = Bio.PDB.Structure.Structure(s.get_id())
+     model = s[0]
+     new_model = Bio.PDB.Model.Model(model.get_id())
+     for chain in model:
+         new_chain = Bio.PDB.Chain.Chain(chain.get_id())
+         for res in chain:
+             # strip() handles both space-padded ("  A") and already-trimmed ("A") residue names.
+             # The former split('  ')[1] raised IndexError unless the name had exactly two leading spaces.
+             new_residu = Bio.PDB.Residue.Residue(res.get_id(), res.get_resname().strip(), res.get_segid())
+             for atom in res.get_atoms():
+                 new_residu.add(atom.copy())
+             new_chain.add(new_residu)
+ 
+         new_model.add(new_chain)
+     new_s.add(new_model)
+ 
+     # Save the rebuilt structure in mmCIF format.
+     ioobj = Bio.PDB.MMCIFIO()
+     ioobj.set_structure(new_s)
+     ioobj.save(os.path.abspath(cif_path))
+ 
+ # Names of the raw RNA-puzzles files to convert.
+ liste=os.listdir(runDir + '/RNA-puzzles/raw_dataset_and_for_assessment/rna_predict')
+ 
+ 
+ def main():
+     """Run split_res_name on every entry of liste, using a pool of 50 worker processes."""
+     workers = Pool(processes=50)
+     with workers:
+         workers.map(split_res_name, list(liste))
+ 
+ if __name__ == '__main__':
+     main()
+ 
--- a/scripts/run_dssr_RNA_puzzles.py 0 → 100644
View file @9dedcd5
+++ b/scripts/run_dssr_RNA_puzzles.py 0 → 100644
View file @9dedcd5
+ # This script is used to run DSSR on the 3D structures of RNA-puzzles,
+ # and then to extract the values of torsion angles and pseudotorsions
+ 
+ 
+ import json
+ import os
+ import subprocess
+ import warnings
+ import pandas as pd
+ from multiprocessing import Pool 
+ 
+ # Every path used by this script is built from the directory it is launched from.
+ runDir = os.getcwd()
+ 
+ # Folder where annotate() saves one JSON file per structure (no error if it already exists).
+ os.makedirs(runDir + "/RNA-puzzles/annotations/", exist_ok=True)
+ def annotate(fichier):
+     """
+     Run DSSR on one mmCIF file and save what it prints to RNA-puzzles/annotations/<puz_id>.json.
+ 
+     fichier: path of the structure file; <puz_id> is the part of its base name before the first dot.
+ 
+     Returns None when the annotation file already exists, when the file extension is not 'cif',
+     or after the annotation has been written. Returns 1 when DSSR reports an exception on stderr.
+     """
+     name = fichier.split('/')[-1]
+     parts = name.split('.')
+     puz_id = parts[0]
+     json_path = runDir + "/RNA-puzzles/annotations/" + puz_id + ".json"
+ 
+     # Already annotated by a previous run: nothing to do.
+     if os.path.isfile(json_path):
+         return
+ 
+     # Only mmCIF files are annotated. Names without any extension are skipped too
+     # (they used to raise IndexError).
+     if len(parts) < 2 or parts[1] != 'cif':
+         return
+ 
+     # run DSSR (you need to have it in your $PATH, follow x3dna installation instructions)
+     # NOTE(review): DSSR is also given -o=<puz_id>.json, a path relative to the working directory;
+     # confirm that stdout still carries the JSON in that case, otherwise the saved file is empty.
+     output = subprocess.run(["x3dna-dssr", f"-i={fichier}", "--json", "--auxfile=no", f"-o={puz_id}.json"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     stdout = output.stdout.decode('utf-8')
+     stderr = output.stderr.decode('utf-8')
+ 
+     if "exception" in stderr:
+         # DSSR is unable to parse the chain.
+         # The previous call to an undefined warn() raised NameError here; use the warnings module.
+         warnings.warn(f"Exception while running DSSR, ignoring {puz_id}.")
+         return 1
+ 
+     # Save the DSSR output; the context manager closes the file even if the write fails.
+     with open(json_path, "w") as json_file:
+         json_file.write(stdout)
+     
+ 
+ 
+ # Annotate every entry of the rna_predict folder, one after the other.
+ # NOTE(review): this runs at import time, outside the __main__ guard.
+ for entry in os.listdir(runDir +"/RNA-puzzles/rna_predict"):
+     annotate(runDir +"/RNA-puzzles/rna_predict/"+ entry)
+ 
+ # Output folders for the CSV files written by extract_3d_data (no error if they already exist).
+ for out_dir in ("/RNA-puzzles/torsions/", "/RNA-puzzles/pseudotorsions/"):
+     os.makedirs(runDir + out_dir, exist_ok=True)
+ 
+ def extract_3d_data(f):
+     """
+     Extract the torsion and pseudotorsion columns of one DSSR JSON file into two CSV files.
+ 
+     f: name (not path) of a file in RNA-puzzles/rna_predict/. Files whose extension is not
+     'json', and files the json package cannot parse, are skipped.
+ 
+     Writes RNA-puzzles/torsions/torsions <id>.csv and
+     RNA-puzzles/pseudotorsions/pseudotorsions <id>.csv under runDir, where <id> is the part
+     of f before its first dot. Always returns None.
+ 
+     NOTE(review): annotate() saves its JSON under RNA-puzzles/annotations/, but this function
+     reads from RNA-puzzles/rna_predict/ -- confirm which directory is intended.
+     """
+     parts = f.split('.')
+     pdb_id = parts[0]
+ 
+     # Only JSON files are processed. Names without any extension are skipped too
+     # (they used to raise IndexError).
+     if len(parts) < 2 or parts[1] != 'json':
+         return
+ 
+     try:
+         with open(runDir + "/RNA-puzzles/rna_predict/" + pdb_id + ".json", 'r') as json_file:
+             json_object = json.load(json_file)
+     except json.decoder.JSONDecodeError:
+         # Unreadable JSON: skip this file silently.
+         return None
+ 
+     # Create the Pandas DataFrame for the nucleotides
+     nts = json_object["nts"]                        # sub-json-object
+     df = pd.DataFrame(nts)
+     cols_we_keep = ["index_chain", "nt_resnum", "nt_name", "nt_code", "nt_id", "dbn", "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "epsilon_zeta", "bb_type", "chi", "glyco_bond", "form", "ssZp", "Dp", "eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base", "v0", "v1", "v2", "v3", "v4", "amplitude", "phase_angle", "puckering"]
+     # Selecting the full column list first raises KeyError if DSSR did not produce one of them.
+     df = df[cols_we_keep]
+     torsions = df[["index_chain", "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "chi"]]
+     pseudotorsions = df[["eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base"]]
+ 
+     # Write under runDir, where the torsions/ and pseudotorsions/ folders are created,
+     # instead of the former hard-coded /home/atabot/RNANet/ location.
+     torsions.to_csv(runDir + '/RNA-puzzles/torsions/torsions ' + pdb_id + '.csv')
+     pseudotorsions.to_csv(runDir + '/RNA-puzzles/pseudotorsions/pseudotorsions ' + pdb_id + '.csv')
+     
+ 
+ 
+ # Names of the files that extract_3d_data will be run on.
+ l=os.listdir(runDir + "/RNA-puzzles/rna_predict/")
+ 
+ def main():
+     """Run extract_3d_data on every entry of l, using a pool of 30 worker processes."""
+     workers = Pool(processes=30)
+     with workers:
+         workers.map(extract_3d_data, list(l))
+ 
+ if __name__ == '__main__':
+     main()
--- a/statistical_potential.py 0 → 100644
View file @9dedcd5
+++ b/statistical_potential.py 0 → 100644
View file @9dedcd5