# run_dssr_RNA_puzzles.py

# This script is used to run DSSR on the 3D structures of RNA-puzzles,
# and then to extract the values of torsion angles and pseudotorsions


import json
import os
import subprocess
import warnings
import pandas as pd
from multiprocessing import Pool

# Base directory for the RNA-puzzles folders: the working directory at launch time.
runDir = os.getcwd()

# Folder that receives the JSON files written by annotate(); no error if it already exists.
os.makedirs(runDir + "/RNA-puzzles/annotations/", exist_ok=True)
def annotate(fichier):
    """Run DSSR on one ``.cif`` structure and save what it printed as a JSON file.

    The puzzle identifier is the part of the file name that precedes its first
    dot. The file is skipped when its second dot-separated component is not
    ``cif`` (this includes names without any dot), or when
    ``<runDir>/RNA-puzzles/annotations/<puz_id>.json`` already exists.

    Args:
        fichier: path to a candidate structure file ('/'-separated).

    Returns:
        ``1`` when DSSR's stderr contains the word "exception" (nothing is
        written in that case), ``None`` otherwise (file skipped, or
        annotation written).
    """
    name = fichier.split('/')[-1]
    parts = name.split('.')

    # A name without a dot has no parts[1]; treat it like any other non-CIF
    # entry instead of letting an IndexError abort the whole run.
    if len(parts) < 2 or parts[1] != 'cif':
        return None

    puz_id = parts[0]
    annotation_path = runDir + "/RNA-puzzles/annotations/" + puz_id + ".json"

    # Already annotated by a previous run: nothing to do.
    if os.path.isfile(annotation_path):
        return None

    # run DSSR (you need to have it in your $PATH, follow x3dna installation instructions)
    # NOTE(review): "-o=" presumably makes DSSR write its result to <puz_id>.json
    # in the current directory, while the text saved below is the captured
    # stdout -- confirm which of the two actually holds the JSON.
    output = subprocess.run(
        ["x3dna-dssr", f"-i={fichier}", "--json", "--auxfile=no", f"-o={puz_id}.json"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout = output.stdout.decode('utf-8')
    stderr = output.stderr.decode('utf-8')

    if "exception" in stderr:
        # DSSR is unable to parse the chain. The visible script never defines a
        # `warn` helper, so report through the standard `warnings` module,
        # which the file already imports.
        warnings.warn(f"Exception while running DSSR, ignoring {puz_id}.")
        return 1

    # The context manager closes the file even if the write fails.
    with open(annotation_path, "w") as json_file:
        json_file.write(stdout)


# Hand every entry of the predictions folder to annotate(); this runs as soon
# as the module is loaded.
for entry in os.listdir(runDir +"/RNA-puzzles/rna_predict"):
    annotate(runDir +"/RNA-puzzles/rna_predict/"+ entry)

# Folders that receive the two CSV exports; nothing happens if they already exist.
for out_dir in ("/RNA-puzzles/torsions/", "/RNA-puzzles/pseudotorsions/"):
    os.makedirs(runDir + out_dir, exist_ok=True)

def extract_3d_data(f):
    """Export the torsion and pseudotorsion columns of one DSSR JSON file to CSV.

    The identifier is the part of ``f`` before its first dot. The JSON file is
    read from ``<runDir>/RNA-puzzles/rna_predict/<id>.json`` and two CSV files
    are written under ``runDir``:
    ``RNA-puzzles/torsions/torsions <id>.csv`` and
    ``RNA-puzzles/pseudotorsions/pseudotorsions <id>.csv``
    (the space in the file names is kept from the original script).

    Args:
        f: bare file name (no directory part). Names whose second
            dot-separated component is not ``json``, including names without
            any dot, are ignored.

    Returns:
        Always ``None``. Files that cannot be parsed as JSON are skipped
        silently.

    Raises:
        KeyError: if the JSON has no ``"nts"`` entry or a nucleotide record
            lacks one of the expected fields.
    """
    parts = f.split('.')

    # A name without a dot has no parts[1]; skip it rather than raise IndexError
    # inside a pool worker.
    if len(parts) < 2 or parts[1] != 'json':
        return None

    pdb_id = parts[0]

    # NOTE(review): annotate() writes under RNA-puzzles/annotations/, but the
    # JSON is read from RNA-puzzles/rna_predict/ here -- confirm this is the
    # intended input folder.
    try:
        with open(runDir + "/RNA-puzzles/rna_predict/" + pdb_id + ".json", 'r') as json_file:
            json_object = json.load(json_file)
    except json.decoder.JSONDecodeError:
        # Best effort: an unparsable file is skipped without any message.
        return None

    # One DataFrame row per entry of the "nts" list.
    nts = json_object["nts"]
    df = pd.DataFrame(nts)

    # Selecting the full list first makes a file that lacks any expected field
    # fail here, before anything is written.
    cols_we_keep = ["index_chain", "nt_resnum", "nt_name", "nt_code", "nt_id", "dbn", "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "epsilon_zeta", "bb_type", "chi", "glyco_bond", "form", "ssZp", "Dp", "eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base", "v0", "v1", "v2", "v3", "v4", "amplitude", "phase_angle", "puckering"]
    df = df[cols_we_keep]

    torsions = df[["index_chain", "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "chi"]]
    pseudotorsions = df[["eta", "theta", "eta_prime", "theta_prime", "eta_base", "theta_base"]]

    # Write next to the other outputs under runDir (the folders the script
    # creates) instead of a path hard-coded to one user's home directory.
    torsions.to_csv(runDir + "/RNA-puzzles/torsions/torsions " + pdb_id + ".csv")
    pseudotorsions.to_csv(runDir + "/RNA-puzzles/pseudotorsions/pseudotorsions " + pdb_id + ".csv")
    return None


# File names found in the predictions folder at module load time; main() maps
# extract_3d_data over this list.
l=os.listdir(runDir + "/RNA-puzzles/rna_predict/")

def main():
    """Apply extract_3d_data to every listed file name using 30 worker processes."""
    workers = Pool(processes=30)
    # Leaving the `with` block shuts the pool down once the map has returned.
    with workers:
        workers.map(extract_3d_data, list(l))

# Start the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()