# rna_puzzles_mmcif.py (2.45 KB)
# Before executing this script, you have to download the RNA-puzzles 3D structure files.
# This script converts the RNA-puzzles 3D structure files from PDB format to mmCIF format.
# Afterwards, the geometric measurements can be performed on these files by changing the file paths in geometric_stats.py.

import Bio
from Bio.PDB import PDBParser
from Bio.PDB import MMCIFParser
import os
from os import path
import warnings
import numpy as np
import pandas as pd
from Bio.PDB.vectors import Vector, calc_angle, calc_dihedral
from statistics import get_euclidian_distance, get_flat_angle, get_torsion_angle, pos_b1, pos_b2
from multiprocessing import Pool

# Directory the script is launched from; every data path below is built on it.
runDir = os.getcwd()

# Create the three result folders (no error if they already exist).
for _subdir in (
    "/results_decoy/geometry/HiRE-RNA/distances/",
    "/results_decoy/geometry/HiRE-RNA/angles/",
    "/results_decoy/geometry/HiRE-RNA/torsions/",
):
    os.makedirs(runDir + _subdir, exist_ok=True)

def split_res_name(f):
    """Convert one RNA-puzzles PDB file to mmCIF with whitespace-free residue names.

    The structure is read from
    ``<runDir>/RNA-puzzles/raw_dataset_and_for_assessment/rna_predict/<f>`` and
    written to ``<runDir>/RNA-puzzles/rna_predict/<name>.cif``, where ``name``
    is the part of ``f`` before its first dot. Only the first model of the
    parsed structure is kept; each of its chains, residues and atoms is copied
    into a new structure, with the residue name stripped of padding whitespace.

    Args:
        f: File name (not a full path) of a PDB file in the raw dataset folder.

    Returns:
        None. Returns early, without parsing anything, when the output file
        already exists.
    """
    name = f.split('.')[0]
    out_dir = runDir + "/RNA-puzzles/rna_predict/"
    out_path = out_dir + name + ".cif"

    # Skip files that were already converted by a previous run.
    if path.isfile(out_path):
        return

    with warnings.catch_warnings():
        # Ignore the PDB problems. This mostly warns that some chain is discontinuous.
        warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.PDBConstructionWarning)
        warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.BiopythonWarning)
        parser = PDBParser()
        s = parser.get_structure(name, os.path.abspath(runDir + "/RNA-puzzles/raw_dataset_and_for_assessment/rna_predict/" + f))

    new_s = Bio.PDB.Structure.Structure(s.get_id())
    model = s[0]
    new_model = Bio.PDB.Model.Model(model.get_id())
    for chain in model:
        new_chain = Bio.PDB.Chain.Chain(chain.get_id())
        for res in chain:
            # strip() yields the same name as the former split('  ')[1] for
            # space-padded names such as "  A", but does not raise IndexError
            # when the name contains no double space (e.g. "A", " DA", "GTP").
            new_residu = Bio.PDB.Residue.Residue(res.get_id(), res.get_resname().strip(), res.get_segid())
            for atom in res.get_atoms():
                new_residu.add(atom.copy())
            new_chain.add(new_residu)

        new_model.add(new_chain)
    new_s.add(new_model)

    # The output folder may not exist yet; exist_ok avoids an error when
    # several workers try to create it.
    os.makedirs(out_dir, exist_ok=True)

    ioobj = Bio.PDB.MMCIFIO()
    ioobj.set_structure(new_s)
    ioobj.save(os.path.abspath(out_path))

# Names of all entries in the raw dataset folder, listed when the module is loaded.
_raw_dir = runDir + '/RNA-puzzles/raw_dataset_and_for_assessment/rna_predict'
liste = os.listdir(_raw_dir)


def main(processes=50):
    """Run split_res_name on every entry of ``liste`` using a process pool.

    Args:
        processes: Number of worker processes in the pool. Defaults to 50,
            the value that was previously hard-coded.
    """
    with Pool(processes=processes) as pool:
        # split_res_name returns None, so the mapped results are not kept;
        # map() is used to distribute the work and wait for completion.
        pool.map(split_res_name, liste)

# Start the conversion only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()