statistical_potential.py 23.9 KB

Raw Blame History Permalink

#!/usr/bin/python3

# RNANet statistics
# Developed by Aglaé Tabot, 2021

# This file computes statistical potentials over the produced dataset.
# THIS FILE IS NOT SUPPOSED TO BE RUN DIRECTLY.

import getopt, os, pickle, sqlite3, shlex, subprocess, sys, warnings
import time
import numpy as np
import pandas as pd
import threading as th
import matplotlib
import matplotlib.pyplot as plt
import json
import Bio
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.vectors import Vector, calc_angle, calc_dihedral
from multiprocessing import Pool, Manager
from os import path
from tqdm import tqdm
from collections import Counter
from setproctitle import setproctitle
from RNAnet import Job, read_cpu_number, sql_ask_database, sql_execute, warn, notify, init_with_tqdm, trace_unhandled_exceptions
from sklearn.mixture import GaussianMixture
import scipy.stats as st
import warnings
from pandas.core.common import SettingWithCopyWarning
from itertools import combinations_with_replacement
from math import *
from geometric_stats import get_euclidian_distance, GMM_histo

np.set_printoptions(threshold=sys.maxsize, linewidth=np.inf, precision=8)

runDir = os.getcwd()

@trace_unhandled_exceptions
def pyle_measures_for_potentials(name, s, thr_idx):
    # measure distances P-P, P-C1', P-C4', C1'-C1', C4'-C4'
    # between residues along the chain
    # Requires a lot of storage space!
    if (path.isfile(runDir + '/results/geometry/Pyle/distances_i_i+1/distances_pyle_i_i+1'+name+'.csv')):
        return

    liste_dist=[]

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} pyle_measures({name})")

    chain = next(s[0].get_chains())

    for res1 in tqdm(chain, position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} pyle_measures", unit="res", leave=False):

        if res1.get_resname() in ["A", "C", "G", "U"]:
            resnum1=list(res1.get_id())[1]
            atom_p_1 = [ atom.get_coord() for atom in res1 if atom.get_name() ==  "P"]
            atom_c1p_1 = [ atom.get_coord() for atom in res1 if "C1'" in atom.get_fullname() ]
            atom_c4p_1 = [ atom.get_coord() for atom in res1 if "C4'" in atom.get_fullname() ]
            for res2 in chain:
                resnum2=list(res2.get_id())[1]
                if resnum2-resnum1 < 1 :
                    continue
                p_p=np.nan
                p_c4p=np.nan
                p_c1p=np.nan
                c4p_c4p=np.nan
                c1p_c1p=np.nan

                if res2.get_resname() in ["A", "C", "G", "U"]:

                    atom_p_2 = [ atom.get_coord() for atom in res2 if atom.get_name() ==  "P"]
                    atom_c1p_2 = [ atom.get_coord() for atom in res2 if "C1'" in atom.get_fullname() ]
                    atom_c4p_2 = [ atom.get_coord() for atom in res2 if "C4'" in atom.get_fullname() ]

                    p_p = get_euclidian_distance(atom_p_1, atom_p_2)
                    p_c4p= get_euclidian_distance(atom_p_1, atom_c4p_2)
                    p_c1p= get_euclidian_distance(atom_p_1, atom_c1p_2)
                    c4p_c4p= get_euclidian_distance(atom_c4p_1, atom_c4p_2)
                    c1p_c1p= get_euclidian_distance(atom_c1p_1, atom_c1p_2)

                    liste_dist.append([res1.get_resname(), int(resnum1), res2.get_resname(), int(resnum2), p_p, p_c4p, p_c1p, c4p_c4p, c1p_c1p])


    df = pd.DataFrame(liste_dist, columns=["res1", "resnum1", "res2", "resnum2", "P-P", "P-C4'", "P-C1'", "C4'-C4'", "C1'-C1'"])
    df.to_csv(runDir + "/results/geometry/Pyle/distances_i_i+1/distances_pyle_i_i+1" + name + ".csv")

@trace_unhandled_exceptions
def gmm_pyle_type(ntpair, data, scan):

    setproctitle(f"GMM (Pyle {ntpair} )")

    os.makedirs(runDir + "/results/figures/GMM/Pyle/distances_i_i+1/", exist_ok=True)
    os.chdir(runDir + "/results/figures/GMM/Pyle/distances_i_i+1/")

    p_p=list(data["P-P"][~ np.isnan(data["P-P"])])
    p_c4p=list(data["P-C4'"][~ np.isnan(data["P-C4'"])])
    p_c1p=list(data["P-C1'"][~ np.isnan(data["P-C1'"])])
    c4p_c4p=list(data["C4'-C4'"][~ np.isnan(data["C4'-C4'"])])
    c1p_c1p=list(data["C1'-C1'"][~ np.isnan(data["C1'-C1'"])])

    GMM_histo(p_p, f"Distance P-P between {ntpair}", scan, toric=False, hist=False, col="cyan")
    GMM_histo(p_c4p, f"Distance P-C4' between {ntpair}", scan, toric=False, hist=False, col="tomato")
    GMM_histo(p_c1p, f"Distance P-C1' between {ntpair}", scan, toric=False, hist=False, col="goldenrod")
    GMM_histo(c4p_c4p, f"Distance C4'-C4' between {ntpair}", scan, toric=False, hist=False, col="magenta")
    GMM_histo(c1p_c1p, f"Distance C1'-C1' between {ntpair}", scan, toric=False, hist=False, col="black")

    plt.xlabel("Distance (Angströms)")

    plt.title(f"GMM of distances for {ntpair} ", fontsize=10)

    plt.savefig(runDir + "/results/figures/GMM/Pyle/distances_i_i+1/" + f"Distances_Pyle_{ntpair}.png" )
    plt.close()
    setproctitle(f"GMM (Pyle {ntpair} distances) finished")
@trace_unhandled_exceptions
def gmm_pyle_per_type(scan):

    setproctitle("GMM (Pyle model)")

    df = pd.read_csv(os.path.abspath(runDir + "/results/geometry/Pyle/distances_i_i+1/distances.csv"))

    data=df
    if len(data):
        for b1 in ['A','C','G','U']:
            for b2 in ['A','C','G','U']:
                thisbases = data[(data.res1 == b1)&(data.res2 == b2)]
                if len(thisbases):
                    gmm_pyle_type(b1+b2, thisbases, scan)
    setproctitle("GMM (Pyle model) finished")

# The next 7 functions are used to calculate the statistical potentials with the averaging method (residue-averaging)
# for the Pyle model from the GMM results, and to plot the corresponding figures


def pgaussred(x):
    """
    distribution function of the normal distribution (= probability that a random variable distributed according to is less than x)
    calculation by numerical integration: 2000 slices per standard deviation
    result rounded to 7 decimal places
    """
    if x==0:
        return 0.5
    u=abs(x)
    n=int(u*2000)
    du=u/n
    k=1/sqrt(2*pi)
    u1=0
    f1=k
    p=0.5
    for i in range(0,n):
        u2=u1+du
        f2=k*exp(-0.5*u2*u2)
        p=p+(f1+f2)*du*0.5
        u1=u2
        f1=f2
    if x<0:
        p = 1.0-p
    return round(p, 7)

def proba(m, s, xinf, xsup):
    """
    calculates the probability for a value to belong to the interval [xinf, xsup]
    for a normal distribution with mean m and standard deviation s
    """
    prob=pgaussred((xsup-m)/s)-pgaussred((xinf-m)/s)

    return prob

def extract_from_json(data, xinf, xsup):
    """
    extracts the means and standard deviations of the json obtained with the GMM calculations
    and calculates from these parameters the probability for a value to belong to the interval [xinf, xsup].
    """
    p=[]
    p_tot=0
    classes=[]
    with open(data + '.json') as json_data:
        data_dict = json.load(json_data)

    for i in range(len(data_dict['means'])):
        mean = data_dict['means'][i]
        if mean[0] == '[':
            mean=float((mean.split('['))[1].split(']')[0])
        else :
            mean=float(mean)
        std = data_dict['std'][i]
        if std[0] == '[':
            std=float((std.split('['))[2].split(']')[0])
        else:
            std=float(std)
        prob=proba(mean, std, xinf, xsup)
        p.append(prob)

    for x in p:
        p_tot = p_tot+x
    return p_tot

def averaging(liste_data, name):
    """
    creates a json that contains all the means and standard deviations of the parameters
    that we want to use to create the reference distribution with the averaging method
    """

    curves=[]

    x = np.linspace(0,250,1000)

    summary_data = {}
    summary_data["measures"] = []
    summary_data["means"] = []
    summary_data["std"] = []
    for data in  liste_data:
        s=0
        mean_tot=0
        std_tot=0
        with open(runDir + '/results/geometry/json/' + data + '.json') as json_data:
            data_dict = json.load(json_data)

        for i in range(len(data_dict['means'])):
            mean = float(data_dict['means'][i])

            std = float(data_dict['std'][i])

            weight = float(data_dict['weights'][i])
            y = weight*st.norm.pdf(x, mean, std)
            mean_tot=mean_tot+(weight*mean)
            std_tot=std_tot+(weight*std)

            curves.append(y)
        summary_data["measures"].append(data)
        summary_data["means"].append(str(mean_tot))
        summary_data["std"].append(str(std_tot))

    with open(runDir + '/results/statistical_potential/json/Pyle/avg_' +name + ".json", 'w', encoding='utf-8') as f:
        json.dump(summary_data, f, indent=4)

def potential_dist(data_obs, data_ref):
    name=data_obs.split('/')[-1]
    l=[]
    for k in range(0, 400, 5):
        obs=extract_from_json(data_obs, k, k+5)
        ref=extract_from_json(data_ref, k, k+5)
        if obs != 0.0 and ref != 0.0:
            u = -log(obs/ref)
            l.append([k, k+5, obs, ref, u])
            l.append([k+5, k+5, obs, ref, u])

        else:
            l.append([k, k+5, obs, ref, None])
            l.append([k+5, k+5, obs, ref, None])
    df=pd.DataFrame(l, columns=["Xinf", "Xsup", "Pobs", "Pref", "- ln(Pobs/Pref)"])
    df.to_csv(runDir + '/results/statistical_potential/ratio/Pyle/Statistical potential ' + name + ".csv")

def courbe_stat_pot(f):
    name=f.split('/')[-1]
    name=name.split('.')[0]
    df=pd.read_csv(f)
    E=list(df["- ln(Pobs/Pref)"][~ np.isnan(df["- ln(Pobs/Pref)"])])
    max_E=max(E)
    min_E=min(E)
    index_with_nan=df.index[df.iloc[:,5].isnull()]
    for i in index_with_nan:
        df.iloc[i, 5]=max_E # we replace the nan by the maximum found energy


    y=list(df["- ln(Pobs/Pref)"])
    x=list(df["Xinf"])

    x=np.array(x)
    y=np.array(y)
    plt.plot(x, y)
    plt.xlabel("Distance (Angström)")
    plt.ylabel("- ln(Pobs/Pref)")
    plt.title(name)
    plt.savefig(runDir + "/results/statistical_potential/figures/Pyle/avg_statistical_pot_ " + name + ".png")
    plt.close()

def stat_potential_pyle():
    pyle = ["P-P", "P-C1'", "P-C4'", "C1'-C1'", "C4'-C4'"]
    nt = ["A", "C", "G", "U"]
    for pair in pyle:
        type_list=[]
        for res1 in nt:
            for res2 in nt:
                setproctitle(f"RNANet statistics.py stat_potential_pyle({pair}, {res1}{res2})")
                type_list.append(f"Distance {pair} between {res1}{res2}")
                averaging(type_list, f"{pair}")
                for dist in type_list:
                    potential_dist(runDir + '/results/geometry/json/' + dist, runDir + f'/results/statistical_potential/json/Pyle/avg_{pair}')
                    courbe_stat_pot(runDir + '/results/statistical_potential/ratio/Pyle/Statistical potential ' + dist + '.csv')
                    setproctitle(f"RNANet statistics.py stat_potential_pyle({pair}, {res1}{res2}) finished")


@trace_unhandled_exceptions
def measures_heavy_atoms(name, s, thr_idx):
    """
    create a list of all possible pairs of atoms among the 85 types of atoms
    and of all classes of 5 Angstroms intervals between 0 and 150 (and >150)
    Calculate the distance between the different pairs
    and count the number of occurrences in each distance class
    """

    if (path.isfile(runDir + '/results/geometry/all-atoms/distances_classes/distances_classes_occur_'+name+'.csv')):
        return

    #85 types of atoms
    A=['AP', 'AOP1', 'AOP2', "AO5'", "AC5'", "AC4'", "AO4'", "AC3'", "AO3'", "AC2'", "AO2'", "AC1'", 'AN9', 'AC8', 'AN7', 'AC5', 'AC6', 'AN6', 'AN1', 'AC2', 'AN3', 'AC4']
    C=['CP', 'COP1', 'COP2', "CO5'", "CC5'", "CC4'", "CO4'", "CC3'", "CO3'", "CC2'", "CO2'", "CC1'", 'CN1', 'CC2', 'CO2', 'CN3', 'CC4', 'CN4', 'CC5', 'CC6']
    G=['GP', 'GOP1', 'GOP2', "GO5'", "GC5'", "GC4'", "GO4'", "GC3'", "GO3'", "GC2'", "GO2'", "GC1'", 'GN9', 'GC8', 'GN7', 'GC5', 'GC6', 'GO6', 'GN1', 'GC2', 'GN2', 'GN3', 'GC4']
    U=['UP', 'UOP1', 'UOP2', "UO5'", "UC5'", "UC4'", "UO4'", "UC3'", "UO3'", "UC2'", "UO2'", "UC1'", 'UN1', 'UC2', 'UO2', 'UN3', 'UC4', 'UO4', 'UC5', 'UC6']

    setproctitle(f"RNANet statistics.py Worker {thr_idx+1} measures_heavy_atoms({name})")

    l=A+C+G+U
    comb=[]
    for i in combinations_with_replacement(l,2):
        # gives all the possible pairs (only once each) from the list of atoms -> 3655 pairs
        comb.append(i)

    list_comb=[]
    for x in comb:
        x=list(x)
        x=x[0]+'-'+x[1]
        list_comb.append(x)

    classes=[]
    # list for all classes of 5 Angstroms intervals between 0 and 150 (and >150)
    for i in range(0, 150, 5):
        classes.append([i, i+5])
    classes.append([150, 300])
    occurences=[]
    occurences=3655*[32*[0]] #3655 atom-pair types, 31 distance classes
    for i in range(len(occurences)):
        occurences[i]=[list_comb[i]]+31*[0] # 1 sublist per atom_pair type


    chain = next(s[0].get_chains())
    coord_list=[]
    resseq=[]
    atom_types=[]
    for x in l:
        atom_types.append([x, 0])

    for res in chain:
        if res.get_resname() in ["A", "C", "G", "U"]:
            resseq.append(list(res.get_id())[1])
            # list of residue numbers
            for atom in res:
                coord_list.append([list(res.get_id())[1], res.get_resname(), atom.get_name(), atom.get_coord()])
                for i in range(len(atom_types)):
                    if res.get_resname()+atom.get_name()==atom_types[i][0]:
                        # count the number of atoms of each type
                        atom_types[i][1]=atom_types[i][1]+1

    nb_atom_types=pd.DataFrame(atom_types, columns=["atom", "count"])
    nb_atom_types.to_csv(runDir + '/results/geometry/all-atoms/atom_count/atom_count_'+name+'.csv')

    # creates a dataframe with the coordinates of the atoms, their names,
    # and the names and positions of the corresponding residues
    df=pd.DataFrame(coord_list, columns=["res_num", "res_name", "atom_name", "atom_coord"])

    # now, calculate the distances between all pairs of atoms along the chain
    for i in tqdm(range(1, max(df["res_num"])+1), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {name} measures_heavy_atoms", unit="res", leave=False):
        if i in resseq:
            res1=df[df["res_num"]==i]
            res_name1=res1["res_name"][res1.index[0]]
            for atom1 in res1.index:
                coor1=res1["atom_coord"][atom1]
                atom_name1=res1["atom_name"][atom1]

                for j in range(i+1, max(df["res_num"])+1):
                    if j in resseq:
                        res2=df[df["res_num"]==j]
                        if res2.shape[0]==0:
                            return
                        res_name2=res2["res_name"][res2.index[0]]
                        for atom2 in res2.index:
                            coor2=res2["atom_coord"][atom2]
                            atom_name2=res2["atom_name"][atom2]
                            # calculation of the distance between the 2 atoms
                            dist=get_euclidian_distance(coor1, coor2)

                            for x in range(len(classes)):
                                    # we seek to which class this distance belongs
                                    if classes[x][0]<= dist< classes[x][1] :
                                        for k in occurences:
                                            if res_name1+atom_name1+'-'+res_name2+atom_name2 == k[0] or res_name2+atom_name2+'-'+res_name1+atom_name1 == k[0]:
                                                # add 1 to the box corresponding to the type of distance and class identified
                                                k[x+1]=k[x+1]+1

    classes_str=[str(c) for c in classes]
    # creation of the dataframe: types of pairs of atoms in rows and classes in columns
    df_occur = pd.DataFrame(occurences, columns=["atom_pair_type"] + classes_str)
    # save this
    df_occur.to_csv(runDir + "/results/geometry/all-atoms/distances_classes/distances_classes_occur_" + name + ".csv")
@trace_unhandled_exceptions
def count_occur_atom_dist(fpath, outfilename):
    """
    After having calculated the number of occurrences of the distances between pairs of atoms
    sorted by distance class and by type of pair for each RNA chain,
    we add these occurrences one by one to obtain a single dataframe with the total number of occurrences
    """

    setproctitle(f"Addition of occurences of {fpath}")
    liste=os.listdir(fpath)

    df_tot = pd.read_csv(os.path.abspath(fpath + liste.pop()))

    for f in range(len(liste)):
        df = pd.read_csv(os.path.abspath(fpath + liste.pop()))
        for i in range(df.shape[0]):
            for j in range(2, df.shape[1]):
                df_tot.iloc[i, j]=df_tot.iloc[i, j] + df.iloc[i, j]

    df_tot.to_csv(fpath+outfilename)

    setproctitle(f"Addition of occurences of {fpath} finished")

@trace_unhandled_exceptions
def mole_fraction(fpath, outfilename):
    """
    Calculation of the mole fraction of each type of atom within the set of structures
    """
    setproctitle(f"Calculation of mole fractions of {fpath}")
    liste=os.listdir(fpath)


    df_tot = pd.read_csv(os.path.abspath(fpath + liste.pop()))
    del df_tot["Unnamed: 0"]

    for f in range(len(liste)):
        df = pd.read_csv(os.path.abspath(fpath + liste.pop()))
        del df["Unnamed: 0"]
        for i in range(df.shape[0]):
                df_tot.iloc[i, 1]=df_tot.iloc[i, 1] + df.iloc[i, 1]

    total=sum(list(df_tot["count"]))
    fract=[]
    for i in range(df_tot.shape[0]):

        fract.append(df_tot.iloc[i, 1]/total)
    df_tot["mole_fraction"]=fract
    # file_list=os.listdir(fpath)
    # file_list=[fpath+x for x in file_list]
    # for f in file_list: #after processing, deletion of csv by structure
    #     os.remove(f)
    df_tot.to_csv(fpath+outfilename)

    setproctitle(f"Calculation of mole fractions of {fpath} finished")

@trace_unhandled_exceptions
def compute_ratio_from_csv(fpath, avg_file, qch_file):
    """
    Calculation of observed and reference probabilities
    according to the methods chosen to establish the reference state
    Then calculation of the Pobs / Pref ratio
    """
    setproctitle("Compute ratio from csv")
    df = pd.read_csv(fpath)
    del df['Unnamed: 0']
    del df['Unnamed: 0.1']

    # calculation of the reference probability for each distance class (averaging method)
    # if method=averaging
    pref_avg_list=[]
    sommes=[]
    s_tot=0
    for j in range(1, df.shape[1]):
        s=0
        for i in range(df.shape[0]):
            s=s+df.iloc[i,j]
        sommes.append(s)
    s_tot=sum(sommes)
    for s in sommes:
        pref_avg_list.append(s/s_tot)


    df_bis=df.copy()
    # if method=averaging
    for i in range(df.shape[0]):
        for j in range(1, df.shape[1]):
            # calculation of the observed probability for each atom_pair type in each distance class
            df_bis.iloc[i,j]=df.iloc[i,j]/(sum(df.iloc[i, k] for k in range(1, df.shape[1])))
            # ratio between the observed probability and the reference probability
            df_bis.iloc[i,j]=df_bis.iloc[i,j]/pref_avg_list[j-1]


    # if method=quasi-chemical
    df_ter=df.copy()
    atom_count=pd.read_csv(runDir + '/results/geometry/all-atoms/atom_count/atom_count.csv')
    for i in range(df.shape[0]):
        x=[] # mole fractions
        for k in range(atom_count.shape[0]):
            # calculate the mole fractions of the atom corresponding to the pair in row i
            if atom_count.at[k, 'atom']==df.iloc[i, 0].split('-')[0] or atom_count.at[k, 'atom']==df.iloc[i, 0].split('-')[1]:
                x.append(atom_count.at[k, 'mole_fraction'])

        for j in range(1, df.shape[1]):
            if len(x)==2:
                df_ter.iloc[i, j]=df.iloc[i,j]/(x[0]*x[1]*sommes[j-1]) # ratio for qchA method (Nijobs(r)/xi*xj*Nobs(r))
            if len(x)==1:
                df_ter.iloc[i, j]=df.iloc[i,j]/(x[0]*x[0]*sommes[j-1])
    df_bis.to_csv(runDir + '/results/statistical_potential/ratio/all-atoms/' + avg_file)
    df_ter.to_csv(runDir + '/results/statistical_potential/ratio/all-atoms/' + qch_file)

    setproctitle("Compute ratio from csv finished")

@trace_unhandled_exceptions
def sql_new_table(conn):
    cur = conn.cursor()
    sql = """ CREATE TABLE IF NOT EXISTS all_atoms (
                pot_id          INTEGER PRIMARY KEY NOT NULL,
                atom_pair       VARCHAR(10) NOT NULL,
                distance_bin    VARCHAR(15),
                avg_ratio_pobs_pref REAL,
                qch_ratio_pobs_pref       REAL,
                UNIQUE (atom_pair, distance_bin)
            );
         """
    cur.execute(sql)
    # conn.executescript(
    #     """ CREATE TABLE IF NOT EXISTS stat_potential (
    #             pot_id INTEGER NOT NULL PRIMARY KEY,
    #             res1            CHAR(1),
    #             atom1           CHAR(4),
    #             res2            CHAR(1),
    #             atom2           CHAR(4),
    #             method          VARCHAR(20),
    #             distance_bin    VARCHAR(15),
    #             ratio_pobs_pref REAL,
    #             log_ratio       REAL
    #         );
    #      """)
    # CREATE TABLE IF NOT EXISTS pyle (
    #             pot_id          INTEGER PRIMARY KEY NOT NULL,
    #             atom_pair       VARCHAR(10) NOT NULL,
    #             distance_bin    VARCHAR(15),
    #             avg_ratio_pobs_pref REAL,
    #             UNIQUE (atom_pair, distance_bin)
    #         );
    conn.commit()

    #cur.close()

    conn.execute("pragma journal_mode=wal")
    #conn.close()
@trace_unhandled_exceptions
def save_into_database():
    setproctitle("Saving statistical potentials(avg, qch) into database")
    df_avg = pd.read_csv(runDir + '/results/statistical_potential/ratio/all-atoms/avg_ratio_pobs_pref.csv')
    df_qch = pd.read_csv(runDir + '/results/statistical_potential/ratio/all-atoms/qch_ratio_pobs_pref.csv')
    ratio_list=[]
    del df_avg['Unnamed: 0']
    del df_qch['Unnamed: 0']
    for i in range(df_avg.shape[0]):
        for j in range(1,df_avg.shape[1]):

            ratio_list.append([df_avg.iloc[i,0], df_avg.columns[j], df_avg.iloc[i, j], df_qch.iloc[i,j]])


    with sqlite3.connect(runDir + "/results/Stat_potential.db", timeout=20.0) as conn:
        conn.execute('pragma journal_mode=wal') # Allow multiple other readers to ask things while we execute this writing query
        # We use the REPLACE keyword to get the latest information
        sql_execute(conn, """INSERT OR REPLACE INTO all_atoms (atom_pair, distance_bin, avg_ratio_pobs_pref, qch_ratio_pobs_pref )
                                VALUES (?, ?, ?, ?);""",
                    many=True,
                    data=ratio_list
                    )
    setproctitle("Saving statistical potentials(avg, qch) into database finished")
@trace_unhandled_exceptions
def stat_potential(f, method):
    df=pd.read_csv(f)
    del df['Unnamed: 0']
    df.set_index('atom_pair_type', inplace=True)
    df=df.T
    setproctitle(f"RNANet statistics.py stat_potential({method})")
    for pair in df.columns:
        c=df[pair].tolist()
        new=[]
        for x in c:
            if x!=0.0 and not np.isnan(x):
                new.append(-log(x))
                new.append(-log(x))
            if x==0.0:
                new.append(0.0)
                new.append(0.0)
            if np.isnan(x):
                new.append(0.0)
                new.append(0.0)
        abs=[]

        for i in range(0, 150, 5):
            abs.append(i)
            abs.append(i+5)
        abs.append(150)
        abs.append(300)

        x=abs
        y=new
        x=np.array(x)
        y=np.array(y)
        plt.plot(x, y)
        plt.xlabel('Distance')
        plt.ylabel("- ln(Pobs/Pref)")
        plt.title(f'Statistical potential of {pair} distance ({method} method)')
        plt.savefig(runDir + f"/results/statistical_potential/figures/all-atoms/{method}_method/{method}_statistical_pot_{pair}.png")
        plt.close()

    setproctitle(f"RNANet statistics.py stat_potential({method}) finished")


if __name__ == "__main__":
    print("This file is not supposed to be run directly. Run statistics.py instead.")