predict.py
2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
""" Test SLSOM model on ncRNA.
Usage:
predict.py --featurer=<path> --file=<path_fasta> --model=<path> --output=<path> [--reject=<value>] [--keep_features]
predict.py (-h | --help)
predict.py --version
Options:
-h --help Show help screen
--featurer=<path> Path to the featurer executable
--file=<path> Path to the fasta file
--model=<path> Path to the folder containing the model
--output=<path> Path to the output dir
--reject=<value> Rejection threshold.
--keep_features Keep the features computed in the "output" folder.
--version
"""
from docopt import docopt
import os
import shutil
import subprocess
import numpy as np
from SLSOM.SSOM import *
from SLSOM.SOM import *
from SLSOM.util import *
def save_pred(som, data, data_names, y, proba, bmu, path):
    """Write the prediction table and the diagnostic plots into ``path``.

    Three outputs are produced inside ``path``: ``result.txt`` (a
    tab-separated table with one row per sequence), ``plot_repartition``
    and ``plot_density.png``.

    Args:
        som: Map object; must provide ``repartition_map(data, y)`` and a
            two-element ``dim`` attribute.
        data: Feature matrix; ``data.shape[0]`` rows are written.
        data_names: Sequence names, indexed like the rows of ``data``.
        y: Predicted classes. 1 is written as "Noncoding", 0 as "Coding"
            and any other value as "Rejected".
        proba: 2-D array of per-class probabilities, one row per sequence.
        bmu: Per-sequence value written to the "BMU" column.
        path: Output directory, with or without a trailing separator.
    """
    n_classes = proba.shape[1]
    y_label = [
        "Noncoding" if x == 1 else "Coding" if x == 0 else "Rejected"
        for x in y
    ]

    # Mixed str/number rows: numpy stores the whole table as strings, which
    # is why savetxt is called with fmt="%s" below.
    res = np.array([
        [data_names[i], bmu[i], y_label[i]]
        + [proba[i, j] for j in range(n_classes)]
        for i in range(data.shape[0])
    ])

    check_dir(path)

    header = "Sequence_name\tBMU\tlabel" + "".join(
        "\tproba_class_" + str(j) for j in range(n_classes)
    )
    # os.path.join instead of string concatenation: the files land inside
    # ``path`` even when the caller omitted the trailing separator.
    np.savetxt(
        os.path.join(path, "result.txt"),
        res,
        header=header,
        delimiter="\t",
        fmt="%s",
    )

    rep, _ = som.repartition_map(data, y)
    plot_repartition(
        rep, som.dim[0], som.dim[1], os.path.join(path, "plot_repartition")
    )
    plot_density(
        np.array(y_label), proba, os.path.join(path, "plot_density.png")
    )
def main():
    """Run the prediction pipeline described by the command-line arguments.

    Steps, in order: run the external featurer on the FASTA file, import the
    computed features, load the SOM and SLSOM models, predict, optionally
    mark low-confidence predictions as rejected (class -1), write the
    results, and remove the feature folder unless ``--keep_features`` was
    given.

    Raises:
        subprocess.CalledProcessError: if the featurer exits with a
            non-zero status.
    """
    arguments = docopt(__doc__, version="Predict IRSOM 1.0")

    def resolve(option):
        # Expand "$VAR" and "~" so paths can be given shell-style.
        return os.path.expanduser(os.path.expandvars(arguments[option]))

    path_fasta = resolve("--file")
    featurer_path = resolve("--featurer")
    # Both directories are extended below by plain string concatenation, so
    # make sure they end with a separator. os.path.join(p, "") appends one
    # only when it is missing and leaves "dir/" untouched.
    path_model = os.path.join(resolve("--model"), "")
    output_path = os.path.join(resolve("--output"), "")

    path_feature = output_path + "features/"
    check_dir(path_feature)

    # check_call raises on a non-zero exit status instead of silently
    # continuing with missing or partial feature files.
    subprocess.check_call([featurer_path, path_fasta, path_feature])

    data, data_names = import_ncRNA(path_feature)
    print("Import done")

    check_dir(output_path)

    # Build both models, initialise them, then load the stored state.
    som = init_SOM(path_model + "SOM/")
    ssom = init_SLSOM(path_model + "SLSOM/", som)
    som.tf_object.initialize()
    ssom.tf_object.initialize()
    som.load(path_model + "SOM/")
    ssom.load(path_model + "SLSOM/")
    print("Model loaded")

    y, proba = ssom.predict(data)
    y = np.array(y)

    reject = arguments["--reject"]
    if reject:
        # Reject a prediction when the first two class probabilities are
        # closer to each other than the threshold.
        diff = np.absolute(proba[:, 0] - proba[:, 1])
        thresh = float(reject)
        y[diff < thresh] = -1

    bmu = np.array(som.get_BMUS(data))
    save_pred(som, data, data_names, y, proba, bmu, output_path)

    if not arguments["--keep_features"]:
        shutil.rmtree(path_feature)
    print("Prediction finish")
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()