Ludovic PLATON

Fix missing rejection threshold for prediction

1 """ Test SLSOM model on ncRNA. 1 """ Test SLSOM model on ncRNA.
2 2
3 Usage: 3 Usage:
4 - predict.py --featurer=<path> --file=<path_fasta> --model=<path> --output=<path> [--keep_features] 4 + predict.py --featurer=<path> --file=<path_fasta> --model=<path> --output=<path> [--reject=<value>] [--keep_features]
5 predict.py (-h | --help) 5 predict.py (-h | --help)
6 predict.py --version 6 predict.py --version
7 7
...@@ -11,6 +11,7 @@ Options: ...@@ -11,6 +11,7 @@ Options:
11 --file=<path> Path to the fasta file 11 --file=<path> Path to the fasta file
12 --model=<path> Path to the folder containing the model 12 --model=<path> Path to the folder containing the model
13 --output=<path> Path to the output dir 13 --output=<path> Path to the output dir
14 + --reject=<value> Rejection threshold: predictions with a probability below this value are labelled "Rejected".
14 --keep_features Keep the features computed in the "output" folder. 15 --keep_features Keep the features computed in the "output" folder.
15 --version 16 --version
16 17
...@@ -27,7 +28,7 @@ from SLSOM.SOM import * ...@@ -27,7 +28,7 @@ from SLSOM.SOM import *
27 from SLSOM.util import * 28 from SLSOM.util import *
28 29
29 def save_pred(som,data,data_names,y,proba,bmu,path): 30 def save_pred(som,data,data_names,y,proba,bmu,path):
30 - y_label = ["Noncoding" if x==1 else "Coding" for x in y] 31 + y_label = ["Noncoding" if x==1 else if x==0 "Coding" else "Rejected" for x in y]
31 res = np.array([ 32 res = np.array([
32 [data_names[i],bmu[i],y_label[i]]+[proba[i,j] for j in range(proba.shape[1])] 33 [data_names[i],bmu[i],y_label[i]]+[proba[i,j] for j in range(proba.shape[1])]
33 for i in range(data.shape[0])]) 34 for i in range(data.shape[0])])
...@@ -66,6 +67,8 @@ def main(): ...@@ -66,6 +67,8 @@ def main():
66 print("Model loaded") 67 print("Model loaded")
67 y,proba = ssom.predict(data) 68 y,proba = ssom.predict(data)
68 y = np.array(y) 69 y = np.array(y)
70 + if not arguments["--reject"]:
71 + y[proba < float(arguments["--reject"])] = -1
69 bmu = np.array(som.get_BMUS(data)) 72 bmu = np.array(som.get_BMUS(data))
70 73
71 save_pred(som,data,data_names,y,proba,bmu,output_path) 74 save_pred(som,data,data_names,y,proba,bmu,output_path)
......