Authored by Ludovic PLATON, 2018-03-22 23:59:25 +0000
Commit a51525c0ec364b93286c531f1855bcdcee739c4c (1 parent: 0d170666)

Improve classifier in order to handle reject

Showing 5 changed files with 103 additions and 69 deletions:

scripts/SLSOM/SOM.py
scripts/SLSOM/SSOM.py
scripts/SLSOM/performance_measure.py
scripts/SLSOM/util.py
scripts/train.py
scripts/SLSOM/SOM.py
...
@@ -115,19 +115,19 @@ class SOM_basic(object):
         res = tf.exp(-gamma*0.5*tf.pow(dist,2.0))
         return res
 
-    def sim2units_neighbour(self,data,units=None):
-        if units is None:
-            units = self.units
-        dist = self.dist2units(data,units)
-        # max_dist = tf.reduce_max(tf.sqrt(tf.reduce_sum(tf.pow((tf.expand_dims(units,0) - tf.expand_dims(units,1)),2.0),axis=2)))
-        # gamma = 1.0 / (max_dist/np.sqrt(2.0*self.ulen))
-        # res = 1.0 / (dist+1.0)
-        gamma = 1.0
-        res = tf.exp(-gamma*tf.pow(dist,2.0))
-        bmus = tf.argmin(dist,1)
-        dist_bmu = self.dist_bmus_op(bmus)
-        neighbour = self.R(dist_bmu,tf.cast(self.learning_rate(self.it),tf.float64)*max(self.dim)/2.0)
-        return res*tf.transpose(neighbour)
+    # def sim2units_neighbour(self,data,units=None):
+    #     if units is None:
+    #         units = self.units
+    #     dist = self.dist2units(data,units)
+    #     # max_dist = tf.reduce_max(tf.sqrt(tf.reduce_sum(tf.pow((tf.expand_dims(units,0) - tf.expand_dims(units,1)),2.0),axis=2)))
+    #     # gamma = 1.0 / (max_dist/np.sqrt(2.0*self.ulen))
+    #     # res = 1.0 / (dist+1.0)
+    #     gamma = 1.0
+    #     res = tf.exp(-gamma*tf.pow(dist,2.0))
+    #     bmus = tf.argmin(dist,1)
+    #     dist_bmu = self.dist_bmus_op(bmus)
+    #     neighbour = self.R(dist_bmu,tf.cast(self.learning_rate(self.it),tf.float64)*max(self.dim)/2.0)
+    #     return res*tf.transpose(neighbour)
 
     def dist2units(self,data,units=None):
         if units is None:
...
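For reference, the similarity used in this file is a Gaussian kernel on the sample-to-unit distances, res = exp(-gamma * d^2), with the best matching unit taken as the argmin of the same distances. A minimal standalone NumPy sketch of that kernel (toy data and unit vectors are made up; gamma is fixed to 1.0 as in the code above):

import numpy as np

# Hypothetical toy input: 3 samples with 2 features, and a 4-unit map.
data = np.array([[0.0, 0.0], [1.0, 1.0], [0.5, 0.2]])
units = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])

gamma = 1.0
# Euclidean distance from every sample to every unit (shape 3 x 4).
dist = np.linalg.norm(data[:, None, :] - units[None, :, :], axis=2)
# Gaussian similarity, as in res = tf.exp(-gamma*tf.pow(dist,2.0)).
res = np.exp(-gamma * dist**2)
# Best matching unit per sample, as in tf.argmin(dist,1).
bmus = np.argmin(dist, axis=1)
print(res.round(3), bmus)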
scripts/SLSOM/SSOM.py
...
@@ -21,12 +21,14 @@ def init_SLSOM(path,som):
     return tmp
 
 class SLSOM(object):
-    def __init__(self,som,nb_label,loss_type='cross_entropy',verbose=True):
+    def __init__(self,som,nb_label,loss_type='cross_entropy',alpha0=1.0,alpha1=0.6,verbose=True):
         self.tf_object = som.tf_object
         self.ulen = som.ulen
         self.nb_label = nb_label
         self.som = som
         self.loss_type = loss_type
+        self.alpha0 = alpha0
+        self.alpha1 = alpha1
         self.verbose = verbose
         with self.tf_object.graph.as_default():
             self.W = tf.Variable(tf.random_normal([self.ulen,self.nb_label],dtype=tf.float64))
...
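For orientation, W has shape (ulen, nb_label): the classifier maps the vector of SOM unit activations to class scores through a single affine layer followed by a softmax. A toy NumPy sketch of that mapping (map size, label count and the random activations are assumptions, not values from the repository):

import numpy as np

rng = np.random.default_rng(0)
ulen, nb_label = 9, 2                   # assumed 3x3 map, binary coding/noncoding labels
x = rng.random((1, ulen))               # one sample's unit activations
W = rng.normal(size=(ulen, nb_label))   # same shape as the tf.Variable above
b = np.zeros(nb_label)

scores = x @ W + b
proba = np.exp(scores) / np.exp(scores).sum()   # softmax over the two classes
print(proba)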
@@ -40,13 +42,12 @@ class SLSOM(object):
             self.it = tf.Variable(0,dtype=tf.int32)
             self.update_it = self.it.assign_add(1)
             self.data = self.som.sim2units(self.som.data2pred)
-            self.datapred = tf.one_hot(self.som.bmu_finder(self.som.data2pred,self.som.units),self.som.ulen,dtype=tf.float64)
+            # self.datapred = tf.one_hot(
+            #     self.som.bmu_finder(self.som.data2pred,self.som.units),
+            #     self.som.ulen,
+            #     dtype=tf.float64
+            # )
             self.data_size = tf.placeholder(tf.int32,shape=[1])
             self.lambda_penality = tf.placeholder(tf.float64,shape=[1])
...
@@ -58,7 +59,10 @@ class SLSOM(object):
             self.update_it_som = self.som.it.assign_add(1)
 
     def learning_rate(self,it):
-        return 1.0 - tf.cast(self.it,tf.float64)/(tf.cast(self.it_max,tf.float64))
+        #tmp = 1.0/(tf.cast(self.it,tf.float64)+1.0)
+        #return tf.Print(tmp,[tmp],"IT : ")
+        tmp = 1.0 - tf.cast(self.it,tf.float64)/(tf.cast(self.it_max,tf.float64))
+        return tmp
 
     def save(self,path):
         W = self.get_W()
...
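The rewritten learning_rate keeps the same linear decay, only bound to a temporary before returning: lr(it) = 1 - it/it_max. A quick standalone check of the schedule (it_max of 2000 is the default assumed here):

it_max = 2000

def learning_rate(it):
    # linear decay from 1.0 at it=0 to 0.0 at it=it_max
    return 1.0 - it / it_max

print([learning_rate(it) for it in (0, 500, 1000, 2000)])  # [1.0, 0.75, 0.5, 0.0]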
@@ -82,36 +86,44 @@ class SLSOM(object):
         dist = self.som.dist2units(self.som.data2pred)
         bmus = tf.argmin(dist,1)
         dist_bmu = self.som.dist_bmus_op(bmus)
-        neighbour = self.som.R(dist_bmu,tf.cast(self.learning_rate(self.it),tf.float64)*max(self.som.dim)/2.0)
+        neighbour = self.som.R(dist_bmu,(self.alpha1+(self.alpha0-self.alpha1)*tf.cast(self.learning_rate(self.it),tf.float64))*max(self.som.dim))
         x = x*tf.transpose(neighbour)
         y = tf.matmul(x,self.W)+self.biases
-        y_ = tf.one_hot(self.labels,self.nb_label,dtype=tf.float64)
-        if self.loss_type == 'cross_entropy':
-            loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y,labels=self.labels))
-        else:
-            loss = 0.5*tf.reduce_mean(tf.pow(tf.nn.softmax(y)-y_,2.0))
+        if self.loss_type == 'cross_entropy':
+            self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y,labels=self.labels))
+        else:
+            y_ = tf.one_hot(self.labels,self.nb_label,dtype=tf.float64)
+            self.loss = 0.5*tf.reduce_mean(tf.pow(tf.nn.softmax(y)-y_,2.0))
         regularizer = tf.contrib.layers.l2_regularizer(self.lambda_penality)
         penality = regularizer(self.W)
-        optimizer = tf.train.GradientDescentOptimizer(0.3*self.learning_rate(self.it))
-        optimizer2 = tf.train.GradientDescentOptimizer(self.learning_rate(self.it))
-        loss2 = tf.add(loss,penality)
-        applied = optimizer.minimize(loss2,var_list=[self.W,self.biases])
-        applied2 = optimizer2.minimize(loss2,var_list=[self.som.units])
-        return (applied,applied2)
+        optimizer = tf.train.GradientDescentOptimizer(0.1*self.learning_rate(self.it))
+        #optimizer2 = tf.train.GradientDescentOptimizer(0.3*self.learning_rate(self.it))
+        loss2 = tf.add(self.loss,penality)
+        applied = optimizer.minimize(loss2,var_list=[self.W,self.biases,self.som.units])
+        #applied2 = optimizer2.minimize(loss2,var_list=[])
+        #return (applied,applied2)
+        return applied
 
     def proba_class_op(self):
-        x = self.datapred
+        # x = self.datapred
+        x = self.data
+        dist = self.som.dist2units(self.som.data2pred)
+        bmus = tf.argmin(dist,1)
+        dist_bmu = self.som.dist_bmus_op(bmus)
+        neighbour = self.som.R(dist_bmu,self.alpha1*max(self.som.dim)/2.0)
+        x = x*tf.transpose(neighbour)
+        y = tf.matmul(x,self.W
...
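This reworked graph is where the reject behaviour comes from: when loss_type is not 'cross_entropy', the target is a one-hot vector and the loss is 0.5 * mean((softmax(y) - y_)^2) plus an L2 penalty on W, and a single optimizer now updates W, the biases and the SOM units together. Below is a small NumPy sketch of that squared-error-plus-penalty objective and of one possible reject rule on the resulting probabilities; the logits, the 0.6 threshold and the "-1 means reject" convention are assumptions for illustration, not values taken from this commit:

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

# Toy logits for 3 samples and 2 classes, plus their integer labels.
y = np.array([[2.0, -1.0], [0.1, 0.0], [-0.5, 1.5]])
labels = np.array([0, 0, 1])
y_ = np.eye(2)[labels]                    # one-hot targets, as tf.one_hot above

W = np.random.randn(9, 2)                 # stand-in for the classifier weights
lam = 0.001                               # the --penality coefficient
# squared error on the softmax output, plus (roughly) what the L2 regularizer adds
loss = 0.5 * np.mean((softmax(y) - y_) ** 2) + lam * 0.5 * np.sum(W ** 2)

proba = softmax(y)
# One way to reject: abstain (-1) when no class is confident enough.
pred = np.where(proba.max(axis=1) < 0.6, -1, proba.argmax(axis=1))
print(loss, pred)   # the second, near-ambiguous sample is rejected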
@@ -127,24 +139,39 @@ class SLSOM(object):
     def get_biases(self):
         return self.tf_object.run(self.biases)
 
-    def train(self,data,labels,max_it=2000,batch_size=10,penality=0.001):
-        it = np.array([max_it])
+    def train(self,data,labels,nb_it=2000,batch_size=10,penality=0.001):
         nb_data = data.shape[0]
         pen = np.array([penality])
         data2 = data
-        for i in range(max_it):
-            if self.verbose:
-                print("It SLSOM: "+str(i))
-            idx = np.random.randint(nb_data,size=batch_size)
-            self.tf_object.run(self.train_op,
-                               feed_dict={
-                                   self.som.data2pred:data2[idx,:],
-                                   self.data_size:np.array([batch_size]),
-                                   self.labels:labels[idx],
-                                   self.lambda_penality:pen,
-                                   self.it_max:max_it
-                               })
-            self.tf_object.run(self.update_it)
+        loss_old = 0.0
+        run = True
+        it = 0
+        while run:
+            if self.verbose:
+                print("It SLSOM: "+str(it))
+            # idx = np.random.randint(nb_data,size=batch_size)
+            # _, loss = self.tf_object.run([self.train_op,self.loss],
+            #     feed_dict={
+            #         self.som.data2pred:data2[idx,:],
+            #         self.data_size:np.array([batch_size]),
+            #         self.labels:labels[idx],
+            #         self.lambda_penality:pen,
+            #         self.it_max:nb_it
+            #     })
+            _, loss = self.tf_object.run([self.train_op,self.loss],
+                feed_dict={
+                    self.som.data2pred:data,
+                    self.labels:labels,
+                    self.lambda_penality:pen,
+                    self.it_max:nb_it
+                })
+            delta_loss = np.absolute(loss-loss_old)
+            if self.verbose:
+                print("Diff loss: "+str(delta_loss))
+            if delta_loss < np.power(10.0,-6.0) or not it < nb_it:
+                run = False
+            it = self.tf_object.run(self.update_it)
+            loss_old = loss
 
     def predict(self,data):
         pred,proba = self.tf_object.run([self.prediction,self.proba_data_op],
...
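train now runs full-batch updates until the change in loss drops below 1e-6 or nb_it iterations have passed, instead of a fixed max_it loop over random mini-batches. The stopping logic in isolation (a generic sketch under the assumption that step() stands in for one run of the train op and returns the current loss):

import numpy as np

def train_until_converged(step, nb_it=2000, tol=1e-6, verbose=True):
    """Call step() repeatedly until the loss stops moving or nb_it is reached."""
    loss_old, it, run = 0.0, 0, True
    while run:
        loss = step()
        delta_loss = np.absolute(loss - loss_old)
        if verbose:
            print("It SLSOM:", it, "Diff loss:", delta_loss)
        if delta_loss < tol or not it < nb_it:
            run = False
        it += 1
        loss_old = loss
    return loss

# Example with a dummy step whose loss decays geometrically.
losses = (0.5 * 0.9 ** k for k in range(100000))
print(train_until_converged(lambda: next(losses), nb_it=50, verbose=False))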
scripts/SLSOM/performance_measure.py
...
@@ -12,10 +12,10 @@ class Performance:
     def Compute_stat(self):
         positive = (self.arr[:,0]==1)
         negative = (self.arr[:,0]==0)
-        self.TP = float(np.sum(self.arr[positive,1]))
+        self.TP = float(np.sum(self.arr[positive,1]==1))
         self.TN = float(np.sum(self.arr[negative,1]==0))
         self.FP = float(np.sum(self.arr[positive,1]==0))
-        self.FN = float(np.sum(self.arr[negative,1]))
+        self.FN = float(np.sum(self.arr[negative,1]==1))
 
     #More advanced measure
     def Sensitivity(self):
...
@@ -24,9 +24,6 @@ class Performance:
     def Specificity(self):
         return self.TN/(self.TN+self.FP)
 
     def Precision(self):
         return self.TP/(self.TP+self.FP)
 
-    # Advanced measure
     def Accuracy(self):
         return (self.TP+self.TN)/(self.TP+self.FP+self.FN+self.TN)
...
@@ -43,7 +40,6 @@ class Performance:
         return {"Sensitivity": self.Sensitivity(),
                 "Specificity": self.Specificity(),
                 "Precision": self.Precision(),
                 "Accuracy": self.Accuracy(),
                 "F1": self.F1(),
                 "MCC": self.MCC(),
...
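The fix in Compute_stat matters presumably because column 1 of arr can now hold values other than 0/1 (for instance a reject code), so TP and FN have to test ==1 explicitly rather than summing the column. Once the four counts are correct, the derived measures are the standard ones; a worked example with assumed counts (not from the repository's data):

# Assumed confusion counts for 100 transcripts.
TP, TN, FP, FN = 40.0, 35.0, 15.0, 10.0

sensitivity = TP / (TP + FN)                    # 0.8
specificity = TN / (TN + FP)                    # 0.7
precision   = TP / (TP + FP)                    # ~0.727
accuracy    = (TP + TN) / (TP + FP + FN + TN)   # 0.75
print(sensitivity, specificity, precision, accuracy)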
scripts/SLSOM/util.py
...
@@ -9,6 +9,7 @@ import os
 import pandas as pd
 from plotnine import *
 from functools import partial
+from concurrent.futures import ThreadPoolExecutor
 
 '''
 Files checking
...
@@ -27,16 +28,23 @@ Import data
 '''
 def import_ncRNA(path):
-    file_order = ["CP.txt","ORF.txt","KMER3.txt","KMER6.txt"]
-    df = pd.read_csv(path+file_order[0],sep=",",header=None)
-    for i in range(1,len(file_order)):
-        tmp = pd.read_csv(path+file_order[i],sep=",",header=None)
-        df = df.merge(tmp,on=0)
+    file_order = ["CB.txt","ORF.txt","KMER3.txt"]#,"KMER6.txt"]
+    #df = pd.read_csv(path+file_order[0],sep=",",header=None)
+    df_raw = []
+    with ThreadPoolExecutor(max_workers=4) as tp:
+        for x in file_order:
+            df_raw.append(tp.submit(pd.read_csv,path+x,sep=",",header=None))
+    df = df_raw[0].result()
+    for i in range(1,len(file_order)):
+        #tmp = pd.read_csv(path+file_order[i],sep=",",header=None)
+        tmp = df_raw[i].result()
+        df = df.merge(tmp,on=0)
     df = df.fillna(0)
     data = df.iloc[:,1:].values
     data_names = df.iloc[:,0].values
     # Modification
-    data[:,np.arange(4)] = 0.25*data[:,np.arange(4)]
-    data[:,4] = np.exp(-data[:,4]/200)
+    # data[:,np.arange(4)] = 0.25*data[:,np.arange(4)]
+    # data[:,4] = np.exp(-data[:,4]/200)
     return (data,data_names)
 '''
...
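The rewritten import_ncRNA submits each pd.read_csv to a ThreadPoolExecutor and collects the futures in order, which overlaps the I/O of the feature files. The submit/result pattern in isolation (the file names below are hypothetical; any list of CSV paths would do):

from concurrent.futures import ThreadPoolExecutor
import pandas as pd

paths = ["features/CB.txt", "features/ORF.txt", "features/KMER3.txt"]  # hypothetical paths

futures = []
with ThreadPoolExecutor(max_workers=4) as tp:
    for p in paths:
        # submit returns immediately; the read runs in a worker thread
        futures.append(tp.submit(pd.read_csv, p, sep=",", header=None))

# result() blocks until the corresponding read is done, preserving file order
frames = [f.result() for f in futures]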
@@ -70,8 +78,8 @@ def plot_weights(units,m,n,name):
     features = [np.arange(4),
                 np.arange(4,6),
-                np.arange(6,6+4**3),
-                np.arange(6+4**3,units.shape[1])
+                # np.arange(6,6+4**3),
+                # np.arange(6+4**3, units.shape[1])
                 ]
     for l,f in enumerate(features):
         dico = []
...
scripts/train.py
""" Train IRSOM model on ncRNA.
Usage:
train.py --featurer=<path> --output=<path> (-c <coding>)... (-n <noncoding>)... [--dim0=<value> --dim1=<value> --batch_size=<value> --penality=<value> --keep_features]
train.py --featurer=<path> --output=<path> (-c <coding>)... (-n <noncoding>)... [--dim0=<value> --dim1=<value> --batch_size=<value> --penality=<value> --keep_features
--verbose
]
train.py (-h | --help)
train.py --version
...
...
@@ -12,9 +12,10 @@ Options:
     -n <noncoding>        Path for one or multiple fasta file(s) containing noncoding transcript.
     --dim0=<value>        SOM dimension 0 (by default at 3)
     --dim1=<value>        SOM dimension 1 (by default at 3).
-    --batch_size=<value>  the size of the batch given at each iteration (by default at 10).
+    --batch_size=<value>  the size of the batch given at each iteration (by default at 100).
     --penality=<value>    Coefficient of the regularization term (by default at 0.001).
     --keep_features       Keep the features computed in the "output" folder.
+    --verbose             Produce more output
 """
 from docopt import docopt
...
@@ -31,10 +32,11 @@ def main():
     arguments = docopt(__doc__, version="train IRSOM 1.0")
     output_path = os.path.expanduser(os.path.expandvars(arguments["--output"]))
     featurer_path = os.path.expanduser(os.path.expandvars(arguments["--featurer"]))
-    map_size_m = int(arguments["--dim0"]) if not arguments["--dim0"] is None else 3
-    map_size_n = int(arguments["--dim1"]) if not arguments["--dim0"] is None else 3
-    batch_size = float(arguments["--batch_size"]) if not arguments["--batch_size"] is None else 10
+    map_size_m = int(arguments["--dim0"]) if not arguments["--dim0"] is None else 4
+    map_size_n = int(arguments["--dim1"]) if not arguments["--dim0"] is None else 4
+    batch_size = float(arguments["--batch_size"]) if not arguments["--batch_size"] is None else 1000
     penality = float(arguments["--penality"]) if not arguments["--penality"] is None else 0.001
+    verbose = arguments["--verbose"]
 
     #Compute features
     path_feature_root = [output_path+"features/coding",output_path+"features/noncoding"]
...
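The option handling keeps the same pattern, only the fall-back values change (dim0/dim1 default to 4, batch_size to 1000) and --verbose is read directly. Note that both dim lines test arguments["--dim0"], so a --dim1 given on its own appears to be ignored. A small sketch of the same defaulting pattern with that guard made explicit (the arguments dict below mimics what docopt returns; the values are hypothetical):

# arguments as docopt would return them (hypothetical values).
arguments = {"--dim0": "5", "--dim1": None, "--batch_size": None,
             "--penality": None, "--verbose": True}

def opt(args, key, cast, default):
    # Fall back to a default when the option was not given on the command line.
    return cast(args[key]) if args[key] is not None else default

map_size_m = opt(arguments, "--dim0", int, 4)
map_size_n = opt(arguments, "--dim1", int, 4)        # keyed on --dim1, unlike the line above
batch_size = opt(arguments, "--batch_size", float, 1000)
penality = opt(arguments, "--penality", float, 0.001)
verbose = arguments["--verbose"]
print(map_size_m, map_size_n, batch_size, penality, verbose)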
@@ -76,12 +78,12 @@ def main():
     data = np.concatenate(data_coding_list+data_noncoding_list,axis=0)
     label = np.repeat([0,1],[nb_coding,nb_noncoding])
-    som = SOM(m=map_size_m,n=map_size_n,unit_width=data.shape[1],verbose=False)
+    som = SOM(m=map_size_m,n=map_size_n,unit_width=data.shape[1],verbose=verbose)
-    ssom = SLSOM(som,2,verbose=False)
+    ssom = SLSOM(som,2,verbose=verbose)
     ssom.tf_object.initialize()
-    ssom.train(data,label,data.shape[0],batch_size,penality)
+    ssom.train(data,label,penality=penality)
     print("SLSOM learned")
     check_dir(output_path+"SOM/")
...
@@ -90,6 +92,7 @@ def main():
     ssom.save(output_path+"SLSOM/")
     y,p = ssom.predict(data)
     np.savetxt(output_path+"proba.txt",np.array(p))
     rep,_ = som.repartition_map(data,label)
     plot_repartition(rep,map_size_m,map_size_n,output_path+"plot_repartition")
+    plot_density(label,p,output_path+"plot_density")
...