Name Last Update
..
BaseGraphClass.cc Loading commit data...
BaseGraphClass.h Loading commit data...
BaseGraphClass.o Loading commit data...
FeatureGenerator.h Loading commit data...
FlagsService.cc Loading commit data...
FlagsService.h Loading commit data...
FlagsService.o Loading commit data...
GraphClass.cc Loading commit data...
GraphClass.h Loading commit data...
GraphClass.o Loading commit data...
KQuickShift Loading commit data...
KQuickShift.cc Loading commit data...
LearningCurveROC.ps Loading commit data...
Makefile Loading commit data...
NSPDK Loading commit data...
NSPDK.cc Loading commit data...
NSPDK.doxyfile Loading commit data...
NSPDK.o Loading commit data...
NSPDK_FeatureGenerator.cc Loading commit data...
NSPDK_FeatureGenerator.h Loading commit data...
NSPDK_FeatureGenerator.o Loading commit data...
OpenBabelConverter.cpp Loading commit data...
OpenBabelConverter.h Loading commit data...
OpenBabelConverter.o Loading commit data...
README Loading commit data...
Utility.cc Loading commit data...
Utility.h Loading commit data...
Utility.o Loading commit data...
bursi.gspan Loading commit data...
bursi.target Loading commit data...
bursi.test.gspan Loading commit data...
bursi.test.gspan.prediction Loading commit data...
bursi.test.target Loading commit data...
bursi.train.gspan Loading commit data...
bursi.train.target Loading commit data...
gzstream.cpp Loading commit data...
gzstream.h Loading commit data...
gzstream.o Loading commit data...
log Loading commit data...
model Loading commit data...
perf Loading commit data...
svmsgdnspdk Loading commit data...
svmsgdnspdk.cpp Loading commit data...
svmsgdnspdk.o Loading commit data...
svmsgdnspdk.tgz Loading commit data...
vectors.cpp Loading commit data...
vectors.h Loading commit data...
vectors.o Loading commit data...
wrapper.h Loading commit data...
#-------------------------------------------------------------------------------
#Train-test cycle
# Step 1: run the TRAIN action on the training split; -m names the model file.
./svmsgdnspdk \
    -i bursi.train.gspan -t bursi.train.target \
    -r 3 -d 8 \
    -a TRAIN -m model
# Step 2: run the TEST action on the test graphs with the same -r/-d values
# and the same model file (no target file is passed here).
./svmsgdnspdk \
    -i bursi.test.gspan \
    -r 3 -d 8 \
    -a TEST -m model
# Step 3: join targets and predictions line by line, keep fields 1 and 3 of the
# joined line, score them with ./perf, and flatten perf's "name value" lines
# into one line. NOTE(review): bursi.test.gspan.prediction is presumably
# written by the TEST step above -- confirm.
paste bursi.test.target bursi.test.gspan.prediction \
    | awk '{print $1,$3}' \
    | ./perf -APR -ROC -ACC -t 0 -PRF 2>/dev/null \
    | awk '{printf("%s %s ",$1,$2)}END{printf("\n")}'
ACC 0.93504 PRF 0.93791 APR 0.97995 ROC 0.97822

#-------------------------------------------------------------------------------
#Cross validation cycle
# Run the CROSS_VALIDATION action on the full data set.
./svmsgdnspdk \
    -i bursi.gspan -t bursi.target \
    -r 3 -d 8 \
    -a CROSS_VALIDATION
# Score fields 2 and 4 of the cross-validation predictions with ./perf and
# flatten perf's "name value" lines into a single line.
# NOTE(review): bursi.gspan.cv_predictions is presumably produced by the
# command above -- confirm.
awk '{print $2,$4}' bursi.gspan.cv_predictions \
    | ./perf -ACC -PRF -APR -ROC -t 0 2>/dev/null \
    | awk '{printf("%s %s ",$1,$2)}END{printf("\n")}'
ACC 0.92506 PRF 0.93409 APR 0.97962 ROC 0.97660

#-------------------------------------------------------------------------------
#Parameter optimization
# Run the PARAMETERS_OPTIMIZATION action on the full data set.
# Standard output is discarded; anything written to stderr still shows.
./svmsgdnspdk \
    -i bursi.gspan -t bursi.target \
    -r 5 -d 10 -l 1e-10 -e 50 -c 5 \
    -a PARAMETERS_OPTIMIZATION \
    >/dev/null

#-------------------------------------------------------------------------------
#Learning curve

# Collect learning-curve data: NUM_REP repetitions, each evaluated on folds 1..LC.
# Per fold, dat_lc receives two lines:
#   "<number of lines in the train-fold prediction file> <train metrics>"
#   "<test metrics>"
LC=10
NUM_REP=10
lcn=$((LC+1))
# Start from an empty dat_lc. -f keeps rm quiet when the file does not exist
# yet (e.g. on the very first run).
\rm -f dat_lc
for r in $(seq 1 $NUM_REP); do
    # log_lc is overwritten on every repetition, so it only keeps the last run.
    ./svmsgdnspdk -a LEARNING_CURVE -i bursi.gspan -t bursi.target -p $lcn -R $r | tee log_lc
    for i in $(seq 1 $LC); do
        dim=$(cat bursi.gspan.lc_predictions_train_fold_$i | wc -l)
        # printf instead of "echo -n": the -n flag is not portable across shells.
        printf '%s ' "$dim" >>dat_lc
        cat bursi.gspan.lc_predictions_train_fold_$i \
            | awk '{print $2,$4}' \
            | ./perf -APR -ROC -ACC -t 0 -PRF 2>/dev/null \
            | awk '{printf("%s %s ",$1,$2)}END{printf("\n")}' >>dat_lc
        cat bursi.gspan.lc_predictions_test_fold_$i \
            | awk '{print $2,$4}' \
            | ./perf -APR -ROC -ACC -t 0 -PRF 2>/dev/null \
            | awk '{printf("%s %s ",$1,$2)}END{printf("\n")}' >>dat_lc
    done
done
# Join each train line (odd) with the following test line (even) and align columns.
cat dat_lc | awk 'NR%2==1{printf("%s ",$0)}NR%2==0{print $0}' | column -t > dat


# Write a gnuplot script to tmp_plot_command. The script:
#   - renders to the EPS file LearningCurveROC.ps;
#   - fits a - b/(x + c) to column 9 of 'dat' (train) and, separately, to
#     column 17 (test), both against column 1;
#   - plots the raw points together with the two fitted curves.
# Column 1 of 'dat' is the line count of the train-fold prediction file.
# Given the metric order in the sample perf output (ACC PRF APR ROC),
# columns 9 and 17 are the train and test ROC values.
# The heredoc delimiter is unquoted, but the script contains no shell
# expansions, so it is written out verbatim.
cat <<EOF >tmp_plot_command
set terminal postscript eps color enhanced "Helvetica" 11
set grid
set xlabel "Training set size"
set ylabel "Area Under ROC Curve"
set out 'LearningCurveROC.ps'
ftr(x)=atr-btr/(x+ctr)
fts(x)=ats-bts/(x+cts)
fit ftr(x) 'dat' u 1:9  via atr,btr,ctr
fit fts(x) 'dat' u 1:17  via ats,bts,cts
plot 'dat' u 1:9 t "" w p lt 1, '' u 1:17 t "" w p lt 2, ftr(x) t "Train" w l lt 1 lw 2, fts(x) t "Test"  w l lt 2 lw 2
EOF
# Run the script, then open the resulting plot in a viewer.
gnuplot tmp_plot_command
evince LearningCurveROC.ps
 
#-------------------------------------------------------------------------------
#Embedding
#make increasingly permuted sequences
# Seed sequence, stored in the file "seq" (the seq command itself is still
# resolved through PATH).
echo "abcdefghilmnopqrstuvz" > seq
# For j = 2..40, emit the seed sequence after j character swaps.
# The random seed depends only on the swap index i (srand(i+600)), so swap
# number i is the same for every j: line j is line j-1 plus one more swap.
for j in $(seq 2 40); do
    \cp seq tmp
    for i in $(seq 1 $j); do
        # Swap the characters at random positions i < j.
        # The prefix is substr($0,1,i-1): awk strings are 1-based. A start
        # index of 0 makes gawk/mawk (POSIX semantics) return only i-2
        # characters, silently dropping one character per swap.
        cat tmp | awk -v I=$i '
            BEGIN { srand(I+600) }
            {
                l = length($0)
                i = int(rand()*(l-2)+1)
                j = int(rand()*(l-i-1)+i+1)
                print substr($0,1,i-1) substr($0,j,1) substr($0,i+1,j-i-1) substr($0,i,1) substr($0,j+1)
            }' > tmp2
        \mv tmp2 tmp
    done
    cat tmp
done | tee dat.seq
# Run the EMBED action on the generated sequences.
./svmsgdnspdk -i dat.seq -f SEQUENCE -g DIRECTED -a EMBED -G 2 -N 3
#gnuplot data
dat=dat.seq
target=$dat.target

# Print |last - first| of column $2 of file $1 after a general-numeric sort,
# i.e. the spread between the smallest and largest value of that column.
_column_span() {
    awk -v C="$2" '{print $C}' "$1" \
        | sort -g \
        | awk 'NR==1{lo=$1} {hi=$1} END{d=hi-lo; if(d<0){d=-d}; print d}'
}

# Label the first half of the lines 1 and the rest -1.
awk -v DIM=$(wc -l < $dat) '{ if (NR < DIM/2) { print 1 } else { print -1 } }' $dat > $target

xrange=$(_column_span $dat.embed 1)
yrange=$(_column_span $dat.embed 2)

# One row per point: x, y, circle radius, label.
# The radius is (1 - field 3) / SCALE, with SCALE = 30 / min(xrange, yrange).
paste $dat.embed $dat.distortion $target \
    | awk -v S=30 -v XRANGE=$xrange -v YRANGE=$yrange '
        BEGIN { m = (XRANGE < YRANGE) ? XRANGE : YRANGE; SCALE = S / m }
        { print $1, $2, (1-$3)/SCALE, $4 }' > $dat.plot

# gnuplot script: the shell expands $dat; \$N reaches gnuplot as column N.
cat <<EOF >PLOT
set size ratio -1
plot '$dat.plot' u 1:(\$4==-1?\$2:1/0):3 t "" with circles lc rgb "orange" fs transparent solid 0.1, '' u 1:(\$4==1?\$2:1/0):3 t "" with circles lc rgb "blue" fs transparent solid 0.1, '' u  1:(\$4==-1?\$2:1/0) t "" w p pt 7 lc rgb 'red', '' u  1:(\$4==1?\$2:1/0) t "" w p pt 7 lc 0
pause (-1)
EOF
gnuplot PLOT

#Note: to plot with a shade of color proportional to the confidence
# Run the CONFIDENCE action, then the EMBED action, on the full data set.
./svmsgdnspdk -i bursi.gspan -t bursi.target \
    -a CONFIDENCE -r 3 -d 8 -e 10 -c 3
./svmsgdnspdk -i bursi.gspan \
    -a EMBED -r 3 -d 8

# Split the joined rows on field 3 (1 -> datp, -1 -> datn), each sorted
# numerically on field 4. Tabs from paste are turned into single spaces.
for label in 1 -1; do
    case $label in
        1)  out=datp ;;
        -1) out=datn ;;
    esac
    paste bursi.gspan.embed bursi.gspan.conf bursi.gspan.distortion \
        | tr '\t' ' ' \
        | awk -v L="$label" '$3==L' \
        | sort -k4,4n > $out
done

# gnuplot script: circles at (col 1, col 2) with radius (1 - col 7)/s,
# coloured blue (datp) or red (datn) with intensity col 4 * 255.
cat <<EOF >PLOT
set size ratio -1
rgb(r,g,b) = int(r)*65536 + int(g)*256 + int(b)
s=50
plot 'datp' u 1:2:((1-\$7)/s):(rgb(0,0,\$4*255)) t "" with circles lc rgb variable fs transparent solid .3 noborder, 'datn' u 1:2:((1-\$7)/s):(rgb(\$4*255,0,0)) t "" with circles lt rgb variable fs transparent solid .3 noborder
pause (-1)
EOF
gnuplot PLOT