################################################################################ ## Date: 10-26-2015 ## Purpose: Demonstrate jointrls software on the Auto-MPG Data. ## Usage: Should take a couple minutes to run. ################################################################################ ###### ## Load packages and set user defined flags ##### library(glmnet) source("http://www.stat.wvu.edu/~kjryan/jmlr2015/jt_code.R") scale.x=TRUE P1=!TRUE ##Set cars version ####### ## Set up data with median imputation, download from UCI repository. ####### dat<-read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",F,na.strings="?") dim(dat) na.median<-function(x,...){ handle.na<-function(x){ #plug the median into missing values x[is.na(x)]<-median(x,na.rm=TRUE) return(x) } p<-dim(x) if(is.null(p)){ #special case: `x' is a vector return(handle.na(x)) } for(i in 1:p[2]){ if(!is.factor(x[,i]))# if `x' is a factor ignore it x[,i]<-handle.na(x[,i]) } return(x) } dat<-na.median(dat) if(P1){ mx<-dat[,8] x=dat[,-c(8,9)] yt=dat[,1] x=as.matrix(x[,-1]) L=which(mx>1) U=which(mx<2) }else{ mx<-dat[,2] x=dat[,-c(2,9)] yt=dat[,1] x=as.matrix(x[,-1]) L=which(mx<5) U=which(mx>4) } n=dim(x)[1] p=dim(x)[2] y=yt y[U]=NA ##### ## Run Superivsed ##### alphas=seq(0,1,length=57) lalp=length(alphas) gam=0.000000001 tunes<-rep(0,lalp) lams<-rep(0,lalp) bsups<-NULL for(i in 1:lalp){ set.seed(10) g1<-jointrls(y,x,lam=NULL,alp=alphas[i],gam=gam,scale.x=scale.x) bsup<-g1$bsemi bsups<-rbind(bsups,bsup) lams[i]=g1$lam tunes[i]=g1$err } k<-which.min(tunes) lam=lams[k] alp=alphas[k] bsup=bsups[k,] f<-as.vector(cbind(1,x)[U,]%*%bsup) sup.err=sqrt(mean((f-yt[U])^2)) ##### ## Run Semi-superivsed (Should take between 0.8 to 1.3 minutes -- took 0.8 on my machine in BATCH mode) ##### alphas=sort(unique(c(0,1,0.25,0.5,0.75,alp))) gsemi<-cv.jointrls(y,x,alpha=alphas,fold=3,scale.x=scale.x) semi.err<-sapply(1:dim(gsemi$bsemi)[2], function(i)sqrt(mean ( (as.vector(cbind(1,x)[U,]%*%gsemi$bsemi[,i])-yt[U])^2)))[which.min(gsemi$mat$cverr)] ##### ## Display Errors ##### round(sup.err,3) ##Supervised Error round(semi.err,3) ##Semi-Supervised Error round((sup.err-semi.err)/sup.err*100,3) ## %Improvement from Ryan and Culp (2015) ###### ##Get Same superivsed using glmnet directly ##### l=length(L) gsup<-glmnet(x[L,],y[L],lambda=lam*sqrt(var(y[L])*(l-1)/l),alpha=alp,standardize=scale.x) f1=cbind(1,x)[U,]%*%coef(gsup) sup.err-sqrt(mean((f1-yt[U])^2)) ## should be zero