(Code by Seth Flaxman.) You can get the data, the zip.train and zip.test files of handwritten digits, from https://statweb.stanford.edu/~tibs/ElemStatLearn/data.html
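If the files aren't already on disk, something like the following should fetch them. This is only a sketch: the exact paths to the raw files under the ElemStatLearn site are an assumption and may need adjusting.

base = "https://statweb.stanford.edu/~tibs/ElemStatLearn/datasets/" # assumed location of the raw files
download.file(paste0(base,"zip.train.gz"),"zip.train.gz")
download.file(paste0(base,"zip.test.gz"),"zip.test.gz")
# unpack so that fread("zip.train") / fread("zip.test") below work (needs the R.utils package)
R.utils::gunzip("zip.train.gz")
R.utils::gunzip("zip.test.gz")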

library(data.table)
train = fread("zip.train")
test = fread("zip.test")
# keep only the digits 2 and 3 (column V1 is the label) and convert to plain matrices
train = data.matrix(train[V1 %in% c(2,3)])
test = data.matrix(test[V1 %in% c(2,3)])
p = ncol(train)-1 # the pixel features used below sit in columns 2:p; column 1 is the label
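As a quick sanity check (a sketch): the first column is the digit label and the remaining columns hold the 16x16 = 256 gray-scale pixel values.

dim(train)        # rows by (label column + pixel columns)
table(train[,1])  # how many 2s and 3s are in the training set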

library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-5
# fit an elastic-net penalized logistic regression path (alpha = 0.5 mixes the lasso and
# ridge penalties); the plot below shows the training fit, where a deviance ratio of 1
# means the model explains all of the deviance on the training data, i.e. it does as
# well as the unregularized model (which, as we see below, is almost perfect)
fit = glmnet(train[,2:p],train[,1],family="binomial",alpha=.5)
plot(log(fit$lambda),fit$dev.ratio,ty="l",xlab="log lambda",ylab="Deviance Ratio",main="Learning curve for training data")
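dev.ratio is glmnet's fraction of (null) deviance explained, so it can be recomputed directly from the fitted object as a check (a sketch):

# dev.ratio = 1 - deviance/null deviance; deviance() on a glmnet fit returns
# the deviance at every lambda along the path
all.equal(fit$dev.ratio, 1 - deviance(fit)/fit$nulldev)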

fit = cv.glmnet(train[,2:p],train[,1],family="binomial",alpha=.5)
# this plot shows the cross-validated error, which estimates the test error
plot(fit)
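cv.glmnet keeps the two standard choices of penalty, and predict() on a cv.glmnet object uses s = "lambda.1se" by default, which is what the predictions below rely on:

fit$lambda.min  # lambda with the lowest cross-validated deviance
fit$lambda.1se  # largest lambda within one standard error of that minimum (predict's default)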

# confusion matrix for train
yhat = (predict(fit,train[,2:p],type="response") > .5)+2 # FALSE -> 2, TRUE -> 3
round(table(yhat,train[,1]) / nrow(train),2)
##     
## yhat    2    3
##    2 0.53 0.00
##    3 0.00 0.47
# train classification accuracy
mean(yhat == train[,1])
## [1] 0.9985601
# confusion matrix for test
ytest = (predict(fit,test[,2:p],type="response") > .5)+2 # FALSE -> 2, TRUE -> 3
round(table(ytest,test[,1])/nrow(test),2)
##      
## ytest    2    3
##     2 0.52 0.02
##     3 0.02 0.44
# test classification accuracy
mean(ytest == test[,1])
## [1] 0.9642857
# unregularized fit (s = 0, i.e. no penalty)
yhat = (predict(fit,train[,2:p],type="response",s=0) > .5)+2
mean(yhat == train[,1])
## [1] 1
round(table(yhat,train[,1]) / nrow(train),2)
##     
## yhat    2    3
##    2 0.53 0.00
##    3 0.00 0.47
ytest = (predict(fit,test[,2:p],type="response",s=0) > .5)+2
round(table(ytest,test[,1])/nrow(test),2)
##      
## ytest    2    3
##     2 0.52 0.02
##     3 0.02 0.44
mean(ytest == test[,1])
## [1] 0.9615385
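The confusion-matrix and accuracy calculations above are repeated four times; a small helper (a sketch, with the hypothetical name confusion) keeps them in one place:

# threshold the predicted probabilities at 0.5, map FALSE/TRUE to the labels 2/3,
# and report the normalized confusion matrix and the accuracy
confusion = function(fit, x, y, ...) {
  pred = (predict(fit, x, type="response", ...) > .5) + 2
  list(table = round(table(pred, y)/length(y), 2), accuracy = mean(pred == y))
}
confusion(fit, test[,2:p], test[,1])       # regularized (default s = "lambda.1se")
confusion(fit, test[,2:p], test[,1], s=0)  # unregularized fit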

So regularization helps a little: test accuracy is 96.4% with the cross-validated penalty versus 96.2% without it, even though the unregularized model classifies the training data perfectly. How about k-nearest neighbours?

library(class)
result = NULL
for(k in 1:10) {
  # classify the training and test sets, using the training set as the reference sample
  yhat = knn(train[,2:p],train[,2:p],train[,1],k)
  ytest = knn(train[,2:p],test[,2:p],train[,1],k)
  result = rbind(result,data.frame(k=k,train=mean(yhat==train[,1]),test=mean(ytest==test[,1])))
}
plot(result$k,result$train,ty="l",ylim=c(0.95,1),col="blue",xlab="number of neighbours k",ylab="accuracy")
lines(result$k,result$test,ty="l",col="green")
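A legend makes it easier to tell the two curves apart (a small addition to the plot above):

legend("bottomright", legend=c("train","test"), col=c("blue","green"), lty=1)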

K-nearest neighbours improves on regularized logistic regression.
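To see by how much, pull out the best-performing k on the test set (a sketch):

result[which.max(result$test),]  # k with the highest test accuracy, plus its train accuracy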