library(SDM)
library(MASS)
library(nnet)

## Wahba-Wold synthetic example

x <- 1:250 *pi/250
f <- 4.26*(exp(-x) - 4 * exp(-2*x) + 3 * exp(-3*x))
set.seed(111)
y <- rnorm(250, f, 0.2)

plot(x, y, type="n")
points(x, y, cex=0.5, pch=16)
lines(x, lm(y ~ poly(x,6))$fitted, col=3)
lines(x, lm(y ~ poly(x,10))$fitted, col=4)
lines(x, lm(y ~ poly(x,16))$fitted, col=5)

plot(x, y, type="n")
points(x, y, cex=0.5, pch=16)
# try each of these fits several times
# use $ not @ for S+2000
lines(x, nnet(y ~ x, NULL, size=2, skip=T, linout=T, maxit=250)@fitted, col=3)
lines(x, nnet(y ~ x, NULL, size=4, skip=T, linout=T, maxit=250)@fitted, col=4)
lines(x, nnet(y ~ x, NULL, size=8, skip=T, linout=T, maxit=250)@fitted, col=5)

# now add weight decay
lines(x, nnet(y ~ x, NULL, size=8, decay=0.01, skip=T, linout=T, maxit=250)@fitted, col=6)



attach(rock)
area1 <- area/10000; peri1 <- peri/10000
rock1 <- data.frame(perm, area=area1, peri=peri1, shape)
rock.nn <- nnet(log(perm) ~ area + peri + shape, data=rock1,
                size=3, decay=1e-3, linout=T, skip=T, maxit=1000, Hess=T)
summary(rock.nn)
sum((log(perm) - predict(rock.nn))^2)
detach()
eigen(rock.nn@Hessian, T)$values
# eigen(rock.nn$Hessian, T)$values  # for S+2000


Xp <- expand.grid(area=seq(0.1,1.2,0.05),
                  peri=seq(0,0.5,0.02), shape=0.2)
trellis.device()
rock.grid <- cbind(Xp,fit=predict(rock.nn, Xp))
wireframe(fit ~ area + peri, rock.grid, screen=list(z=160,x=-60),
          aspect=c(1,0.5), drape=T)
dev.off()

cpus0 <- cpus[, 2:8]
for(i in 1:3) cpus0[,i] <- log10(cpus0[,i])
set.seed(123); samp <- sample(1:209, 100)

attach(cpus0)
cpus1 <- data.frame(syct=syct-2, mmin=mmin-3, mmax=mmax-4,
cach=cach/256, chmin=chmin/100, chmax=chmax/100, perf=perf)
detach()

test <- function(fit)
  sqrt(sum((log10(cpus1[-samp, "perf"]) -
           predict(fit, cpus1[-samp,]))^2)/109)
cpus.nn1 <- nnet(log10(perf) ~ ., data=cpus1[samp,], linout=T,
                 skip=T, size=0)
test(cpus.nn1)

cpus.nn2 <- nnet(log10(perf) ~ ., data=cpus1[samp,], linout=T,
                 skip=T, size=4, decay=0.01, maxit=1000)
test(cpus.nn2)

cpus.nn3 <- nnet(log10(perf) ~ ., data=cpus1[samp,], linout=T,
                 skip=T, size=10, decay=0.01, maxit=1000)
test(cpus.nn3)

cpus.nn4 <- nnet(log10(perf) ~ ., data=cpus1[samp,], linout=T,
                 skip=T, size=25, decay=0.01, maxit=1000)
test(cpus.nn4)

# several minutes:
CVnn.cpus(log10(perf) ~ ., data=cpus1[samp,], linout=T, skip=T, maxit=1000)


## Cushing's example

cush <- log(as.matrix(Cushings[, -3]))
tp <- factor(Cushings$Type[1:21])
Cf <- data.frame(tp = tp,
   Tetrahydrocortisone = log(Cushings[1:21,1]),
   Pregnanetriol = log(Cushings[1:21,2]) )

# Logistic discrimination

cush.multinom <- multinom(tp ~ Tetrahydrocortisone
   + Pregnanetriol, Cf, maxit=250)
xp <- seq(0.6, 4.0, length=100); np <- length(xp)
yp <- seq(-3.25, 2.45, length=100)
cushT <- expand.grid(Tetrahydrocortisone=xp,
     Pregnanetriol=yp)
Z <- predict(cush.multinom, cushT, type="probs")
plot(Cushings[,1], Cushings[,2], log="xy", type="n",
  xlab="Tetrahydrocortisone", ylab = "Pregnanetriol")
for(il in 1:4) {
  set <- Cushings$Type==levels(Cushings$Type)[il]
  text(Cushings[set, 1], Cushings[set, 2],
       labels=as.character(Cushings$Type[set]), col = 2 + il) }
zp <- Z[,3] - pmax(Z[,2], Z[,1])
contour(xp/log(10), yp/log(10), matrix(zp, np),
   add=T, levels=0, labex=0)
zp <- Z[,1] - pmax(Z[,2], Z[,3])
contour(xp/log(10), yp/log(10), matrix(zp, np),
   add=T, levels=0, labex=0)


# Neural nets

cush <- cush[1:21,]; tpi <- class.ind(tp)
par(mfrow=c(2,2))
pltnn(main="Size = 2")
set.seed(1); plt.bndry(size=2, col=2)
set.seed(3); plt.bndry(size=2, col=3); plt.bndry(size=2, col=4)

pltnn(main="Size = 2, lambda = 0.001")
set.seed(1); plt.bndry(size=2, decay=0.001, col=2)
set.seed(2); plt.bndry(size=0, decay=0.001, col=4)

pltnn(main="Size = 2, lambda = 0.01")
set.seed(1); plt.bndry(size=2, decay=0.01, col=2)
set.seed(2); plt.bndry(size=2, decay=0.01, col=4)

pltnn(main="Size = 5, 20  lambda = 0.01")
set.seed(2); plt.bndry(size=5, decay=0.01, col=1)
set.seed(2); plt.bndry(size=20, decay=0.01, col=2)

par(mfrow=c(1,1))
pltnn(main="Size=3, lambda=0.01")
Z <- matrix(0, nrow(cushT), ncol(tpi))
for(iter in 1:20) {
    set.seed(iter)
    cush.nn <- nnet(cush, tpi,  skip=T, softmax=T, size=3,
        decay=0.01, maxit=1000, trace=F)
    Z <- Z + predict(cush.nn, cushT)
    cat("final value", format(round(cush.nn@value,3)), "\n")
#    cat("final value", format(round(cush.nn$value,3)), "\n") # for S+2000
    b1(predict(cush.nn, cushT), col=2, lwd=0.5)
    guiLocator(0)
}
pltnn(main="Averaged")
b1(Z, lwd=3)

set.seed(123); rand <- sample (10, 214, replace=T)
con <- function(x,y)
{
   tab <- table(x,y)
   print(tab)
   diag(tab) <- 0
   cat("error rate = ", round(100*sum(tab)/length(x),2),"%\n")
   invisible()
}
CVtest <- function(fitfn, predfn, ...)
{
  res <- fgl$type
  for (i in sort(unique(rand))) {
     cat("fold ",i,"\n", sep="")
     learn <- fitfn(rand != i, ...)
     res[rand == i] <- predfn(learn, rand==i)
  }
  res
}
res.multinom <- CVtest(
   function(x, ...) multinom(type ~ ., fgl[x,], ...),
   function(obj, x) predict(obj, fgl[x, ],type="class"),
   maxit=1000, trace=F )

con(fgl$type, res.multinom)

fgl1 <- lapply(fgl[, 1:9], function(x) {r <- range(x); (x-r[1])/diff(r)})
fgl1 <- data.frame(fgl1, type=fgl$type)

res.multinom <- CVtest(
   function(x, ...) multinom(type ~ ., fgl1[x,], ...),
   function(obj, x) predict(obj, fgl1[x, ],type="class"),
   maxit=1000, trace=F)
con(fgl$type, res.multinom)

res.mult2 <- CVtest(
   function(x, ...) multinom(type ~ ., fgl1[x,], ...),
   function(obj, x) predict(obj, fgl1[x, ], type="class"),
   maxit=1000, trace=F, decay=1e-3)
con(fgl$type, res.mult2)

#============================================
# 2-class synthetic data problem


attach(synth.tr)
synth.set()        ## base plot
synth.fit(size=0)  ## linear discrimination

# try this several times
synth.fit(size=6, maxit=500)

synth.fit(size=6, decay=0.001, col=3, maxit=500)

synth.fit(size=6, decay=0.01, col=7, maxit=500)

synth.set()  ## base plot
synth.aver(size=3, decay=0.001, naver=10, maxit=500)

detach()


# Support vector machines

library(libsvm)
crabs.svm <- svm(crabs$sp ~ ., data = lcrabs, cost = 100, gamma = 1)
table(true = crabs$sp, predicted = predict(crabs.svm))

svm(crabs$sp ~ ., data = lcrabs, cost = 100, gamma = 1, cross = 10)
