library(SDM)
library(MASS, first=T)

ir <- rbind(iris[,,1], iris[,,2], iris[,,3])
ir.species <- c(rep("s",50), rep("c",50), rep("v",50))
ir.sp <- as.factor(ir.species)
brush(ir)

ir.pca <- princomp(log(ir), cor=T)
ir.pca
summary(ir.pca)
plot(ir.pca)
loadings(ir.pca)
ir.pc <- predict(ir.pca)
eqscplot(ir.pc[,1:2], type="n",
     xlab = "first principal component",
     ylab = "second principal component")
text(ir.pc[,1:2], labels = ir.species, col=3+as.numeric(ir.sp))

ir.scal <- cmdscale(dist(ir), k = 2, eig = T)
ir.scal$points[, 2] <- -ir.scal$points[, 2]
eqscplot(ir.scal$points, type="n")
text(ir.scal$points, labels = ir.species, col=3+as.numeric(ir.sp), cex=0.8)
distp <- dist(ir)
dist2 <- dist(ir.scal$points)
sum((distp - dist2)^2)/sum(distp^2)

ir.sam <- sammon(dist(ir[-143,]))
eqscplot(ir.sam$points, type="n")
text(ir.sam$points, labels=ir.species[-143], col=3+as.numeric(ir.sp), cex=0.8)

ir.iso <- isoMDS(dist(ir[-143,]))
eqscplot(ir.iso$points, type="n")
text(ir.iso$points, labels=ir.species[-143], col=3+as.numeric(ir.sp), cex=0.8)

# quite slow - a minute?
fgl.iso <- isoMDS(dist(as.matrix(fgl[-40, -10])))
eqscplot(fgl.iso$points, type="n", xlab="", ylab="")
# either
for(i in seq(along=levels(fgl$type))) {
  set <- fgl$type[-40] == levels(fgl$type)[i]
  points(fgl.iso$points[set,], pch=18, cex=0.6, col=2+i)}
key(text=list(levels(fgl$type), col=3:8))
# or
text(fgl.iso$points, labels = c("F", "N", "V", "C", "T", "H")
     [fgl$type[-40]], cex=0.6)

fgl0 <- fgl[-40, ] # omit duplicate
fgl.col <- c("SkyBlue", "SlateBlue", "Orange", "Orchid", "Green", "HotPink")[fgl0$type]
fgl.iso3 <- isoMDS(dist(as.matrix(fgl0[-10])), k = 3) #slow
spin(fgl.iso3$points)
xgobi(fgl.iso3$points, colors = fgl.col)  # Use rotate mode
## or
ggobi(fgl.iso3$points, colors = codes(fgl0$type))


# biplots

state <- state.x77[,2:7]; row.names(state) <- state.abb
biplot(princomp(state, cor=T), pc.biplot=T, cex = 0.7, ex=0.8)


# ICA

library(fastICA)
nICA <- 4
crabs.grp <- factor(c("B", "b", "O", "o")[rep(1:4, each = 50)])
crabs.ica <- fastICA(crabs[, 4:8], nICA)
Z <- crabs.ica$S
par(mfrow = c(2, (nICA+1)%/%2))
for(i in 1:nICA) boxplot(split(Z[, i], crabs.grp))


# Glyph representations

stars(state.x77[, c(7, 4, 6, 2, 5, 3)], byrow = T)

parcoord(state.x77[, c(7, 4, 6, 2, 5, 3)])

parcoord(log(ir)[, c(3, 4, 2, 1)], col = c(1,2,5)[1 + (0:149)%/%50])


# clustering

h <- hclust(dist(swiss.x), method="connected")
plclust(h)
cutree(h, 3)
plclust( clorder(h, cutree(h, 3) ))

h <- hclust(dist(swiss.x), method="average")
initial <- tapply(swiss.x, list(rep(cutree(h, 3),
   ncol(swiss.x)), col(swiss.x)), mean)
dimnames(initial) <- list(NULL, dimnames(swiss.x)[[2]])
km <- kmeans(swiss.x, initial)
swiss.pca <- princomp(swiss.x)
swiss.pca
swiss.px <- predict(swiss.pca)
dimnames(km$centers)[[2]] <- dimnames(swiss.x)[[2]]
swiss.centers <- predict(swiss.pca, km$centers)
eqscplot(swiss.px[, 1:2], type="n",
   xlab="first principal component",
   ylab="second principal component")
text(swiss.px[,1:2], labels = km$cluster)
points(swiss.centers[,1:2], pch=3, cex=3)
identify(swiss.px[, 1:2], cex=0.5)

swiss.pam <- pam(swiss.px, 3)
summary(swiss.pam)
eqscplot(swiss.px[, 1:2], type="n",
   xlab="first principal component",
   ylab="second principal component")
text(swiss.px[,1:2], labels = swiss.pam$clustering)
points(swiss.pam$medoid[,1:2], pch=3, cex=3)

fanny(swiss.px, 3)

#pltree(agnes(swiss.x, method="single"))
#pltree(diana(swiss.x))

library(mclust)
h <- mhtree(swiss.x, modelid = "VVV")
(mh <- as.vector(mhclass(h, 3)))

z <- me(swiss.x, modelid = "VVV", z = (ctoz(mh)+1/3)/2)
eqscplot(swiss.px[, 1:2], type = "n",
         xlab = "first principal component",
         ylab = "second principal component")
text(swiss.px[, 1:2], labels = max.col(z))

vals <- emclust(swiss.x) # all possible models, 0:9 clusters.
sm <- summary(vals, swiss.x)
eqscplot(swiss.px[, 1:2], type = "n",
         xlab = "first principal component",
         ylab = "second principal component")
text(swiss.px[, 1:2], labels = sm$classification)


# Kohonen's SOM

library(class)
lcrabs <- log(crabs[, 4:8])
gr <- somgrid(topo = "hexagonal")
crabs.som <- batchSOM(lcrabs, gr, c(4, 4, 2, 2, 1, 1, 1, 0, 0))
plot(crabs.som)

bins <- as.numeric(knn1(crabs.som$code, lcrabs, 0:47))
plot(crabs.som$grid, type = "n")
symbols(crabs.som$grid$pts[, 1], crabs.som$grid$pts[, 2],
        circles = rep(0.4, 48), inches = F, add = T)
text(crabs.som$grid$pts[bins, ] + rnorm(400, 0, 0.1),
     as.character(crabs.grp))

crabs.som2 <- SOM(lcrabs, gr); plot(crabs.som2)

bins <- as.numeric(knn1(crabs.som2$code, lcrabs, 0:47))
plot(crabs.som2$grid, type = "n")
symbols(crabs.som2$grid$pts[, 1], crabs.som2$grid$pts[, 2],
        circles = rep(0.4, 48), inches = F, add = T)
text(crabs.som2$grid$pts[bins, ] + rnorm(400, 0, 0.1),
     as.character(crabs.grp))


# exploratory projection pursuit

xgobi(lcrabs, colors=c("SkyBlue", "SlateBlue", "Orange",
              "Red")[rep(1:4, rep(50, 4))])

ggobi(lcrabs, colors=rep(1:4, rep(50, 4)))

fgl.col <- c("SkyBlue", "SlateBlue", "Orange", "Orchid",
             "Green", "HotPink")[fgl$type]

xgobi(fgl[, -10], colors=fgl.col)

xgobi(fgl[1:185, -10], colors=fgl.col[1:185])

ggobi(fgl[1:185, -10], colors=codes(fgl.col[1:185]))


## look at MDS dynamically
dist2full <- function(dis)
{
        n <- attr(dis, "Size")
        full <- matrix(0, n, n)
        full[lower.tri(full)] <- dis
        full + t(full)
}
fd <- dist2full(dist(as.matrix(fgl[-40, -10])))
xgvis(fd,  colors=fgl.col[-40])


### viruses

virus.tot <- apply(virus[,1:18], 1, sum)
truehist(virus.tot, h=5)
boxplot(split(virus.tot, virus$type))
bwplot(virus$type ~ virus.tot)

# extract Tobamoviruses
virus3 <- virus[c(10:28,30:48),1:18]
v3 <- sapply(virus3, function(x) (x-mean(x))/sqrt(var(x)))


par(mfrow=c(2,2))
v3.prc <- princomp(virus3, cor=F)$scores[,1:2]
eqscplot(v3.prc, xlab="", ylab="", type="n")
text(v3.prc, labels = row.names(virus3), cex=0.5)

v3.sam <- sammon(dist(as.matrix(virus3)))
eqscplot(v3.sam$points, xlab="", ylab="", type="n")
text(v3.sam$points, labels = row.names(virus3), cex=0.5)

v3.sam <- sammon(dist(v3))
eqscplot(v3.sam$points, xlab="", ylab="", type="n")
text(v3.sam$points, labels = row.names(virus3), cex=0.5)
par(mfrow=c(1,1))

v3.mds <- isoMDS(dist(v3))
eqscplot(v3.mds$points, xlab="", ylab="", type="n")
text(v3.mds$points, labels = row.names(virus3), cex=0.5)

## This next crashes my copy of S+6.1 for Windows
par(pty="s", mfrow=c(1,2))
plot(dist(v3), dist(v3.sam$points), xlab="observed distances",
     ylab="fitted distances")
v3.sh <- Shepard(dist(v3), v3.mds$points)
plot(v3.sh, xlab="observed distances", ylab="fitted distances",
  type="n")
points(v3.sh, cex=0.3)
lines(v3.sh$x, v3.sh$yf, type="S")
par(pty="m", mfrow=c(1,1))

## visualize MDS
d3 <- dist2full(dist(v3))
xgvis(d3)
vv <- sapply(virus[, -19], function(x) (x-mean(x))/sqrt(var(x)))
dv <- dist2full(dist(as.matrix(virus[,-19])))
vv.col <- c("DeepSkyBlue1", "OrangeRed1", "DeepPink", "Yellow")[virus[, 19]]
xgvis(dv, colors=vv.col)


par(mfrow=c(1,3))
h <- hclust(dist(v3), method="connected")
plclust(clorder(h, cutree(h, 6)), label=as.character(c(10:28,30:48)),
        main="single-link")
h <- hclust(dist(v3), method="compact")
plclust(clorder(h, cutree(h, 5)), label=as.character(c(10:28,30:48)),
        main="complete-link")
h <- hclust(dist(v3), method="average")
plclust(clorder(h, cutree(h, 6)), label=as.character(c(10:28,30:48)),
        main="group average")

par(mfrow=c(1,1))
initial <- tapply(v3, list(rep(cutree(h, 6), ncol(v3)), col(v3)), mean)
v3.km <- kmeans(v3, initial)
eqscplot(v3.sam$points, xlab="", ylab="", type="n")
text(v3.sam$points, labels = as.character(v3.km$cluster), cex=1)

xgobi(v3)

ggobi(v3)


# Categorical data

caith1 <- as.matrix(caith)
names(dimnames(caith1)) <- c("eyes", "hair")
mosaicplot(caith1, color = T)

House <- crosstabs(Freq ~ Type + Infl + Cont + Sat, housing)
mosaicplot(House, color = T)

corresp(caith)
caith2 <- caith
dimnames(caith2)[[2]] <- c("F", "R", "M", "D", "B")
#par(mfcol = c(1, 3))
plot(corresp(caith2, nf = 2)); title("symmetric")
plot(corresp(caith2, nf = 2), type = "rows"); title("rows")
plot(corresp(caith2, nf = 2), type = "col"); title("columns")

farms.mca <- mca(farms, abbrev = T)  # Use levels as names
plot(farms.mca, cex = rep(0.7, 2), axes = F)


#-----------------------------------------------------------------

# League tables

ft2 <- ft1; names(ft2) <- abbreviate(names(ft1))
T2 <- sapply(Times[,1:8], function(x) x/IQR(x))
dimnames(T2)[[1]] <- row.names(Times)
FT <- sapply(ft2[,1:16], function(x) x/IQR(x))
dimnames(FT)[[1]] <- row.names(ft2)

xgobi(Times, color=c("Yellow", "SkyBlue", "Orange")[Times$type])

xgobi(FT, color=c(rep("Yellow", 5), rep("SkyBlue",51), rep("Orange", 41)))

biplot(princomp(Times[,1:8]),cex=0.7); title(main="Times: Raw")
biplot(princomp(T2), expand=0.8, cex=0.7); title(main="Times: IQR scaled")
biplot(princomp(FT), expand=0.8, cex=0.7); title(main="FT: IQR scaled")

Times.sam <- sammon(dist(T2))
Times.iso <- isoMDS(dist(T2))
FT.sam <- sammon(dist(FT))
FT.iso <- isoMDS(dist(FT))

eqscplot(Times.sam$points, type="n", xlab="", main="Sammon: Times, IQR")
text(Times.sam$points, labels = row.names(Times), cex=0.7)

eqscplot(Times.iso$points, type="n", xlab="", main="MDS: Times, IQR")
text(Times.iso$points, labels = row.names(Times), cex=0.7)

eqscplot(FT.sam$points, type="n", xlab="", main="Sammon: FT, IQR")
text(FT.sam$points, labels = row.names(ft1), cex=0.7)

eqscplot(FT.iso$points, type="n", xlab="", main="MDS: FT, IQR")
text(FT.iso$points, labels = row.names(ft1), cex=0.7)

