# script for Statistical Data Mining Course. # Copyright (C) 1998 B. D. Ripley # attach(wtloss) plot(Days, Weight, type="p",ylab="Weight (kg)") Wt.lbs <- pretty(range(Weight*2.205)) lines(smooth.spline(Days, Weight), col=4) lines(loess.smooth(Days, Weight, degree=2, family="symmetric"), col=6) detach() attach(mcycle) par(mfrow = c(3,2)) plot(times, accel, main="Polynomial regression") lines(times, fitted(lm(accel ~ poly(times, 3)))) lines(times, fitted(lm(accel ~ poly(times, 6))), lty=3) legend(40, -100, c("degree=3", "degree=6"), lty=c(1,3),bty="n") plot(times, accel, main="Natural splines") lines(times, fitted(lm(accel ~ ns(times, df=5)))) lines(times, fitted(lm(accel ~ ns(times, df=10))), lty=3) lines(times, fitted(lm(accel ~ ns(times, df=20))), lty=4) legend(40, -100, c("df=5", "df=10", "df=20"), lty=c(1,3,4), bty="n") plot(times, accel, main="Smoothing splines") lines(smooth.spline(times, accel)) plot(times, accel, main="Lowess") lines(lowess(times, accel)) lines(lowess(times, accel, 0.2), lty=3) legend(40, -100, c("default span", "f = 0.2"), lty=c(1,3), bty="n") plot(times, accel, main ="ksmooth") lines(ksmooth(times, accel,"normal", bandwidth=5)) lines(ksmooth(times, accel,"normal", bandwidth=2), lty=3) legend(40, -100, c("bandwidth=5", "bandwidth=2"), lty=c(1,3), bty="n") plot(times, accel, main ="supsmu") lines(supsmu(times, accel)) lines(supsmu(times, accel, bass=3), lty=3) legend(40, -100, c("default", "bass=3"), lty=c(1,3), bty="n") library(ksmooth) attach(mcycle) plot(times, accel) lines(locpoly(times, accel, bandwidth=dpill(times,accel))) lines(locpoly(times, accel, bandwidth=dpill(times,accel), degree=2), lty=3) detach() library(locfit, first=T) fit <- locfit(accel ~ times, alpha = 0.3, data=mcycle) plot(fit, se.fit=T, get.data=T) # next few lines may not work well on Windows locfit fit2 <- locfit(accel ~ times, ev="data", alpha=0.2, data=mcycle) y <- resid(fit2) fit3 <- locfit(log(y^2) ~ times, deg=1, alpha=1, ev="data", data=mcycle) va <- pmax(exp(fitted(fit3)), 20) fit <- locfit(accel ~ times, alpha=c(0,0,2), weights=1/va, ev="grid", mg=200, data=mcycle) plot(fit, se.fit=T, get.data=T) par(mfrow=c(1,1)) rock.lm <- lm(log(perm) ~ area + peri + shape, rock) summary(rock.lm) rock.gam <- gam(log(perm) ~ s(area) + s(peri) + s(shape), control=gam.control(maxit=50, bf.maxit=50), data=rock) summary(rock.gam) anova(rock.lm, rock.gam) par(mfrow=c(2,3), pty="s") plot(rock.gam, se=T) rock.gam1 <- gam(log(perm) ~ area + peri + s(shape), data=rock) plot(rock.gam1, se=T) anova(rock.lm, rock.gam1, rock.gam) par(mfrow=c(1,1), pty="m") set.seed(123) cpus0 <- cpus[, 2:8] # excludes names, authors' predictions for(i in 1:3) cpus0[,i] <- log10(cpus0[,i]) samp <- sample(1:209, 100) cpus.lm <- lm(log10(perf) ~ ., data=cpus0[samp,]) test <- function(fit) sqrt(sum((log10(cpus0[-samp, "perf"]) - predict(fit, cpus0[-samp,]))^2)/109) test(cpus.lm) cpus.gam <- gam(log10(perf) ~ ., data=cpus0[samp, ]) cpus.gam2 <- step.gam(cpus.gam, scope=list( "syct" = ~ 1 + syct + s(syct, 2) + s(syct), "mmin" = ~ 1 + mmin + s(mmin, 2) + s(mmin), "mmax" = ~ 1 + mmax + s(mmax, 2) + s(mmax), "cach" = ~ 1 + cach + s(cach, 2) + s(cach), "chmin" = ~ 1 + chmin + s(chmin, 2) + s(chmin), "chmax" = ~ 1 + chmax + s(chmax, 2) + s(chmax) )) print(cpus.gam2$anova, digits=3) test(cpus.gam2) Xin <- as.matrix(cpus0[samp,1:6]) library(mda) test2 <- function(fit) { Xp <- as.matrix(cpus0[-samp,1:6]) sqrt(sum((log10(cpus0[-samp, "perf"]) - predict(fit, Xp))^2)/109) } cpus.bruto <- bruto(Xin, log10(cpus0[samp,7])) test2(cpus.bruto) cpus.bruto$type cpus.bruto$df # examine the fitted functions par(mfrow=c(3,2)) Xp <- matrix(sapply(cpus0[samp, 1:6], mean), 100, 6, byrow=T) for(i in 1:6) { xr <- sapply(cpus0, range) Xp1 <- Xp; Xp1[,i] <- seq(xr[1,i], xr[2,i], len=100) Xf <- predict(cpus.bruto, Xp1) plot(Xp1[ ,i], Xf, xlab=names(cpus0)[i], ylab="", type="l") } cpus.mars <- mars(Xin, log10(cpus0[samp,7])) showcuts <- function(obj) { tmp <- obj$cuts[obj$sel, ] dimnames(tmp) <- list(NULL, dimnames(Xin)[[2]]) tmp } showcuts(cpus.mars) test2(cpus.mars) # examine the fitted functions Xp <- matrix(sapply(cpus0[samp, 1:6], mean), 100, 6, byrow=T) for(i in 1:6) { xr <- sapply(cpus0, range) Xp1 <- Xp; Xp1[,i] <- seq(xr[1,i], xr[2,i], len=100) Xf <- predict(cpus.mars, Xp1) plot(Xp1[ ,i], Xf, xlab=names(cpus0)[i], ylab="", type="l") } cpus.mars2 <- mars(Xin, log10(cpus0[samp,7]), degree=2) showcuts(cpus.mars2) test2(cpus.mars2) cpus.mars6 <- mars(Xin, log10(cpus0[samp,7]), degree=6) showcuts(cpus.mars6) test2(cpus.mars6) library(ppr) attach(rock) rock1 <- data.frame(area=area/10000, peri=peri/10000, shape=shape, perm=perm) detach() rock.ppr <- ppr(log(perm) ~ area + peri + shape, data=rock1, nterms=2, max.terms=5) rock.ppr summary(rock.ppr) par(mfrow=c(3,2)) plot(rock.ppr) plot(update(rock.ppr, bass=5)) plot(update(rock.ppr, sm.method="gcv", gcvpen=2)) rock.ppr2 <- update(rock.ppr, sm.method="gcv", gcvpen=2) summary(rock.ppr2) summary(rock1) # to find the ranges of the variables Xp <- expand.grid(area=seq(0.1,1.2,0.05), peri=seq(0,0.5,0.02), shape=0.2) trellis.device() rock.grid <- cbind(Xp,fit=predict(rock.ppr2, Xp)) wireframe(fit ~ area+peri, rock.grid, screen=list(z=160,x=-60), aspect=c(1,0.5), drape=T) dev.off() cpus.ppr <- ppr(log10(perf) ~ ., data=cpus0[samp,], nterms=2, max.terms=10, bass=5) cpus.ppr cpus.ppr <- ppr(log10(perf) ~ ., data=cpus0[samp,], nterms=8, max.terms=10, bass=5) test(cpus.ppr) ppr(log10(perf) ~ ., data=cpus0[samp,], nterms=2, max.terms=10, sm.method="spline") cpus.ppr2 <- ppr(log10(perf) ~ ., data=cpus0[samp,], nterms=7, max.terms=10, sm.method="spline") test(cpus.ppr2) attach(mammals) a <- ace(body, brain) o1<- order(body); o2 <- order(brain) par(mfrow=c(2,2)) plot(body, brain, main="Original Data", log="xy") plot(body[o1], a$tx[o1], main="Transformation of body wt", type="l", log="x") plot(brain[o2], a$ty[o2], main="Transformation of brain wt", type="l", log="x") plot(a$tx, a$ty, main="Transformed y vs x") a <- avas(body, brain) par(mfrow=c(2,2)) plot(body, brain, main="Original Data", log="xy") plot(body[o1], a$tx[o1], main="Transformation of body wt", type="l", log="x") plot(brain[o2], a$ty[o2], main="Transformation of brain wt", type="l", log="x") plot(a$tx, a$ty, main="Transformed y vs x") detach() attach(cpus0) cpus.avas <- avas(cpus0[, 1:6], perf) plot(log10(perf), cpus.avas$ty) par(mfrow=c(2,3)) for(i in 1:6) plot(cpus0[, i], cpus.avas$tx[, i], xlab=names(cpus0[i]), ylab="") detach()