#-*- S -*- ### Venables & Ripley (1999) Statistical Complements library(MASS) options(width=65, digits=5, height=9999) postscript(file="stat.ps", width=8, height=6, pointsize=9) # Chapter 5 # for later use, from section 5.6 perm.t.test <- function(d) { # ttest is function(x) mean(x)/sqrt(var(x)/length(x)) binary.v <- function(x, digits) { if(missing(digits)) { mx <- max(x) digits <- if(mx > 0) 1 + floor(log(mx, base = 2)) else 1 } ans <- 0:(digits - 1) lx <- length(x) x <- rep(x, rep(digits, lx)) x <- (x %/% 2^ans) %% 2 dim(x) <- c(digits, lx) x } digits <- length(d) n <- 2^digits x <- d * 2 * (binary.v(1:n, digits) - 0.5) mx <- matrix(1/digits, 1, digits) %*% x s <- matrix(1/(digits - 1), 1, digits) vx <- s %*% (x - matrix(mx, digits, n, byrow=T))^2 as.vector(mx/sqrt(vx/digits)) } rm(A, B) # precautionary clear-out attach(shoes) tperm <- perm.t.test(B - A) # see section 5.6 detach() gal <- galaxies/1000 set.seed(101); m <- 1000 res <- numeric(m) for (i in 1:m) res[i] <- median(sample(gal, replace=T)) # 5.5 Density estimation if(platform()=="WIN386") library(logsplin) else library(logspline) library(logspline) # logsplin on Windows attach(geyser) geyser.ls <- logspline.fit(duration, lbound=0) x <- seq(0.5, 6, len=200) truehist(duration, nbins=15, xlim=c(0.5,6), ymax=1.2) lines(x, dlogspline(x, geyser.ls)) detach() par(mfrow=c(1,3), pty="s") truehist(tperm, xlab="diff") tperm.ls <- logspline.fit(tperm) x <- seq(-5, 5, len=200) lines(x, dlogspline(x, tperm.ls)) sres <- c(sort(tperm), 5); yres <- (0:1024)/1024 plot(sres, yres, type="S", xlab="diff", ylab="cdf") lines(x, plogspline(x, tperm.ls)) x <- c(0.0005, seq(0.001, 0.999, 0.001), 0.9995) plot( qt(x, 9), qlogspline(x, tperm.ls), xlab="Quantiles of t on 9 df", ylab="Fitted quantiles", type="l", xlim=c(-5, 5), ylim=c(-5, 5)) points( qt(ppoints(tperm), 9), sort(tperm) ) par(mfrow=c(1,1), pty="m") truehist(res, nbins=nclass.FD(res), ymax=4, xlab="bootstrap samples") x <- seq(20, 22, length=500) res.ls <- logspline.fit(res) lines(x, dlogspline(x, res.ls)) points(res.ls$knots, dlogspline(res.ls$knots, res.ls)) res.ls <- logspline.fit(res, penalty=2) lines(x, dlogspline(x, res.ls), lty=3) points(res.ls$knots, dlogspline(res.ls$knots, res.ls)) x <- seq(8000, 35000, 200) plot(x, dlogspline(x, logspline.fit(galaxies)), type="l", xlab="velocity of galaxy", ylab="density") lines(density(galaxies, n=200, window="gaussian", width=width.SJ(galaxies)), lty=3) library(locfit) geyser.lf <- locfit(~ duration, data=geyser, flim=c(0.5, 6)) plot(geyser.lf, get.data=T, mpv=200, ylim=c(0,1)) geyser.lf1 <- locfit(~ duration, data=geyser, flim=c(0.5,6), alpha=c(0.15, 0.9)) lines(geyser.lf1, m=200, lty=3) # Chapter 6 # 6.5 Robust and resistant regression summary(lm(stack.loss ~ stack.x), cor=T) lqs(stack.x, stack.loss, method="lms", nsamp="exact") summary(lqs(stack.x, stack.loss, method="lms", nsamp="exact")$residuals) lqs(stack.x, stack.loss, method="lts", nsamp="exact") summary(lqs(stack.x, stack.loss, method="lts", nsamp="exact")$residuals) stack.rl <- rlm(stack.loss ~ stack.x) summary(stack.rl, cor=F) round(stack.rl$w, 2) summary(rlm(stack.loss ~ stack.x, method="MM"), cor=F) x1 <- stack.x[,1]; x2 <- stack.x[,2] stack.loess <- loess(log(stack.loss) ~ x1*x2, span=0.5, family="symmetric") stack.plt <- expand.grid(x1=seq(50,80,0.5), x2=seq(17,27,0.2)) stack.plt$z <- as.vector(predict(stack.loess, stack.plt)) dupls <- c(2,7,8,11) contourplot(z ~ x1*x2, stack.plt, aspect=1, xlab="Air flow", ylab="Water temp", panel = function(x, y, subscripts, ...){ panel.contourplot(x, y, subscripts, ...) panel.xyplot(x1, x2) text(x1[-dupls] + par("cxy")[1] , x2[-dupls] + 0.5* par("cxy")[2], as.character(seq(x1)[-dupls]), cex=0.7) }) # Chapter 7 # 7.6 Gamma models clotting <- data.frame( u = c(5,10,15,20,30,40,60,80,100), lot1 = c(118,58,42,35,27,25,21,19,18), lot2 = c(69,35,26,21,18,16,13,12,12) ) clot1 <- glm(lot1 ~ log(u), data=clotting, family=Gamma) summary(clot1, cor=F) clot1$deviance/clot1$df.residual gamma.dispersion(clot1) clot2 <- glm(lot2 ~ log(u), data=clotting, family=Gamma) summary(clot2, cor=F) clot2$deviance/clot2$df.residual gamma.dispersion(clot2) gm <- glm(Days + 0.1 ~ Age*Eth*Sex*Lrn, quasi(link=log, variance=mu^2), data=quine) summary(gm, cor=F) gamma.shape(gm, verbose=T) summary(gm, dispersion = gamma.dispersion(gm), cor=F) sum((residuals(gm, type="resp")/fitted(gm))^2/gm$df.residual) gm$deviance/gm$df.residual gamma.dispersion(gm) # Chapter 8 wtloss.st <- c(b0=90, b1=95, th=120) expn1 <- deriv(y ~ b0 + b1 * 2^(-x/th), c("b0", "b1", "th"), function(b0, b1, th, x) {}) wtloss.gr <- nls(Weight ~ expn1(b0, b1, th, Days), data = wtloss, start = wtloss.st, trace = T) # 8.5 Profiles expn3 <- deriv3(~ b0 + b1*2^(-x/th), c("b0","b1","th"), function(x, b0, b1, th) {}) wtloss.he <- nls(Weight ~ expn3(Days, b0, b1, th), wtloss, start = coef(wtloss.gr)) rms.curv(wtloss.he) # Chapter 9 # 9.1 Additive models and scatterplot smoothers library(KernSmooth) # ksmooth on Windows attach(mcycle) plot(times, accel) lines(locpoly(times, accel, bandwidth=dpill(times,accel))) lines(locpoly(times, accel, bandwidth=dpill(times,accel), degree=2), lty=3) detach() # from Chapter 6 set.seed(123) cpus.samp <- sample(1:209, 100) # from Chapter 9 cpus0 <- cpus[, 2:8] for(i in 1:3) cpus0[, i] <- log10(cpus0[, i]) Xin <- as.matrix(cpus0[cpus.samp,1:6]) library(mda) test2 <- function(fit) { Xp <- as.matrix(cpus0[-cpus.samp,1:6]) sqrt(sum((log10(cpus0[-cpus.samp, "perf"]) - predict(fit, Xp))^2)/109) } cpus.bruto <- bruto(Xin, log10(cpus0[cpus.samp, 7])) test2(cpus.bruto) cpus.bruto$type cpus.bruto$df # examine the fitted functions par(mfrow=c(3,2)) Xp <- matrix(sapply(cpus0[cpus.samp, 1:6], mean), 100, 6, byrow=T) for(i in 1:6) { xr <- sapply(cpus0, range) Xp1 <- Xp; Xp1[,i] <- seq(xr[1,i], xr[2,i], len=100) Xf <- predict(cpus.bruto, Xp1) plot(Xp1[ ,i], Xf, xlab=names(cpus0)[i], ylab="", type="l") } cpus.mars <- mars(Xin, log10(cpus0[cpus.samp,7])) showcuts <- function(obj) { tmp <- obj$cuts[obj$sel, ] dimnames(tmp) <- list(NULL, dimnames(Xin)[[2]]) tmp } showcuts(cpus.mars) test2(cpus.mars) # examine the fitted functions Xp <- matrix(sapply(cpus0[cpus.samp, 1:6], mean), 100, 6, byrow=T) for(i in 1:6) { xr <- sapply(cpus0, range) Xp1 <- Xp; Xp1[,i] <- seq(xr[1,i], xr[2,i], len=100) Xf <- predict(cpus.mars, Xp1) plot(Xp1[ ,i], Xf, xlab=names(cpus0)[i], ylab="", type="l") } cpus.mars2 <- mars(Xin, log10(cpus0[cpus.samp,7]), degree=2) showcuts(cpus.mars2) test2(cpus.mars2) cpus.mars6 <- mars(Xin, log10(cpus0[cpus.samp,7]), degree=6) showcuts(cpus.mars6) par(mfrow=c(1,1)) library(sm) attach(birthwt) sm.logit(age, low, h=5, display="se") detach() library(locfit) bwt.lf <- locfit(low ~ age+lwt, data=birthwt, family="binomial", deg=1, scale=0, alpha=c(0,5)) plot(bwt.lf, get.data=T) pima.lf <- locfit(I(type=="Yes") ~ glu + bmi, data=Pima.tr, family="binomial", scale=0, alpha=c(0,5)) par(mfrow=c(1,2), pty="s") plot(pima.lf, get.data=T); plot(pima.lf, type="persp") par(mfrow=c(1,1), pty="m") # Chapter 10 # 10.4 Tree-structured survival analysis # VA is constructed on page 387 VA.temp <- as.data.frame(cancer.vet) dimnames(VA.temp)[[2]] <- c("treat", "cell", "stime", "status", "Karn", "diag.time","age","therapy") attach(VA.temp) VA <- data.frame(stime, status, treat=factor(treat), age, Karn, diag.time, cell=factor(cell), prior=factor(therapy)) detach() library(rpart) set.seed(123) VA.rp <- rpart(Surv(stime, status) ~ ., data=VA, minsplit=10) plotcp(VA.rp) printcp(VA.rp) print(VA.rp, cp=0.09) library(tssa, first=T) VA.tssa <- tssa(stime ~ treat + age + Karn + diag.time + prior, status, data=VA, minbuc=10) VA.tssa summary(VA.tssa) tree.screens() plot(VA.tssa) text(VA.tssa) if(interactive()) km.tssa(VA.tssa) close.screen(all=T) library(survcart, first=T) VA.st <- survtree(stime ~ treat + age + Karn + diag.time + cell + prior, data=VA, status, fact.flag=c(F,T,T,T,F,F)) plot(prune.survtree(VA.st)) set.seed(123); tr <- sample(nrow(VA), 90) VA1 <- VA[tr,]; VA2 <- VA[-tr,] VA.st1 <- update(VA.st, data=VA1) VA.st1.pr <- prune.survtree(VA.st1, newdata=VA2, zensor.newdata=VA2$status) VA.st1.pr attach(VA.st1.pr) dev <- dev + k*size dev - 2*size dev - 4*size detach() prune(VA.st1, k=4) graph.survtree(prune(VA.st, k=3.5), VA$stime, VA$status, xtile=0.5, interactive=F) # Chapter 11 # 11.3 Correspondence analysis farms.mca <- mca(farms, abbrev=T) # Use levels as names plot(farms.mca, cex=rep(0.7,2)) # 11.10 Factor analysis swiss.FA <- factanal(swiss.x, factors=2, method="mle") swiss.FA summary(swiss.FA) factanal(swiss.x, factors=2, method="mle", control=list(iter.max=100, unique.tol=1e-20))$uniq A <- loadings(swiss.FA) %*% t(loadings(swiss.FA)) + diag(swiss.FA$uniq) round(cor(swiss.x) - A, 3) rm(A) swiss.FA1 <- factanal(swiss.x, method="mle") swiss.FA1 summary(swiss.FA1) ability.cov <- matrix(scan(n=36),6,6) 24.641 5.991 33.520 6.023 20.755 29.701 5.991 6.700 18.137 1.782 4.936 7.204 33.520 18.137 149.831 19.424 31.430 50.753 6.023 1.782 19.424 12.711 4.757 9.075 20.755 4.936 31.430 4.757 52.604 66.762 29.701 7.204 50.753 9.075 66.762 135.292 dimnames(ability.cov) <- list( c("general","picture","blocks","maze","reading","vocab"), c("general","picture","blocks","maze","reading","vocab")) ability.cl <- list(cov=ability.cov, center=rep(0,6), n.obs=112) ability.FA <- factanal(covlist=ability.cl, method="mle") ability.FA ability.FA <- update(ability.FA, factors=2) ability.FA summary(ability.FA) rotate(swiss.FA, rotation="promax") rotate(loadings(swiss.FA), rotation="promax") loadings(rotate(ability.FA, rotation="oblimin")) par(pty="s") L <- loadings(ability.FA) eqscplot(L, xlim=c(0,1), ylim=c(0,1)) #identify(L, dimnames(L)[[1]]) oblirot <- rotate(loadings(ability.FA), rotation="oblimin") naxes <- solve(oblirot$tmat) arrows(rep(0,2), rep(0,2), naxes[,1], naxes[,2]) par(pty="m") ir <- rbind(iris[,,1], iris[,,2], iris[,,3]) ir.pca <- princomp(log(ir), cor=T) loadings(rotate(ir.pca, n=2)) A <- loadings(ir.pca) %*% diag(ir.pca$sdev) dimnames(A)[[2]] <- names(ir.pca$sdev) B <- rotate(A[, 1:2], normalize=F)$rmat print.loadings(B) # Chapter 12 # 12.1 Estimators of survivor curves library(muhaz) attach(Aids2) plot(muhaz(death-diag+0.9, status=="D"), n.est.grid=250) detach() if(platform()=="WIN386") library(logsplin) else library(logspline) g1 <- gehan[gehan$treat=="control",] g2 <- gehan[gehan$treat=="6-MP",] logspline.plot( logspline.fit(uncensored=g1[g1$cens==1,"time"], right=g1[g1$cens==0,"time"], lbound=0), what="s", xlim=c(0,35)) g2.ls <- logspline.fit(uncensored=g2[g2$cens==1,"time"], right=g2[g2$cens==0,"time"], lbound=0) xx <- seq(0, 35, len=100) lines(xx, 1 - plogspline(xx, g2.ls), lty=3) library(locfit) plot(locfit( ~ time, cens=1-cens, data=g1, family="hazard", alpha=0.5, xlim=c(0, 1e10)), xlim=c(0, 25), ylim=c(0, 0.3)) lines(locfit( ~ time, cens=1-cens, data=g2, family="hazard", alpha=0.5, xlim=c(0, 1e10)), lty=3) library(HEFT) attach(Aids2) aids.heft <- heft.fit(death-diag+0.9, status=="D") heft.summary(aids.heft) par(mfrow=c(2,2)) heft.plot(aids.heft, what="s", ylim=c(0,1)) heft.plot(aids.heft) detach() par(mfrow=c(1,1)) # 12.5 Non-parametric models with covariates library(sm) attach(heart[heart$transplant==1,]) sm.survival(age+48, log10(stop - start), event, h=5, p=0.50) library(locfit) td <- stop - start; Age <- age+48 plot(locfit(~ td + Age, cens=1-event, scale=0, alpha=0.5, xlim=list(td=c(0,1e10)), flim=list(td=c(0,365))), type="persp") library(hazcov) heart.hc <- hazcov(Surv(td, event) ~ Age, span=0.5) plot(heart.hc) persp.hazcov(Hazard.Rate ~ Time*Age, heart.hc) heart.50 <- hazcov(Surv(td, event) ~ Age, span=0.5, trace.hat="exact") for(alpha in seq(0.1, 1, 0.1)) { heart.tmp <- hazcov(Surv(td, event) ~ Age, span=alpha, trace.hat="exact") print(c(alpha, wcp(heart.tmp, heart.50))) } heart.hc <- hazcov(Surv(td, event) ~ Age, span=0.5, ls=T) plot(heart.hc) persp.hazcov(Hazard.Rate ~ Time*Age, heart.hc) detach() attach(VA) library(HARE) options(contrasts=c("contr.treatment", "contr.poly")) VAx <- model.matrix( ~ treat+age+Karn+cell+prior, VA)[,-1] VA.hare <- hare.fit(stime, status, VAx) hare.summary(VA.hare) library(HEFT) VA.heft <- heft.fit(stime, status, leftlog=0) heft.plot(VA.heft, what="h") nstime <- -log(1 - pheft(stime, VA.heft)) survreg(Surv(stime, status) ~ 1, data=VA) plot(sort(nstime), -log(1-pweibull(sort(stime), 1/1.1736, exp(4.9731))), type="l", xlab="HEFT-transformed", ylab="Weibull-transformed") VA.hare2 <- hare.fit(nstime, status, VAx) hare.summary(VA.hare2) # 12.6 Expected survival rates stan1 <- coxph(Surv(start, stop, event) ~ strata(transplant)+ year + year:transplant + age + surgery, heart) plot(survfit(stan1), conf.int=T, log=T, lty=c(1,3)) year <- dates("7/1/91") expect <- survexp(~ ratetable(sex="male", year=year, age=65*365.25), times = seq(0, 1400, 30), ratetable=survexp.uswhite) lines(expect$time, expect$surv, lty=4) expect <- survexp(stop ~ ratetable(sex=1, year=year*365.25, age=(age+48)*365.25), times = seq(0, 1400, 30), ratetable = survexp.uswhite, data = heart, subset = diff(c(id, 0)) != 0, cohort = T, conditional = T) lines(expect$time, expect$surv, lty=4) # Chapter 13 # 13.1 Second-order summaries library(lspec) lh.ls <- lspec.fit(lh) lspec.summary(lh.ls) lspec.plot(lh.ls, log="y") lspec.plot(lh.ls, what="p") deaths.ls <- lspec.fit(deaths) lspec.plot(deaths.ls, log="y", main="deaths") lspec.plot(deaths.ls, what="p") accdeaths.ls <- lspec.fit(accdeaths) lspec.plot(accdeaths.ls, log="y", main="accdeaths") lspec.plot(accdeaths.ls, what="p") nott.ls <- lspec.fit(window(nottem, end=c(1936,12))) lspec.plot(nott.ls, log="y", main="nottem") lspec.plot(nott.ls, what="p") lspec.plot(lspec.fit(accdeaths, minmass=7000), log="y") lspec.plot(lspec.fit(accdeaths, minmass=1000), log="y") # 13.2 Multiple time series par(mfrow=c(2,2)) spectrum(mdeaths, spans=c(3,3)) spectrum(fdeaths, spans=c(3,3)) mfdeaths.spc <- spec.pgram(ts.union(mdeaths, fdeaths), spans=c(3,3)) plot(mfdeaths.spc$freq, mfdeaths.spc$coh, type="l", ylim=c(0,1), xlab="squared coherency", ylab="") gg <- 2/mfdeaths.spc$df se <- sqrt(gg/2) coh <- sqrt(mfdeaths.spc$coh) lines(mfdeaths.spc$freq, (tanh(atanh(coh) + 1.96*se))^2, lty=3) lines(mfdeaths.spc$freq, (pmax(0, tanh(atanh(coh) - 1.96*se)))^2, lty=3) plot(mfdeaths.spc$freq, mfdeaths.spc$phase, type="l", ylim=c(-pi, pi), xlab="phase spectrum", ylab="") cl <- asin( pmin( 0.9999, qt(0.95, 2/gg-2)* sqrt(gg*(coh^{-2} - 1)/(2*(1-gg)) ) ) ) lines(mfdeaths.spc$freq, mfdeaths.spc$phase + cl, lty=3) lines(mfdeaths.spc$freq, mfdeaths.spc$phase - cl, lty=3) mfdeaths.spc <- spec.pgram(ts.union(mdeaths, lag(fdeaths, 4)), spans=c(3,3)) plot(mfdeaths.spc$freq, mfdeaths.spc$coh, type="l", ylim=c(0,1), xlab="coherency", ylab="") gg <- 2/mfdeaths.spc$df se <- sqrt(gg/2) coh <- sqrt(mfdeaths.spc$coh) lines(mfdeaths.spc$freq, (tanh(atanh(coh) + 1.96*se))^2, lty=3) lines(mfdeaths.spc$freq, (pmax(0, tanh(atanh(coh) - 1.96*se)))^2, lty=3) phase <- (mfdeaths.spc$phase + pi)%%(2*pi) - pi plot(mfdeaths.spc$freq, phase, type="l", ylim=c(-pi, pi), xlab="phase spectrum", ylab="") cl <- asin( pmin( 0.9999, qt(0.95, 2/gg-2)* sqrt(gg*(mfdeaths.spc$coh^{-2} - 1)/(2*(1-gg)) ) ) ) lines(mfdeaths.spc$freq, phase + cl, lty=3) lines(mfdeaths.spc$freq, phase - cl, lty=3) # Chapter 14 # 14.5 Module S+SpatialStats module(spatial) if(exists("krige")) { topo.kr <- krige(z ~ loc(x, y) + x + y + x^2 + x*y + y^2, data=topo, covfun=exp.cov, range=0.7, sill=770) topo.kr prsurf <- predict(topo.kr, se.fit = T, grid = list(x=c(0, 6.5, 50), y=c(0, 6.5, 50))) topo.plt1 <- contourplot(fit ~ x*y, data=prsurf, pretty=F, at=seq(700, 1000, 25), aspect=1, panel = function(...){ panel.contourplot(...) points(topo) }) topo.plt2 <- contourplot(se.fit ~ x*y, data=prsurf, pretty=F, at=c(20, 25), aspect=1) print(topo.plt1, split=c(1,1,2,1), more=T) print(topo.plt2, split=c(2,1,2,1)) topo.kr2 <- krige(z ~ loc(x, y) + x + y + x^2 + x*y + y^2, data = topo, covfun = gauss.cov, range = 1, sill = 600, nugget = 100) topo.kr3 <- krige(z ~ loc(x, y), data = topo, covfun = gauss.cov, range = 2, sill = 6500, nugget = 100) topo.var <- variogram(z ~ loc(x, y), data=topo) model.variogram(topo.var, gauss.vgram, range=2, sill=6500, nugget=100, ask=F) topo.cov <- covariogram(z ~ loc(x, y), data=topo) model.variogram(topo.cov, gauss.cov, range=2, sill=4000, nugget=2000, ask=F) topo.ls <- krige(z ~ loc(x, y) + x + y + x^2 + x*y + y^2, data=topo, covfun=exp.cov, range=0) topo.res <- residuals(topo.ls) topo.var <- variogram(topo.res ~ loc(x, y), data=topo) model.variogram(topo.var, exp.vgram, range=1, sill=1000, ask=F) topo.var <- covariogram(topo.res ~ loc(x, y), data=topo) model.variogram(topo.var, gauss.cov, range=1, sill=210, nugget=90, ask=F) plot(variogram(z ~ loc(x, y), data=topo, azimuth = c(0, 90), tol.azimuth = 45), aspect=0.7, layout=c(2,1)) plot(variogram(topo.res ~ loc(x, y), data=topo, azimuth = c(0, 90), tol.azimuth = 45), aspect=0.7, layout=c(2,1)) library(spatial) # our library, for next line only. pines <- data.frame(ppinit("pines.dat")[c("x", "y")]) pines <- spp(pines, "x", "y", bbox(c(0,9.6), c(0, 10)), drop=T) attributes(pines) par(pty = "s", mfrow=c(2,2)) plot(pines, boundary = T) Lhat(pines, maxdist = 5) Lenv(pines, 25, process = "binomial", maxdist=5) Lhat(pines, maxdist =1.5) Lenv(pines, 100, process = "Strauss", maxdist = 1.5, cpar = 0.2, radius = 0.7) } # end of stat.q