###################################################################
### ch10.r ###
### ###
### This is an R script for producing examples in ###
### Chapter 10 of ###
### Snijders, Tom A.B., and Bosker, Roel J. ###
### Multilevel Analysis: ###
### An Introduction to Basic and Advanced Multilevel Modeling, ###
### second edition. ###
### London etc.: Sage Publishers, 2012 ###
### ###
### version April 4, 2014 ###
### ###
###################################################################
# Read data
mlbook_red <- read.table("mlbook2_r.dat", header=TRUE)
# what are the names
names(mlbook_red)
# Note that school means can be calculated by functions such as
# schmeans <- aggregate(mlbook_red, by = list(mlbook_red$schoolnr), mean)
# But this is not necessary here because the school means
# are already in the data set.
library(nlme)
###################################################################
### ###
### Within - group OLS residuals ###
### ###
###################################################################
# First we estimate Model 1 of Table 8.1.
summary(mod1 <-
lme(langPOST ~ IQ_verb*ses + sex + sch_iqv*sch_ses,
random = ~ IQ_verb|schoolnr,
data = mlbook_red, method="ML"))
VarCorr(mod1)
# Next we calculate OLS regressions per school class.
# To be able to use the compareFits function,
# we do keep the school-level variables, although they have
# no sense for the OLS regressions.
olselist <- lmList(langPOST ~ IQ_verb*ses + sex + sch_iqv*sch_ses| schoolnr, data=mlbook_red)
# The compareFits function compares the OLS estimates with the
# posterior means of the multilevel model.
# We do this only for the level-1 effects.
comp.models <- compareFits(coef(olselist), coef(mod1), c(1,2,3,4,7))
plot(comp.models)
# The plots shows the much greater variability of the
# OLS estimates.
# We see that there are a few OLS outliers.
# For further use, we wish to have the within-group residual d.f. available
# First see how olselist is structured:
str(olselist, 1)
# Apparently it is a list of results for each of the groups.
str(olselist[[1]], 1)
# This shows that we can get get the within-group residual d.f. as follows:
df_res <- sapply(olselist, function(x){x$df.residual})
table(df_res)
# To study the outliers for, e.g., the effect of sex,
# the plot of comp.models shows that the value -10 may be used to
# separate outliers from the rest. These schools are flagged by
low.sex.effect <- which(comp.models[,1,"sex"] < -10)
# Are these particularly small groups?
df_res[low.sex.effect]
# Yes indeed, they are. This may well account for
# the deviating effects of sex.
# The regression results for these groups are listed through
comp.models[low.sex.effect,,]
# The first group (labeled 10) has particularly deviant results,
# but it has df_res = 0,
# so the precision is indeed expected to be very poor.
# The other schools also have so small df_res,
# that these outliers seem not to be very bothering.
# Further down we continue with the study of the OLS residuals.
# Example 10.1
# Make a selection of the schools with d.f. at least 10
use <- df_res >= 10
# The number of such schools is
sum(use)
# Now we need to get the within-group residual standard deviations
# To see how we get this, first study
?summary.lm
# and search for the word "variance".
# For the first group, residual s.d. is
summary(olselist[[1]])$sigma
# This shows that we can compute
sigma2_res <- sapply(olselist, function(x){(summary(x)$sigma)^2})
# Make a plot of residual variance against residual d.f.:
plot(df_res, sigma2_res)
# There are some outliers for the very low residual d.f.
# Formula (10.3)
ls_tot <- sum((df_res*log(sigma2_res))[use])/sum(df_res[use])
d <- (sqrt(0.5*df_res))*(log(sigma2_res) - ls_tot)
(H <- sum((d^2)[use]))
# The associated p-value is
1-pchisq(H, sum(use)-1)
qqnorm(d)
# Find the outliers
outliers <- which(abs(d) > 3)
df_res[outliers]
d[outliers]
# Example 10.2
# The within-group OLS residuals are obtained by
resi <- residuals(olselist)
# We illustrate only the dependence of langPOST on IQ_verb.
# First let us plot the raw variables
with(mlbook_red,plot(IQ_verb,langPOST))
# This is not optimally informative, because there are many plot symbols
# that represent many cases, and the number (multiplicity) of cases
# is not visualized.
# To visualize this multiplicity, we may use
with(mlbook_red,plot(jitter(IQ_verb),jitter(langPOST)))
with(mlbook_red,lines(lowess(IQ_verb,langPOST, f=.2), lwd=3))
# This figure suggests that for IQ_verb roughly between -1 and +1,
# the slope is higher than outside of this interval.
# However, these plots do not control for the other variables,
# so this is not something that leads directly to
# modification of the model.
# Therefore we now make similar plots for the residuals.
with(mlbook_red,plot(jitter(IQ_verb),jitter(resi)))
with(mlbook_red,lines(lowess(IQ_verb,resi, f=.2), lwd=3))
# It's a bit black there in the middle!
# As on pages 121-123 and 164, we include two quadratic spline functions:
temp1 <- mlbook_red$IQ_verb^2
temp2 <- (abs(mlbook_red$IQ_verb))*mlbook_red$IQ_verb
iq2.plus <- 0.5*(temp1+temp2)
iq2.min <- 0.5*(temp1-temp2)
# As a check:
plot(mlbook_red$IQ_verb, iq2.plus)
plot(mlbook_red$IQ_verb, iq2.min)
# Now we estimate the model to which the two spline effects are added.
# The number of iterations must be allowed to be larger than the default;
# hence the "control" parameter.
summary(mod5 <-
lme(langPOST ~ IQ_verb*ses + sex + sch_iqv*sch_ses
+ iq2.plus + iq2.min,
random = ~ IQ_verb|schoolnr,
data = mlbook_red, method="ML",
control=list(msMaxIter=100)))
# Compute the within-group OLS residuals:
mlbook_plus <- cbind(mlbook_red,iq2.min,iq2.plus)
olselist5 <- lmList(langPOST ~ IQ_verb*ses + sex +
iq2.plus + iq2.min | schoolnr, data=mlbook_plus)
resi5 <- residuals(olselist5)
with(mlbook_red,plot(jitter(IQ_verb),jitter(resi5)))
with(mlbook_red,lines(lowess(IQ_verb,resi5, f=.2), lwd=3))
# Even though it still is black in the middle,
# the lowess curve looks pretty flat indeed.
# Now we wish to study standardized within-group OLS residuals.
# We do this using "by", if necessary look up the help function
# ?by
# to understand the following construction.
# First define a function that gives the studentized OLS residuals
# for this model in a given data frame:
res_st5 <- function(x){
rstudent(lm(langPOST ~ IQ_verb*ses + sex + iq2.plus + iq2.min,
data=x))
}
# Compute within-school studentized OLS residuals
resi_st <- by(mlbook_plus, mlbook_red$schoolnr, res_st5)
# Find out the structure of this object, and try to understand
# why we use the following transformation
rs <- unlist(resi_st)
# Some of the residuals are NA or exactly 0.
sum(is.na(rs))
sum(rs[!is.na(rs)]==0)
# This is because of the presence of small groups.
# However, it is unclear why unit 1250, with schoolnr=94,
# has a NaN standardized residual.
# Perfect collinearity of the other elements in this group???
# We leave this question aside.
# To make a QQ plot, we use the