echo 1
note macro dinfl_2.obe.
note This is a MLwiN macro for calculating deletion influence statistics
note for level-two units.
note It is based on Snijders and Berkhof (2008, Section 3),
note and implements the methods also explained in
note Section 10.7 of Snijders and Bosker (2011).
note Predicted values and covariance matrices are determined
note after taking one iteration note step of the estimation algorithm
note for data from which group j was deleted.
note This is a good approximation to deletion diagnostics.
note Further, level-one standardized residuals are calculated
note based on level-two unit deletion.
note This macro presupposes a two-level model,
note and the random part may be arbitrary.
note It is also assumed that the usual unit column "cons" exists.
note The columns c177-c209 and also c301 and higher will be used.
note Any information contained in these columns will be overwritten!!!
note The macro contains also the possibility to calculate
note parameter-wise diagnostics. These are now `commented away'
note by starting the lines with "note !!".
echo 0
batch 1
pref 0
post 0
maxi 5000
pref 0
post 0
start
echo 1
fixed
rand
like
echo 0
note Prepare groups g6 and g8 of the size = number of fixed effects.
note Put all explanatory variables in group g5,
note and prepare group g10 of the same size;
note prepare g7, g9 of size = (total number of random part parameters);
note g5 is for storing the data,
note g6 and g7 are for the parameter-wise influence statistics,
note and g8 and g9 are for temporary use in calculating these.
note All explanatory variables in group g5:
link -1 g5
gsize g5 b11
nfixe b3
nrnd b6
note b3 = # variables with fixed effect
note b6 = number of parameters in random part
erase c209
note First g6 and g7 for output columns:
note !! calc b14 = 210 + b3 - 1
note !! link c210-cb14 g6
note !! erase c210-cb14
note !! calc b13 = b14 + 1
note !! calc b14 = b13 + b6 - 1
note !! link cb13-cb14 g7
note !! erase cb13-cb14
note Then g8, g9, g10 for temporary columns:
calc b14 = 300 + b6
link c301-cb14 g9
calc b13 = b14 + 1
calc b14 = b13 + b3 - 1
link cb13-cb14 g8
calc b13 = b14 + 1
calc b14 = b13 + b11 - 1
link cb13-cb14 g10
note store entire data
calc g10 = g5
yvar b25
calc c187 = cb25
mark 0 c1096-c1099
calc c196 = c1096
calc c197 = c1097
calc c198 = c1098
calc c199 = c1099
note Make unit-2 identifier c181 with consecutive numbers 1...b4
note and store group identifiers in c204, group sizes in c182 and c205.
idco 1 b17
idco 2 b18
calc c193 = cb17
calc c194 = cb18
mlre "cons" cb18 c181
mlco cb18 c182
take cb18 c182 c204 c205
nunit 2 b4
note prepare output columns:
put b4 0 c201
put b4 0 c202
put b4 0 c203
echo 1
note Number of iterations will be (one for each group)
print b4
echo 0
batch 0
itnu 1 1
note Cycle through all groups
loop b1 1 b4
say .
note b1 = group number
pick b1 c205 b5
note b5 = number of cases in this group
pick b1 c204 b8
note b8 = group identifier in original model
note prepare data omitting group number b1
omit b1 c181 c193 c194 g10 c187 c180 cb17 cb18 g5 cb25
note only one step:
next
note calculate estimated covariance matrices;
note in INFL.OBE / RESS2.OBE, this is done earlier, after estimation
note for the complete data.
imat b6 c183
calc c183 = 0.000001*c183 + sym(c1097)
calc c191 = diag(c183)
calc c191 = 1/sqrt(c191)
calc c183 = inv(c183)
imat b3 c186
calc c186 = 0.000001*c186 + sym(c1099)
calc c192 = diag(c186)
calc c192 = 1/sqrt(c192)
calc c186 = inv(c186)
note Now c183 is the inverse estimated covariance matrix of the
note parameter estimates of the random part,
note c186 is the inverse estimated covariance matrix of the
note estimated fixed effects.
note c191 is 1/(standard errors of random part parameters),
note c192 is 1/(standard errors of fixed part parameters).
note The 0.000001*imat's are added to avoid non-p.s.d. matrices.
note influence on random part:
calc c188 = c196 - c1096
calc c179 = (~c188)*.c183*.c188
pick 1 c179 b12
edit b1 c201 b12
note influence per parameter:
note !! calc c188 = c188*c191
note !! transpose 1 c188 g9
note !! append g7 g9 g7
note influence on fixed part:
calc c188 = c198 - c1098
calc c179 = (~c188)*.c186*.c188
pick 1 c179 b12
edit b1 c202 b12
note influence per parameter:
note !! calc c188 = c188*c192
note !! transpose 1 c188 g8
note !! append g6 g8 g6
note standardized level-two residual:
note First restore total data (including group b1)
calc cb17 = c193
calc cb18 = c194
calc cb25 = c187
calc g5 = g10
note put covariance matrix of responses in c189
note and level-one residuals in c177.
note Since c189 might be non-invertible, 0.000001*imat is added again.
yres b1 c177
vmat b1 c189
imat b5 c185
calc c189 = 0.000001*c185 + c189
calc c185 = diag(c189)
calc c185 = c177/sqrt(c185)
note c185 are level-1 standardized group-deletion residuals
calc c178 = (~c177)*.inv(c189)*.c177
pick 1 c178 b21
note b21 is standardized residual (c178 is 1x1 matrix is column of length 1)
edit b1 c203 b21
append c209 c185 c209
note reset parameter estimates for full model:
calc c1096 = c196
calc c1097 = c197
calc c1098 = c198
calc c1099 = c199
endloop
say \n
note restore model
calc cb25 = c187
calc cb17 = c193
calc cb18 = c194
calc g5 = g10
note scale and calculate diagnostics
calc b7 = b3 + b6
calc c200 = (c201 + c202)/b7
calc c201 = c201/b6
calc c202 = c202/b3
cpro c203 c205 c206
note Before calculating the normal deviates of c206,
note these values must be truncated to avoid arithmetic errors.
calc c177 = 0*c206 + 1e-8
calc c178 = 0*c206 + 0.9999999
max c206 c177 c206
min c206 c178 c206
ned c206 c207
nsco c207 c208
note sum of chi squares
sum c203 b13
sum c205 b15
cpro b13 b15 b16
note clean up
erase c177-c189 c191-c194 c196-c199
erase c301-cb14
erase c90
note c90 is used by MLwiN for storing successive deviance values
note and will get very long if you don't delete it.
link 0 g5
link 0 g8
link 0 g9
link 0 g10
mark 1 c1096-c1099
note !! echo 1
note !! From c210 onward, a set of diagnostic columns:
note !! first columns with influence statistics (cF_hj)
note !! for each fixed parameter (group g6)
note !! then columns with influence statistics (cR_hj)
note !! for each random part parameter (group g7).
note output omnibus test:
echo 1
note Omnibus chi-squared statistic with degrees of freedom and p-value:
print b13 b15 b16
name c200 'C_j' c201 'CR_j' c202 'CF_j'
name c203 'S2_j' c204 'unit2_j' c205 'n_j' c209 'res_1'
name c206 'p-val' c207 'N_Res' c208 'NR_Exp'
note Plot of influence on fixed (horizontal) and random (vertical) parts:
plot c201 c202
note Plot of influence as function of group identifier:
plot c200 c204
note Plot of level-1 residuals as function of group identifier:
plot c209 cb18
note Plot of influence as function of group sizes:
plot c200 c205
note Influence as function of significance of group residuals:
plot c200 c206
note Probability plot for level-two residuals:
plot c207 c208
note c204 is group identifier (unit2_j)
note c205 is group size (n_j)
note Groupwise influence statistics:
note c201 = influence on random part parameters (CR_j)
note c202 = influence on fixed parameters (CF_j)
note c200 = combined influence diagnostic (C_j)
note c203 = standardized multivariate residual (S2_j)
note (c203 approximately chi squared, d.f. in c205)
note c206 = p-values for c203
note c207 = observed normal deviates for c206
note c208 = expected normal deviates for c206
note Level-one influence statistics:
note c209 = standardized level-1 group-deletion residuals (res_1)