#Monte-Carlo example, Prussian Horse-kick fatality O<-c(109,65,22,3,1,0) n<-sum(O) #step 1 compute the MLE and Pearson stat on the original data mu<-sum((0:5)*O)/n pr0<-dpois(0:4,lambda=mu) (pr0<-c(pr0,1-sum(pr0))) E<-n*pr0 (P<-sum( (O-E)^2/E )) (1-pchisq(P,4)) #Monte Carlo test #auxiliary function to count O's MyCount<-function(y) { my<-max(y) O<-rep(0,my+1) for (j in 0:my) {O[j+1]<-O[j+1]+sum(y==j)} c(O,0) } y<-c(1,3,2,0,0,0,1,1,2,0); MyCount(y) M<-10000 Pm<-rep(0,M) for (m in 1:M) { #Step 2 - simulate synthetic data ym<-rpois(n=n,lambda=mu) #count the number in each cell from 0 to 4 and >=5 Om<-MyCount(ym) #step 3 compute the MLE and Pearson stat for the synthetic data mum<-mean(ym) #MLE pr0m<-dpois(0:max(ym),lambda=mum) pr0m<-c(pr0m,1-sum(pr0m)) Em<-n*pr0m Pm[m]<-sum( (Om-Em)^2/Em ) } #step 4 estimate the p-value (p<-mean(Pm>P)) (p.std<-sqrt(p*(1-p)/M)) ###################################################### #KS test for Exp fit to Bat detection distance data #Step 1. Notice the data are already sorted y<-c(23,27,34,40,42,45,52,56,62,68,83) (mu<-mean(y)) n<-11 #Step 2. The cdf at the data, pexp(y,lambda) is the Exp(lambda) cdf F<-pexp(y,rate=1/mu) x<-seq(0,2*max(y),length.out=20) plot(x,pexp(x,rate=1/mu),type='l',xlab='data y',ylab='F(y), F_e(y)'); lines(c(0,y,1.5*max(y)),(0:(n+1))/n,type='s') windows() plot(ecdf(y),xlim=c(0,2*max(y))) lines(x,pexp(x,rate=1/mu),type='l',xlab='data y',ylab='F(y), F_e(y)',main='') #the empirical cdf is F_e(y_(i);y)=i/n so F_e-F at i=1...n is (1:n)/n-F #and F-F_e(y_(i-1)) at i=1...n is F-(0:(n-1))/n #the KS statistic d is the largest difference (d<-max(c((1:n)/n-F,F-(0:(n-1))/n))) M<-10000 d.sim<-rep(0,M) for (i in 1:M) { Ym<-sort(rexp(n,1/mu)) #Step 3 (each i=1...M) - and remember to sort! mum<-mean(Ym) #Step 4.i MLE of ith data set Fm<-pexp(Ym,rate=1/mum) d.sim[i]<-max(c((1:n)/n-Fm,Fm-(0:(n-1))/n)) #Step 4.ii KS for ith data } (p<-mean(d.sim>d)) #step 5 p-hat