#############
## Sheet 1 ##
#############
#### question 8
x1 <- rnorm(100)
qqnorm(x1)
x2 <- rexp(100)
qqnorm(x2)
x3 <- runif(100)
qqnorm(x3)
x4 <- rt(100, df = 1)
qqnorm(x4)
#### question 9
# to see all four plots at once,
# i.e. to arrange the plots in a 2 x 2 array,
# use par(mfrow = c(2, 2)) and then the qqnorm commands
par(mfrow = c(2, 2))
# from now on plots will be in a 2 x 2 array
x1 <- rnorm(100)
qqnorm(x1, main = "Normal Q-Q plot: normal data")
x2 <- rexp(100)
qqnorm(x2, main = "Normal Q-Q plot: exponential data")
x3 <- runif(100)
qqnorm(x3, main = "Normal Q-Q plot: uniform data")
x4 <- rt(100, df = 1)
qqnorm(x4, main = "Normal Q-Q plot: Cauchy data")
# to get back to a 1 x 1 array of plots you would use
# par(mfrow = c(1, 1))
# try multiple plots to see how much variation there is
# from one sample to another
# normal data, n = 100, try running this a few times
for (i in 1:4) {
x <- rnorm(100)
qqnorm(x)
}
# and repeat but with x <- rexp(100)
# and with x <- runif(100)
# and with x <- rt(100, df = 1)
# next, vary the sample size
# normal data, n = 10
for (i in 1:4) {
x <- rnorm(10)
qqnorm(x)
}
# useful to also try n = 20, 50
# useful to also try exponential data (using rexp),
# and uniform data (using runif),
# and Cauchy, or t, data (using rt)
# e.g. uniform distribution, n = 20
for (i in 1:4) {
x <- runif(20)
qqnorm(x)
}
# can also look at t-distributions with different numbers
# of degrees of freedom
# e.g. t-distribution with 5 dgrees of freedom, n = 10
for (i in 1:4) {
x <- rt(10, df = 5)
qqnorm(x)
}
#### question 10
n <- 100
x <- rnorm(n)
k <- 1:n
plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot",
ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]")
# now try replacing x <- rnorm(n) by x <- rexp(n)
x <- rexp(n)
plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot",
ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]")
# are interarrival times exponential?
# exponential Q-Q plot with data on insurance claim interarrival times
x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/interarrivals.txt")
n <- length(x)
k <- 1:n
plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot",
ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]")
# are claim amounts exponential?
# exponential Q-Q plot with data on insurance claim amounts
x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/amounts.txt")
n <- length(x)
k <- 1:n
plot(-log(1 - k/(n+1)), sort(x), main = "Exponential Q-Q Plot",
ylab = "Ordered data", xlab = "-log[1 - k/(n+1)]")
# are interarrival times Pareto?
# Pareto Q-Q plot for interarrival times
x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/interarrivals.txt")
n <- length(x)
k <- 1:n
plot(-log(1 - k/(n+1)), sort(log(x)),
main = "Pareto Q-Q Plot: interarrivals",
ylab = "log(Ordered data)", xlab = "-log[1 - k/(n+1)]")
# are claim amounts Pareto?
# Pareto Q-Q plot for claim amounts
x <- scan("http://www.stats.ox.ac.uk/~laws/partA-stats/data/amounts.txt")
n <- length(x)
k <- 1:n
plot(-log(1 - k/(n+1)), sort(log(x)),
main = "Pareto Q-Q Plot: amounts",
ylab = "log(Ordered data)", xlab = "-log[1 - k/(n+1)]")