Particle Filter

The model is described by the following equations, for all \(t \geq 0\): \[X_0 \sim \mathcal{N}(0, 1), \quad X_{t+1} \sim \mathcal{N}(\rho X_{t}, 1),\] \[Y_t \sim \mathcal{N}(X_t, 1).\] We first generate \(100\) data points from the model, with \(\rho = 0.95\).

rho <- 0.95
datalength <- 100
Y <- rep(0, datalength)
X <- rep(0, datalength)
# note: X[t] in the code corresponds to X_{t-1} in the model notation
X[1] <- rnorm(1, mean = 0, sd = 1)
Y[1] <- rnorm(1, mean = X[1], sd = 1)
for (t in 2:datalength) {
    X[t] <- rnorm(1, mean = rho * X[t - 1], sd = 1)
    Y[t] <- rnorm(1, mean = X[t], sd = 1)
}

The model is linear and Gaussian, hence we can use a Kalman filter to compute the filtering means \(\mathbb{E}[X_t \mid Y_{0:t}]\) exactly for all \(t\). The FKF package can be used as follows.

library(FKF)
kf.res <- fkf(a0 = 0, P0 = matrix(1), dt = matrix(0), ct = matrix(0), Tt = matrix(rho), 
    Zt = matrix(1), HHt = matrix(1), GGt = matrix(1), yt = matrix(Y, nrow = 1), 
    check.input = TRUE)
kalman.means <- data.frame(mean = as.numeric(kf.res$att), time = 1:datalength)
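For reference, here is a minimal hand-rolled version of the scalar Kalman recursion for this model, assuming the same initialization as in the fkf call above; it should reproduce kf.res$att up to floating-point error.

# scalar Kalman filter for X_{t+1} ~ N(rho * X_t, 1), Y_t ~ N(X_t, 1);
# a sketch for reference, assuming the same initialization as fkf above
kf_means_manual <- rep(0, datalength)
m <- 0  # predictive mean (X_0 has mean 0)
P <- 1  # predictive variance (X_0 has variance 1)
for (t in 1:datalength) {
    K <- P/(P + 1)  # Kalman gain; the observation variance is 1
    m <- m + K * (Y[t] - m)  # filtering mean E[X_t | Y_{0:t}]
    P <- (1 - K) * P  # filtering variance
    kf_means_manual[t] <- m
    m <- rho * m  # predict the next state
    P <- rho^2 * P + 1
}
max(abs(kf_means_manual - kalman.means$mean))  # should be essentially zero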

Now that we have a “ground truth” to compare our results with, let’s turn to Sequential Importance Sampling (SIS). To implement the method we need a sequence of proposal distributions; we use the model’s own distributions as proposals, which is called the “prior proposal” in the lecture notes. The initial distribution is denoted \(\mu\), the transition \(f\) and the measurement density \(g\).

rmu <- function(n) rnorm(n, mean = 0, sd = 1)  # sample from the initial distribution mu
rf <- function(x) rnorm(length(x), mean = rho * x, sd = 1)  # sample from the transition f
dg <- function(x, y) dnorm(y, mean = x, sd = 1, log = TRUE)  # log measurement density g

We will use the following function to normalize the weights: from a vector of log-weights \((\log w_i)_{i=1}^N\), it creates the vector \((W_i)_{i=1}^N\) where \(W_i = w_i / \sum_{j=1}^N w_j\). There is a trick: subtracting the maximum log-weight before exponentiating, which leaves the normalized weights unchanged but significantly improves numerical stability.

normalize_weight <- function(logweights) {
    weights <- exp(logweights - max(logweights))  # subtract max to avoid underflow
    weights <- weights/sum(weights)
    return(weights)
}
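To see why the trick matters, consider log-weights around \(-1000\): exponentiating them directly underflows to zero and the naive normalization returns NaN, whereas the stabilized version recovers the correct weights.

lw <- c(-1000, -1001, -1002)
exp(lw)/sum(exp(lw))  # naive normalization: NaN, since exp(lw) underflows to 0
normalize_weight(lw)  # stabilized: same weights as for c(0, -1, -2)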

Sequential Importance Sampling itself repeatedly proposes new particles from the proposal distribution and updates the weights with the incremental weights, here the measurement density. We are only interested in the filtering means.

SIS <- function(N, datalength) {
    SIS_means <- rep(0, datalength)  # filtering means
    # step 1
    xparticles <- rmu(N)
    logweights <- dg(xparticles, Y[1])
    weights <- normalize_weight(logweights)
    SIS_means[1] <- sum(weights * xparticles)
    # step t > 1
    for (time in 2:datalength) {
        xparticles <- rf(xparticles)
        logweights <- logweights + dg(xparticles, Y[time])
        weights <- normalize_weight(logweights)
        SIS_means[time] <- sum(weights * xparticles)
    }
    return(SIS_means)
}
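As a quick illustrative sanity check (not part of the experiments below), a single SIS run can be compared to the exact Kalman means:

sis_run <- SIS(1024, datalength)  # one run with N = 1024 particles
summary(abs(sis_run - kalman.means$mean))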

We launch SIS with various numbers of particles \(N\), \(100\) times independently, using foreach. The results are stored in one big data frame.

nrepeats <- 100
Ns <- c(128, 256, 512, 1024)
library(foreach)
sis.df <- foreach(index_repeat = 1:nrepeats, .combine = rbind) %:% foreach(N = Ns, 
    .combine = rbind) %do% {
    SIS_means <- SIS(N, datalength)
    data.frame(time = 1:datalength, SIS_means = SIS_means, index_repeat = index_repeat, 
        N = N)
}
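The %do% operator runs these repeats sequentially. As an aside, and assuming the doParallel package is installed (it is not used elsewhere in this document), the loop can be parallelized by registering a backend and replacing %do% with %dopar%:

library(doParallel)  # assumption: installed; not required by the rest of the document
registerDoParallel(cores = 2)  # register a parallel backend
# then replace '%do%' above with '%dopar%' to run the repeats in parallel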

We want to compare the results with those obtained with the Kalman filter. To do so, we merge the data frames by time index. Then we use dplyr to compute the mean squared error for each choice of \(N\) and each time. The results are plotted in a grid.

sis.df <- merge(sis.df, kalman.means, by = "time")
sis.df$index_repeat <- factor(sis.df$index_repeat)
library(dplyr)
MSE.df <- sis.df %>% mutate(SE = (SIS_means - mean)^2) %>% group_by(time, N) %>% 
    summarize(MSE = mean(SE))
library(ggplot2)
ggplot(MSE.df, aes(x = time, y = MSE)) + geom_line() + facet_grid(N ~ .)

We see that the mean squared error tends to increase over time. This is the well-known weight degeneracy phenomenon: one particle ends up with a normalized weight of nearly 1, while all the other particles have weights close to zero. In the following graphs, we can see that increasing the number of particles helps at time 1 but not at time 100, where there is still essentially only one particle carrying all the weight; we would need far more particles to see an improvement at time 100. What is plotted is \(\vert \widehat{\mathbb{E}}[X_t \mid Y_{0:t}] - \mathbb{E}[X_t \mid Y_{0:t}] \vert\) for each of the \(100\) independent experiments and each choice of \(N\).

ggplot(sis.df %>% mutate(abs.err = abs(SIS_means - mean)) %>% filter(time == 
    1), aes(x = N, y = abs.err, colour = index_repeat)) + geom_point() + scale_x_log10(breaks = Ns) + 
    theme(legend.position = "none") + ylab("errors at time 1")

ggplot(sis.df %>% mutate(abs.err = abs(SIS_means - mean)) %>% filter(time == 
    100), aes(x = N, y = abs.err, colour = index_repeat)) + geom_point() + scale_x_log10(breaks = Ns) + 
    theme(legend.position = "none") + ylab("errors at time 100")
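Weight degeneracy can be quantified with the effective sample size \(\text{ESS} = 1 / \sum_{i=1}^N W_i^2\), which ranges from \(1\) (a single particle carries all the weight) to \(N\) (uniform weights). The following diagnostic is an addition for illustration; computing ess(weights) inside the SIS loop would show it collapsing towards \(1\) as time increases.

# effective sample size of a vector of normalized weights (illustrative
# diagnostic, not part of the experiments)
ess <- function(normalized_weights) {
    return(1/sum(normalized_weights^2))
}
ess(rep(1/1024, 1024))  # uniform weights: ESS equals N = 1024
ess(c(1, rep(0, 1023)))  # fully degenerate weights: ESS equals 1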

Now we add the resampling step. For this we use the multinomial resampling scheme, implemented as follows.

multinomial_resampling <- function(normalized_weights) {
    N <- length(normalized_weights)
    return(sample(x = 1:N, size = N, prob = normalized_weights, replace = TRUE))
}
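Multinomial resampling is the simplest scheme but also the noisiest. As an aside, here is a minimal sketch of systematic resampling, a common lower-variance alternative; it is an addition for illustration, and the experiments below use multinomial resampling throughout.

# systematic resampling: one uniform draw and N evenly spaced points
# (illustrative alternative; not used in the experiments below)
systematic_resampling <- function(normalized_weights) {
    N <- length(normalized_weights)
    points <- runif(1, min = 0, max = 1/N) + (0:(N - 1))/N
    cumulative <- cumsum(normalized_weights)
    cumulative[N] <- 1  # guard against floating-point round-off
    return(findInterval(points, cumulative) + 1)
}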

Either resampling function takes a vector of normalized weights and returns a vector of ancestor indices. The particle filter is similar to sequential importance sampling, except for the extra resampling steps.

SMC <- function(N, datalength) {
    SMC_means <- rep(0, datalength)  # filtering means
    # step 1
    xparticles <- rmu(N)
    logweights <- dg(xparticles, Y[1])
    weights <- normalize_weight(logweights)
    SMC_means[1] <- sum(weights * xparticles)
    # resampling
    xparticles <- xparticles[multinomial_resampling(weights)]
    # step t > 1
    for (time in 2:datalength) {
        xparticles <- rf(xparticles)
        # unlike in SIS, previous log-weights are not carried over:
        # after resampling the weights are uniform
        logweights <- dg(xparticles, Y[time])
        weights <- normalize_weight(logweights)
        SMC_means[time] <- sum(weights * xparticles)
        # resampling
        xparticles <- xparticles[multinomial_resampling(weights)]
    }
    return(list(SMC_means = SMC_means))
}
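The same illustrative sanity check as for SIS: a single run of the particle filter against the exact Kalman means. The errors should now remain small at all times.

smc_run <- SMC(1024, datalength)$SMC_means  # one run with N = 1024 particles
summary(abs(smc_run - kalman.means$mean))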

We launch SMC with various numbers of particles \(N\), \(100\) times independently. The results are stored in one big data frame.

smc.df <- foreach(index_repeat = 1:nrepeats, .combine = rbind) %:% foreach(N = Ns, 
    .combine = rbind) %do% {
    SMC_results <- SMC(N, datalength)
    data.frame(time = 1:datalength, SMC_means = SMC_results$SMC_means, index_repeat = index_repeat, 
        N = N)
}
smc.df <- merge(smc.df, kalman.means, by = "time")
smc.df$index_repeat <- factor(smc.df$index_repeat)
MSE.df <- smc.df %>% mutate(SE = (SMC_means - mean)^2) %>% group_by(time, N) %>% 
    summarize(MSE = mean(SE))
ggplot(MSE.df, aes(x = time, y = MSE)) + geom_line() + facet_grid(N ~ .)