From d87a5ffc15b127fca7db50e0861a2a8bfa5e56c7 Mon Sep 17 00:00:00 2001
From: Marc Baguelin
Date: Tue, 19 Nov 2024 12:06:40 +0100
Subject: [PATCH 01/17] First attempt at flu pandemic example, add model and data, but no fitting yet
---
 2009-flu-uk.qmd                | 183 +++++++++++++++++++++++++++++++++
 _quarto.yml                    |   1 +
 data/andre_estimates_21_02.txt |  38 +++++++
 3 files changed, 222 insertions(+)
 create mode 100644 2009-flu-uk.qmd
 create mode 100644 data/andre_estimates_21_02.txt

diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd
new file mode 100644
index 0000000..56d8144
--- /dev/null
+++ b/2009-flu-uk.qmd
@@ -0,0 +1,183 @@
# Modelling the 2009 UK flu pandemic

```{r}
#| include: false
source("common.R")
```

```{r, echo=FALSE}
r_output <- function(path, highlight = NULL) {
  if (is.null(highlight)) {
    prefix <- "```r"
  } else {
    prefix <- sprintf('```{.r code-line-numbers="%s"}', highlight)
  }
  writeLines(c(prefix, readLines(path), "```"))
}
set.seed(1) # always the same
knitr::knit_hooks$set(small_margins = function(before, options, envir) {
  if (before) {
    par(mar = c(4, 4, .1, .1))
  }
})
```

This chapter explores the use of `odin` and `monty` to model the dynamics of the 2009 A/H1N1 influenza pandemic in England and Wales, incorporating changes in social contacts during holiday periods. We demonstrate how to construct, fit, and analyse a compartmental model to infer key epidemiological parameters.

## Overview of the Pipeline

This chapter illustrates how to:

1. Construct an SEIR model with time-varying transmission using `odin`.
2. Implement an observation model and derive a likelihood based on observations using `odin`.
3. Run Bayesian inference using Markov chain Monte Carlo (MCMC) with `monty`.
4. Analyse and interpret MCMC results to estimate parameters such as:
   - The basic reproduction number $R_0$.
   - The ascertainment probability, i.e., the proportion of symptomatic infections reported as cases.

```{r}
library(odin2)
library(dust2)
```

## Data Preparation

The dataset consists of weekly case estimates, stratified by age group. For simplicity, we aggregate the data into a single time series for the analysis.

## Loading data

The data are the weekly counts of A/H1N1 pandemic cases from June 2009, initially broken down into seven age groups. The data file is taken from the supplementary material of [@endo_introduction_2019] and can be found [here](https://github.com/akira-endo/Intro-PMCMC/blob/master/assets/andre_estimates_21_02.txt).

We will fit our model to the aggregate of all cases, so we need to load the data, sum over the age groups, and then build a dataframe (a table where each row is an observation of certain variables in the columns) with 'Days' and 'Cases' columns. We use the `dplyr` package, a popular R package for data handling, with its '%>%' ('pipe') operator, but other methods are possible.

```{r}
library(dplyr)

# Read the data and sum the seven age-group columns into a single series
original_data <- read.csv(file = "data/andre_estimates_21_02.txt", sep = "\t") %>% rowSums()

# Group all age groups
data <- data.frame(Cases = original_data) %>%
  mutate(Days = seq(7, by = 7, length.out = n())) %>%
  dplyr::select(Days, Cases)
```

```{r, echo=FALSE}
q_n <- 1
```

## Plotting the data

You can plot the data and see the two waves of infections and the presence of holiday periods.
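Before plotting, it is worth a quick sanity check of the aggregated series; a minimal inspection (nothing model-specific) might look like this:

```{r}
# Quick checks on the aggregated weekly series built above
head(data)        # first rows: Days and Cases
range(data$Days)  # observation window, in days since the start
sum(data$Cases)   # total reported cases over the epidemic
```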
```{r}
# Plot the data
plot(data$Days, data$Cases, pch = 19, col = "red",
     xlab = "Days since start of epidemic",
     ylab = "Official cases count")

# Add the holiday periods - summer holidays and half-term
# Holiday calendar assuming 30% contact reduction during the summer and 15%
# during half-term
hol_t <- c(0, 38, 85, 140, 147)
hol_v <- c(1, .70, 1, .85, 1)
# Summer holiday block
rect(xleft = hol_t[2], ybottom = 0, xright = hol_t[3],
     ytop = max(data$Cases) * 1.2, col = "#aaaabb33", border = NA)
# Half-term holiday block
rect(xleft = hol_t[4], ybottom = 0, xright = hol_t[5],
     ytop = max(data$Cases) * 1.2, col = "#aaaabb33", border = NA)
```

## Understanding the model

The SEIR model divides the population into four compartments:

- **S**usceptible: Individuals at risk of infection.
- **E**xposed: Infected individuals who are not yet infectious.
- **I**nfectious: Actively transmitting the disease.
- **R**ecovered: Immune individuals.

The model incorporates time-varying transmission rates to account for changes in contact patterns during holidays. Using `odin`, we define this system of differential equations in the next section.

## Compiling the model

The model needs to be transpiled (i.e. translated from one programming language to another) in order to create some C++ code. That code is then compiled and loaded to create a "fast" model that can be called from the R environment.

```{r, echo = TRUE}
seir <- odin2::odin({
# initial conditions
initial(S) <- (1 - 2 * alpha) * N
initial(E) <- alpha*N
initial(I) <- alpha*N
initial(R) <- 0
initial(incidence, zero_every = 7) <- 0

# equations
deriv(S) <- - hol * beta * S * I / N
deriv(E) <- hol * beta * S * I / N - gamma * E
deriv(I) <- gamma * E - sigma * I
deriv(R) <- sigma * I
deriv(incidence) <- gamma * E

# parameter values
R_0 <- parameter(1.5)
L <- 1
D <- 1.25
alpha <- parameter(1e-4) # initial proportion
N <- 55000000

# convert parameters
hol <- interpolate(h_times, h_values, "constant")
h_times <- parameter()
h_values <- parameter()
dim(h_times) <- parameter(rank=1)
dim(h_values) <- length(h_times)
gamma <- 1 / L
sigma <- 1 / D
beta <- R_0 * sigma})
```

## Running the model

Once the model is compiled, it is possible to generate one (or more!) instances of your model by using `dust_system_create()` with your model generator.

```{r}
#mod <- seir$new(h_times = hol_t, h_values = hol_v)
mod <- dust_system_create(seir,
                          pars = list(h_times = hol_t, h_values = hol_v))
```

```{r}
# A vector with parameter values
# First parameter -> proportion of the population initially infected
# Second parameter -> R_0
# Third parameter -> proportion of infections detected
par0 <- c(5e-5, 1.3, 0.1)

# This vector should be passed as a list
dust_system_update_pars(sys = mod,
                        pars = list(alpha=par0[1],
                                    R_0=par0[2],
                                    h_times = hol_t,
                                    h_values= hol_v))
```

Then the model can be run by specifying the time points between which to integrate the ODEs. Note that the first point (here 0) sets the initial time of the integration. Once the time vector `t` is defined, we can run the model using `dust_system_simulate()`.

```{r}
t <- c(0,data$Days)
dust_system_set_state_initial(mod)
y <- dust_system_simulate(mod, t)
```

Let's plot the modelled weekly incidence over time.

```{r}
plot(t, dust_unpack_state(mod, y)$incidence, type = "l", col = "red", lwd = 2)
```

It looks a bit like the actual epidemic; a good sign, but let's compare our model with the data.

# Part 2. 
Observation model, likelihood and prior distributions + +We now have a dataset and a working model, all loaded in our R environment. We need to link the output of this model with the data in order to derive a likelihood function. As we are working within a Bayesian framework we also need to define the prior distribution for our parameters. + +## Cumulative incidence and observed cases + +The fundamental component of our model if the 'I' compartment tracking continuously the number of infectious people in our population (an infection prevalence). However, what we observe is a fraction of the cumulative incidence, more precisely the number of new cases in a period of 7 days (a week). In our `odin` model we track the cumulative incidence, and we have simulated it using 7 days intervals. We can thus write a simple function giving the cumulative incidence between two days: diff --git a/_quarto.yml b/_quarto.yml index 7b55dd7..0b04d93 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -25,6 +25,7 @@ book: chapters: - inference.qmd - differentiability.qmd + - 2009-flu-uk.qmd - references.qmd appendices: - installation.qmd diff --git a/data/andre_estimates_21_02.txt b/data/andre_estimates_21_02.txt new file mode 100644 index 0000000..8fd1157 --- /dev/null +++ b/data/andre_estimates_21_02.txt @@ -0,0 +1,38 @@ +1 3 30 42 42 9 2 +4 13 133 161 184 47 8 +16 55 850 479 464 159 20 +58 374 3806 1617 1475 373 42 +138 619 6748 2909 2391 630 83 +245 1302 9965 5220 5199 1524 185 +1125 5128 32217 17649 15897 4680 531 +1325 5223 22269 18436 17795 4595 492 +220 2721 9253 11989 13468 2622 207 +105 1401 4765 8232 8709 1670 116 +42 506 1935 4087 4143 1050 66 +22 247 987 2136 2129 841 48 +12 123 534 1315 1450 637 49 +7 164 927 2256 2040 658 32 +10 205 2122 2676 2236 694 75 +18 269 3575 2456 2431 1045 49 +29 401 5294 3431 3261 1544 71 +33 806 8567 6521 6265 1993 95 +74 1094 10662 8251 7726 2735 148 +151 2168 20452 11872 11388 3946 233 +258 4110 30913 14966 16874 6583 270 +363 4255 20205 12929 19952 8612 500 +262 2922 13160 11259 16817 7636 488 +227 2973 16423 8477 13376 6799 413 +240 2670 14900 5416 9326 5321 290 +226 1864 8405 3271 5400 3092 210 +215 1300 4819 2172 4067 2025 139 +154 930 3756 1748 3216 1784 138 +105 644 2363 1313 2430 1457 123 +69 366 1018 727 1351 1031 69 +39 163 378 529 1066 1100 82 +44 156 434 715 1472 1283 78 +28 99 325 421 888 690 36 +26 117 325 308 628 246 13 +23 97 307 202 394 66 3 +14 56 201 150 307 25 1 +10 41 102 75 156 7 0 +7 16 37 34 90 0 0 \ No newline at end of file From c82ea8c6c1bedcc307b6e5d8d72d0e6e1807ede3 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Tue, 19 Nov 2024 14:04:21 +0100 Subject: [PATCH 02/17] Intent adding comparison model --- 2009-flu-uk.qmd | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 56d8144..4b386ad 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -131,7 +131,12 @@ dim(h_times) <- parameter(rank=1) dim(h_values) <- length(h_times) gamma <- 1 / L sigma <- 1 / D -beta <- R_0 * sigma}) +beta <- R_0 * sigma + +# observation model +cases <- data() +cases ~ Poisson(incidence) +}) ``` ## Running the model @@ -181,3 +186,12 @@ We now have a dataset and a working model, all loaded in our R environment. We n ## Cumulative incidence and observed cases The fundamental component of our model if the 'I' compartment tracking continuously the number of infectious people in our population (an infection prevalence). 
However, what we observe is a fraction of the cumulative incidence, more precisely the number of new cases in a period of 7 days (a week). In our `odin` model we track the cumulative incidence, and we have simulated it using 7 days intervals. We can thus write a simple function giving the cumulative incidence between two days: + +```{r} +data$time <- data$Days +filter <- dust_unfilter_create(seir, data = data, time_start = 0) +dust_likelihood_run(filter, list(alpha=par0[1], + R_0=par0[2], + h_times = hol_t, + h_values= hol_v)) +``` From bc0d93fecdfd9a2f6d405feabd3b06a9de4eb909 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Mon, 25 Nov 2024 11:21:59 +0000 Subject: [PATCH 03/17] First attempt at UK pandemic pipeline --- 2009-flu-uk.qmd | 83 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 8cd52f9..3bfc0f1 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -38,6 +38,7 @@ This chapter illustrates how to: ```{r} library(odin2) library(dust2) +library(monty) ``` ## Data Preparation @@ -130,8 +131,9 @@ sigma <- 1 / D beta <- R_0 * sigma # observation model -cases <- data() -cases ~ Poisson(incidence) +rho <- parameter(0.1) +Cases <- data() +Cases ~ Poisson(rho*incidence) }) ``` @@ -175,6 +177,11 @@ plot(t, dust_unpack_state(mod, y)$incidence, type = "l", col = "red", lwd = 2) It looks a bit like the actual epidemic; a good sign, but let compare our model with the data. +```{r} +plot(t, dust_unpack_state(mod, y)$incidence, type = "l", col = "red", lwd = 2) +points(data$Days, data$Cases) +``` + # Part 2. Observation model, likelihood and prior distributions We now have a dataset and a working model, all loaded in our R environment. We need to link the output of this model with the data in order to derive a likelihood function. As we are working within a Bayesian framework we also need to define the prior distribution for our parameters. @@ -183,6 +190,10 @@ We now have a dataset and a working model, all loaded in our R environment. We n The fundamental component of our model if the 'I' compartment tracking continuously the number of infectious people in our population (an infection prevalence). However, what we observe is a fraction of the cumulative incidence, more precisely the number of new cases in a period of 7 days (a week). In our `odin` model we track the cumulative incidence, and we have simulated it using 7 days intervals. We can thus write a simple function giving the cumulative incidence between two days: +## Setting up the likelihood + +The likelihood is a function that takes a parameter as argument and return the probability density that our model (transmission model + observation) generates exactly the observed data. This number is usually very small as it is the product of each individual observation density given the model. As opposed to the example in the lecture, we thus always work using the log-density values rather than the density. Products then become sums and divisions become differences. + ```{r} data$time <- data$Days filter <- dust_unfilter_create(seir, data = data, time_start = 0) @@ -191,3 +202,71 @@ dust_likelihood_run(filter, list(alpha=par0[1], h_times = hol_t, h_values= hol_v)) ``` + +This is the likelihood associated with the dust model written using odin. We know need to move this into the monty statistical world to be able to integrate this into our pipeline. 
For this we use the `packer` concept, which allows us to define a one-to-one translation between the two worlds.

```{r}
packer <- monty_packer(c("alpha", "R_0", "rho"),
                       fixed = list(h_times = hol_t,h_values= hol_v))
```

This defines a pair of functions that allow us to 'pack' (turn a named list of parameters into a numeric vector) and 'unpack' (turn such a vector back into a named list) our parameters.

Now building the `monty` model for the likelihood is very easy:

```{r}
likelihood <- dust_likelihood_monty(filter, packer)
```

## Setting up the prior distribution

We similarly need a log-prior function. Note that the prior also serves to exclude impossible values, such as proportions below 0 or above 1: these have a prior density of 0 (i.e. they are impossible) and therefore a log-density of minus infinity.

The other ingredient we need is thus a prior, which we can construct with `monty_dsl` as before; note that its parameter names match those of the likelihood, as they should.

```{r}
prior <- monty_dsl({
  alpha ~ Beta(a = 4e-4, b = 2)
  R_0 ~ Gamma(2, 0.7)
  rho ~ Uniform(0, 1)
})
```

## Setting up the posterior

Our posterior is proportional to the product of the likelihood and prior or, on the log scale, to the sum of their logs:

```{r}
posterior <- likelihood + prior
```

Next, we define a sampler; we'll start with a random walk with a fairly arbitrary diagonal proposal matrix:

```{r}
vcv <- diag(c(2e-8,2e-4,1e-4))
sampler <- monty_sampler_random_walk(vcv)
```

We start this off using explicit initial conditions:

```{r}
#| cache: true
init_values <- packer$pack(list(alpha=par0[1],
                                R_0=par0[2],
                                rho=0.1,
                                h_times = hol_t,
                                h_values= hol_v))
samples <- monty_sample(posterior, sampler, 10000, initial = init_values,
                        n_chains = 3)
```

::: {.callout-note}
We have used explicit initial conditions here, which might not be what you want in all situations. It might be better to sample from the prior, but we have not yet implemented support for trying a few points from the prior until one with finite density is found, which is really needed here.
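A hand-rolled version of that idea might look like the sketch below — illustrative only (this is not `monty` functionality), and it assumes direct sampling from the prior and density evaluation behave as shown later in this chapter:

```{r}
#| eval: false
# Sketch: draw candidate starting points from the prior until one has
# finite posterior log-density (hand-rolled, not part of monty itself)
rng <- monty_rng$new()
repeat {
  start <- monty_model_direct_sample(prior, rng)
  if (is.finite(monty_model_density(posterior, start))) break
}
```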
+::: + +Here the log posterior density of our three chains over time, showing a rapid improvement in the posterior probability density followed by what might be reasonable (but not great) mixing: + +```{r} +matplot(samples$density, type = "l", lty = 1, + xlab = "Sample", ylab = "Log posterior probability density") +``` From d19f845c1eab353cd73aacdeee76a795d9886d30 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Mon, 25 Nov 2024 23:09:30 +0000 Subject: [PATCH 04/17] *testing values --- 2009-flu-uk.qmd | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 3bfc0f1..d977c8d 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -243,7 +243,7 @@ posterior <- likelihood + prior Next, we define a sampler; we'll start with a random walk with a fairly arbitrary diagonal proposal matrix: ```{r} -vcv <- diag(c(2e-8,2e-4,1e-4)) +vcv <- diag(c(2e-8,2e-4,1e-4)*0.1) sampler <- monty_sampler_random_walk(vcv) ``` @@ -256,6 +256,7 @@ init_values <- packer$pack(list(alpha=par0[1], rho=0.1, h_times = hol_t, h_values= hol_v)) +init_values <- c(3e-5,1.35,0.05) samples <- monty_sample(posterior, sampler, 10000, initial = init_values, n_chains = 3) ``` @@ -270,3 +271,12 @@ Here the log posterior density of our three chains over time, showing a rapid im matplot(samples$density, type = "l", lty = 1, xlab = "Sample", ylab = "Log posterior probability density") ``` +```{r} +samples_df <- posterior::as_draws_df(samples) +posterior::summarise_draws(samples_df) +``` + +```{r} +bayesplot::mcmc_pairs(samples_df) +``` + From 8336a8bef37f1e770695f91a4e62893c430c21b4 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Mon, 25 Nov 2024 23:10:49 +0000 Subject: [PATCH 05/17] Testing values --- 2009-flu-uk.qmd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index d977c8d..6bcc454 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -243,7 +243,8 @@ posterior <- likelihood + prior Next, we define a sampler; we'll start with a random walk with a fairly arbitrary diagonal proposal matrix: ```{r} -vcv <- diag(c(2e-8,2e-4,1e-4)*0.1) +vcv <- diag(c(5e-10,5e-5,1e-5))*0.0001 +#vcv <- matrix(c(4.3e-12,-1.2e-8,3.6e-9,-1.2e-8,4.9e-5,-1e-5,3.6e-9,-1e-5,1.1e-5), nrow = 3)*0.1 sampler <- monty_sampler_random_walk(vcv) ``` @@ -256,8 +257,8 @@ init_values <- packer$pack(list(alpha=par0[1], rho=0.1, h_times = hol_t, h_values= hol_v)) -init_values <- c(3e-5,1.35,0.05) -samples <- monty_sample(posterior, sampler, 10000, initial = init_values, +init_values <- c(3e-5,1.35,0.04) +samples <- monty_sample(posterior, sampler, 100000, initial = init_values, n_chains = 3) ``` From dfa97c6c94ef1f9ee829dcf8500b747d60ff6ec2 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Tue, 26 Nov 2024 05:25:44 +0000 Subject: [PATCH 06/17] Update value for mcmc --- 2009-flu-uk.qmd | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 6bcc454..c183dca 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -243,8 +243,8 @@ posterior <- likelihood + prior Next, we define a sampler; we'll start with a random walk with a fairly arbitrary diagonal proposal matrix: ```{r} -vcv <- diag(c(5e-10,5e-5,1e-5))*0.0001 -#vcv <- matrix(c(4.3e-12,-1.2e-8,3.6e-9,-1.2e-8,4.9e-5,-1e-5,3.6e-9,-1e-5,1.1e-5), nrow = 3)*0.1 +#vcv <- diag(c(5e-10,5e-5,1e-5))*0.0001 +vcv <- matrix(c(2.5e-14,-1.04e-10,5.6e-12,-1.04e-10,4.9e-07,-2.5e-8,5.6e-12,-2.5e-8,3.5e-9), nrow = 3) sampler <- monty_sampler_random_walk(vcv) ``` 
@@ -257,8 +257,8 @@ init_values <- packer$pack(list(alpha=par0[1], rho=0.1, h_times = hol_t, h_values= hol_v)) -init_values <- c(3e-5,1.35,0.04) -samples <- monty_sample(posterior, sampler, 100000, initial = init_values, +init_values <- c(3.05e-5,1.354,0.04) +samples <- monty_sample(posterior, sampler, 10000, initial = init_values, n_chains = 3) ``` @@ -271,6 +271,7 @@ Here the log posterior density of our three chains over time, showing a rapid im ```{r} matplot(samples$density, type = "l", lty = 1, xlab = "Sample", ylab = "Log posterior probability density") +length(unique(samples$density))/length(samples$density) ``` ```{r} samples_df <- posterior::as_draws_df(samples) From c2e45c7e07d9c9864ca2ebdfa4a437de7db23181 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Tue, 26 Nov 2024 12:03:17 +0000 Subject: [PATCH 07/17] Add adaptive sampler --- 2009-flu-uk.qmd | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index c183dca..3a1c787 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -243,9 +243,8 @@ posterior <- likelihood + prior Next, we define a sampler; we'll start with a random walk with a fairly arbitrary diagonal proposal matrix: ```{r} -#vcv <- diag(c(5e-10,5e-5,1e-5))*0.0001 -vcv <- matrix(c(2.5e-14,-1.04e-10,5.6e-12,-1.04e-10,4.9e-07,-2.5e-8,5.6e-12,-2.5e-8,3.5e-9), nrow = 3) -sampler <- monty_sampler_random_walk(vcv) +vcv <- diag(c(5e-10,5e-5,1e-5)) +sampler <- monty_sampler_adaptive(vcv, initial_vcv_weight = 100) ``` We start this off, using explicit initial conditions From 3c9649338886a4d7ae008563880fa187fd636261 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Tue, 26 Nov 2024 14:54:32 +0000 Subject: [PATCH 08/17] *Pull 1000 parameters and simulate from them --- 2009-flu-uk.qmd | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 3a1c787..cc342b0 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -281,3 +281,23 @@ posterior::summarise_draws(samples_df) bayesplot::mcmc_pairs(samples_df) ``` +Finally we can get an idea of the fit of the samples by looking at 100 trajectories from the samples. + +```{r} +n_groups <- 1000 +i_rand <- sample(dim(samples$pars)[2],n_groups) +list_par <- lapply(i_rand, FUN = function(i) packer$unpack(samples$pars[,i,sample(3,1)])) + +mod_mult <- dust_system_create(seir, + pars = list_par, n_groups = n_groups) + +t <- c(0,data$Days) +dust_system_set_state_initial(mod_mult) +y <- dust_system_simulate(mod_mult, t) +y <- dust_unpack_state(mod_mult, y) + +matplot(t, t(y$incidence), type = "l", lty = 1, col = "#00000044", + xlab = "Time", ylab = "Infected population") +points(data$Days, data$Cases/mean(samples$pars["rho",,]), pch=19, col="red") +``` + From 283b86cecd2847cdf7d565eda2522b4e0ad04465 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Wed, 27 Nov 2024 12:01:37 +0000 Subject: [PATCH 09/17] Add example of direct sampling from the prior --- 2009-flu-uk.qmd | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index cc342b0..fc20f04 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -232,6 +232,20 @@ prior <- monty_dsl({ }) ``` +Let's inspect the our prior `monty` model +```{r} +prior +``` + +As it has been built using the monty DSL, it comes with gradient calculation, direct sampling (so no need to use one of the Monte Carlo algorithm) and accepts multiple parameters. 
+ +Let's try to generate samples and check that they match our prior distribution: +```{r} +r <- monty_rng$new(n_streams = 3) +monty_model_direct_sample(prior, r) +``` + + ## Setting up the posterior Our posterior is the product of the likelihood and prior, or the sum of their logs: From 656f860fd3ddba86c8ed0d764471fd3ef17c11e2 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Wed, 27 Nov 2024 12:08:22 +0000 Subject: [PATCH 10/17] More readible output format for direct samples --- 2009-flu-uk.qmd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index fc20f04..fdfe563 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -241,8 +241,9 @@ As it has been built using the monty DSL, it comes with gradient calculation, di Let's try to generate samples and check that they match our prior distribution: ```{r} -r <- monty_rng$new(n_streams = 3) -monty_model_direct_sample(prior, r) +n_streams <- 10 +r <- monty_rng$new(n_streams = n_streams) +prior_samples <- matrix(monty_model_direct_sample(prior, r), nrow = n_streams) ``` From 457c1e481aeda4a447b5a0c8444b698dc61190ef Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Wed, 27 Nov 2024 14:26:23 +0000 Subject: [PATCH 11/17] * Replace Poisson observation model by Negative Binomial * Fit overdispersion as part of the fitting pipeline --- 2009-flu-uk.qmd | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index fdfe563..60cc09a 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -101,8 +101,8 @@ The model needs to be transpiled (e.g. translated from one programming language seir <- odin2::odin({ # initial conditions initial(S) <- (1 - 2 * alpha) * N -initial(E) <- alpha*N -initial(I) <- alpha*N +initial(E) <- alpha * N +initial(I) <- alpha * N initial(R) <- 0 initial(incidence, zero_every = 7) <- 0 @@ -132,8 +132,9 @@ beta <- R_0 * sigma # observation model rho <- parameter(0.1) +eta <- parameter(10) Cases <- data() -Cases ~ Poisson(rho*incidence) +Cases ~ NegativeBinomial(size = eta, mu = rho*incidence) }) ``` @@ -152,7 +153,7 @@ mod <- dust_system_create(seir, # First parameter -> proportion of population initially infected # Second argument -> R_0 # Third argument -> Proportion of infection detected -par0 <- c(5e-5, 1.3, 0.1) +par0 <- c(5e-5, 1.3, 0.1, 10) # This vector should be passed as a list dust_system_update_pars(sys = mod, @@ -206,7 +207,7 @@ dust_likelihood_run(filter, list(alpha=par0[1], This is the likelihood associated with the dust model written using odin. We know need to move this into the monty statistical world to be able to integrate this into our pipeline. For this use the `packer` concept that allows us to definne a one-to-one translation between the two worlds. 
```{r} -packer <- monty_packer(c("alpha", "R_0", "rho"), +packer <- monty_packer(c("alpha", "R_0", "rho", "eta"), fixed = list(h_times = hol_t,h_values= hol_v)) ``` @@ -229,6 +230,7 @@ prior <- monty_dsl({ alpha ~ Beta(a = 4e-4, b = 2) R_0 ~ Gamma(2, 0.7) rho ~ Uniform(0, 1) + eta ~ Exponential(mean = 1000) }) ``` @@ -241,11 +243,17 @@ As it has been built using the monty DSL, it comes with gradient calculation, di Let's try to generate samples and check that they match our prior distribution: ```{r} -n_streams <- 10 +n_streams <- 10000 r <- monty_rng$new(n_streams = n_streams) prior_samples <- matrix(monty_model_direct_sample(prior, r), nrow = n_streams) +colnames(prior_samples) <- prior$parameters ``` +```{r} +bayesplot::mcmc_pairs(prior_samples) +``` + + ## Setting up the posterior @@ -258,7 +266,7 @@ posterior <- likelihood + prior Next, we define a sampler; we'll start with a random walk with a fairly arbitrary diagonal proposal matrix: ```{r} -vcv <- diag(c(5e-10,5e-5,1e-5)) +vcv <- diag(c(5e-10,5e-5,1e-5,1)) sampler <- monty_sampler_adaptive(vcv, initial_vcv_weight = 100) ``` @@ -269,9 +277,10 @@ We start this off, using explicit initial conditions init_values <- packer$pack(list(alpha=par0[1], R_0=par0[2], rho=0.1, + eta=10, h_times = hol_t, h_values= hol_v)) -init_values <- c(3.05e-5,1.354,0.04) +init_values <- c(3.05e-5,1.354,0.04, 10) samples <- monty_sample(posterior, sampler, 10000, initial = init_values, n_chains = 3) ``` From 60c8fa374ec2888900af7ed44faaf301f63be70a Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Wed, 27 Nov 2024 18:41:02 +0000 Subject: [PATCH 12/17] Restructuring and adding some text --- 2009-flu-uk.qmd | 113 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 88 insertions(+), 25 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 60cc09a..952b275 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -21,17 +21,26 @@ knitr::knit_hooks$set(small_margins = function(before, options, envir) { } }) ``` +## Introduction -This chapter explores the use of `odin` and `monty` to model the dynamics of the 2009 A/H1N1 influenza pandemic in England and Wales incorporating changes in social contacts during holiday periods. We demonstrate how to construct, fit, and analyse a compartmental model to infer key epidemiological parameters. +### Objective of this chapter -## Overview of the Pipeline +The 2009 A/H1N1 influenza pandemic posed a significant public health challenge in England and Wales, requiring rapid and evidence-based decision-making to mitigate its impact. This chapter explores the use of `odin` and `monty` to model the dynamics of the 2009 A/H1N1 influenza pandemic in England and Wales incorporating changes in social contacts during holiday periods. We demonstrate how to construct, fit, and analyse a compartmental model to infer key epidemiological parameters. + +The chapter is structured to guide the reader through the entire modelling pipeline, from data preparation and visualisation to model construction, parameter inference, and validation. Along the way, we highlight the integration of real-world data, discuss challenges in linking models to observed cases, and showcase the use of Bayesian methods for robust parameter estimation. + +### Overview of the pipeline This chapter illustrates how to: 1. Construct an SEIR model with time-varying transmission using `odin`. 2. Implement an observation model, derive a likelihood based on observations using `odin`. 3. 
Run Bayesian inference using Markov chain Monte Carlo (MCMC) with `monty`. -4. Analyse and interpret MCMC results to estimate parameters such as: +4. Analyse and interpret MCMC results. + +### Key parameters of interest + + - The basic reproduction number $R_0$. - The ascertainment probability, i.e., the proportion of symptomatic infections reported as cases. @@ -41,13 +50,13 @@ library(dust2) library(monty) ``` -## Data Preparation +## Data preparation and visualisation -The dataset consists of weekly case estimates, stratified by age group. For simplicity, we aggregate the data into a single time series for the analysis. +Data is the number of weekly cases of A/H1N1 pandemic cases from June 2009, and initially broken in seven age-groups. For simplicity, we aggregate the data into a single time series for the analysis. -## Loading data +### Loading data -Data is the number of weekly cases of A/H1N1 pandemic cases from June 2009, and initially broken in seven age-groups. The data file is taken from the supplement material in [@endo_introduction_2019] and can be found [here](https://github.com/akira-endo/Intro-PMCMC/blob/master/assets/andre_estimates_21_02.txt). +The data file is taken from the supplement material in [@endo_introduction_2019] and can be found [here](https://github.com/akira-endo/Intro-PMCMC/blob/master/assets/andre_estimates_21_02.txt). We will fit our model to all cases aggregated so we need to load our data, add the age-group, and then build a dataframe (a table where each rows is an observations of certains variables in the columns) with 'Days' and 'cases'. We use the `dplyr` package, a popular R package for data handling, with the '%>%' ('pipe') operator but other methods are possible. @@ -63,7 +72,7 @@ data <- data.frame(Cases = original_data) %>% dplyr::select(Days, Cases) ``` -## Plotting the data +### Plotting the data You can plot the data and see the two waves of infections and the presence of holiday periods. ```{r} @@ -81,21 +90,79 @@ rect(xleft = hol_t[2],ybottom = 0, xright = hol_t[3], ytop = max(data$Cases)*1.2 #half-term holiday block rect(xleft = hol_t[4],ybottom = 0, xright = hol_t[5], ytop = max(data$Cases)*1.2, col = "#aaaabb33", border = NA) ``` +## Model design -## Understanding the model +### Transmission model -The SEIR model divides the population into four compartments: +Similarly to the SIR model, we saw already, the SEIR model divides the population into four compartments: - **S**usceptible: Individuals at risk of infection. - **E**xposed: Infected individuals who are not yet infectious. - **I**nfectious: Actively transmitting the disease. - **R**ecovered: Immune individuals. -The model incorporates time-varying transmission rates to account for changes in contact patterns during holidays. Using `odin`, we define this system of differential equations: +Additionally, the model incorporates time-varying transmission rates to account for changes in contact patterns during holidays. 
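The time variation enters as a piecewise-constant multiplier on the transmission rate. As a quick illustration — using the holiday calendar `hol_t`/`hol_v` defined with the data plot, and mirroring the `interpolate(..., "constant")` call in the `odin` code later in this chapter — the factor can be sketched in base R:

```{r}
# Piecewise-constant holiday factor h(t): 1 in term time, 0.70 over the
# summer, 0.85 during half-term (values taken from hol_t / hol_v above)
h <- stepfun(hol_t[-1], hol_v)
curve(h(x), from = 0, to = max(data$Days), n = 1001,
      xlab = "Days", ylab = "h(t)")
```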
+ +The SEIR model is governed by the following system of ordinary differential equations: + +$$ +\begin{aligned} + \frac{dS}{dt} &= - \beta(t) \frac{S I}{N} \\ + \frac{dE}{dt} &= \beta(t) \frac{S I}{N} - \gamma E \\ + \frac{dI}{dt} &= \gamma - \sigma I \\ + \frac{dR}{dt} &= \sigma I +\end{aligned} +$$ + +The parameters for the transmission model are defined as follows: +$$ +\begin{aligned} + R_0 &\text{ (basic reproduction number): } R_0 = 1.5, \\ + \gamma &\text{ (rate of transition from exposed to infectious): } \gamma = \frac{1}{L}, \\ + \sigma &\text{ (recovery rate): } \sigma = \frac{1}{D}, \\ + \beta &\text{ (transmission rate): } \beta = R_0 \cdot \sigma, \\ + N &\text{ (total population): } N = 55,000,000, \\ + \text{hol} &\text{ (time-varying factor): interpolated from } h_{\text{times}} \text{ and } h_{\text{values}}. +\end{aligned} +$$ + +The initial conditions are: +$$ +\begin{aligned} + S(0) &= (1 - 2 \alpha) N \\ + E(0) &= \alpha N \\ + I(0) &= \alpha N \\ + R(0) &= 0 \\ +\end{aligned} +$$ + +### Observation model + +The fundamental component of our transmission model is the 'I' compartment tracking continuously the number of infectious people in our population (an infection prevalence). However, what we observe through our surveillance system is a fraction of the cumulative incidence, more precisely an estimate of the number of new cases in a period of 7 days (a week). + +In our `odin` model we can track the weekly cumulative incidence $Z(t)$ by: -## Compiling the model +1. adding a differential equation integrating the instantaneous incidence: +$$\frac{dZ}{dt} = \gamma E$$ -The model needs to be transpiled (e.g. translated from one programming language to another) in order to create some C++ code. That code is then compiled and loaded to create a "fast" model that can be called from the R environment. +2. reseting $Z(t) = 0$ at the beginning of each week. + +Then we get for each $t$ that is a multiple of 7: + +$$Z(t) = \int_{t-7}^{t} \gamma E$$ +i.e. the cumulative incidence over a week. + +Finally, the observation model is defined as: +$$ +\text{Cases(t)} \sim \text{NegativeBinomial}(\mu = \rho Z(t), \text{size} = \eta), +$$ +where \(\rho\) is the ascertainment probability, and $\eta$ is the dispersion parameter. + +### Odin code of the model + +Below is the `odin` code for our model - the structure matches closely the mathematical formulation. Note the `Cases <- data()` lines telling that `Cases` are observations. The model will compile and work even when it is not linked with any dataset. However providing data is essentiel to compute a likelihood with `dust` or derive a `monty` statistical model out of it. + +This step takes time. The model needs to be transpiled (e.g. translated from one programming language to another) in order to create some C++ code and then that code is compiled and loaded in the R environnment. After this, a "fast" model can be called from the R environment and used to simulate scenarios or perform inference. ```{r, echo = TRUE} seir <- odin2::odin({ @@ -138,7 +205,7 @@ Cases ~ NegativeBinomial(size = eta, mu = rho*incidence) }) ``` -## Running the model +### Running the model Once the model is compiled, it is possible to generate one (or more!) instance of your model by using the `dust_system_create()` and your model generator. @@ -183,15 +250,11 @@ plot(t, dust_unpack_state(mod, y)$incidence, type = "l", col = "red", lwd = 2) points(data$Days, data$Cases) ``` -# Part 2. 
Observation model, likelihood and prior distributions +## The MCMC pipeline We now have a dataset and a working model, all loaded in our R environment. We need to link the output of this model with the data in order to derive a likelihood function. As we are working within a Bayesian framework we also need to define the prior distribution for our parameters. -## Cumulative incidence and observed cases - -The fundamental component of our model if the 'I' compartment tracking continuously the number of infectious people in our population (an infection prevalence). However, what we observe is a fraction of the cumulative incidence, more precisely the number of new cases in a period of 7 days (a week). In our `odin` model we track the cumulative incidence, and we have simulated it using 7 days intervals. We can thus write a simple function giving the cumulative incidence between two days: - -## Setting up the likelihood +### Setting up the likelihood The likelihood is a function that takes a parameter as argument and return the probability density that our model (transmission model + observation) generates exactly the observed data. This number is usually very small as it is the product of each individual observation density given the model. As opposed to the example in the lecture, we thus always work using the log-density values rather than the density. Products then become sums and divisions become differences. @@ -219,7 +282,7 @@ Now building the `monty` model for the likelihood of the model is very easy: likelihood <- dust_likelihood_monty(filter, packer) ``` -## Setting up the prior distribution +### Setting up the prior distribution We similarly set up a log_prior function. Note that we use the function to exclude impossible values such as proportion below 0 or above 1. This is to avoid to get logdensity of -infinity given that these values would have a density value of 0 (=impossible). @@ -253,9 +316,7 @@ colnames(prior_samples) <- prior$parameters bayesplot::mcmc_pairs(prior_samples) ``` - - -## Setting up the posterior +### Setting up the posterior Our posterior is the product of the likelihood and prior, or the sum of their logs: @@ -263,6 +324,8 @@ Our posterior is the product of the likelihood and prior, or the sum of their lo posterior <- likelihood + prior ``` +### Chosing the sampler + Next, we define a sampler; we'll start with a random walk with a fairly arbitrary diagonal proposal matrix: ```{r} @@ -320,7 +383,7 @@ dust_system_set_state_initial(mod_mult) y <- dust_system_simulate(mod_mult, t) y <- dust_unpack_state(mod_mult, y) -matplot(t, t(y$incidence), type = "l", lty = 1, col = "#00000044", +matplot(t, t(y$incidence), type = "l", lty = 1, col = "#00008822", xlab = "Time", ylab = "Infected population") points(data$Days, data$Cases/mean(samples$pars["rho",,]), pch=19, col="red") ``` From caf6cbb366aa3cd553c66be5e5d489e7f0e214eb Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Wed, 27 Nov 2024 23:51:01 +0000 Subject: [PATCH 13/17] Amend text --- 2009-flu-uk.qmd | 66 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 952b275..299ebcb 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -75,21 +75,44 @@ data <- data.frame(Cases = original_data) %>% ### Plotting the data You can plot the data and see the two waves of infections and the presence of holiday periods. 
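One presentational detail in the code below: the translucent holiday shading can be written either with `adjustcolor()` or directly as an 8-digit hex colour, where the last two hex digits encode the alpha channel; the two forms are interchangeable:

```{r}
# "skyblue" is "#87CEEB"; appending two hex digits sets the transparency
adjustcolor("skyblue", alpha.f = 0.3)  # semi-transparent skyblue
"#87CEEB77"                            # same colour, alpha = 0x77/255
```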
+ ```{r} -#plot the data -plot(data$Day, data$Cases, pch = 19, col = "red", - xlab="Days since start of epidemics", - ylab="Official cases count") - -# Add the holiday periods - summer holidays and half-term -# Holiday calendar assuming 30% contact reduction during summer and 15% during half term -hol_t <- c(0,38,85,140,147) -hol_v <- c(1,.70,1,.85,1) -#summer holiday block -rect(xleft = hol_t[2],ybottom = 0, xright = hol_t[3], ytop = max(data$Cases)*1.2, col = "#aaaabb33", border = NA) -#half-term holiday block -rect(xleft = hol_t[4],ybottom = 0, xright = hol_t[5], ytop = max(data$Cases)*1.2, col = "#aaaabb33", border = NA) +# Set up the plot +plot(data$Day, data$Cases, pch = 19, col = "red", + xlab = "Days since start of epidemics", + ylab = "Official cases count", + main = "Cases over Time with Holiday Periods", + ylim = c(0, max(data$Cases) * 1.3), # Add padding to the y-axis + xlim = c(min(data$Day), max(data$Day))) + +# Add the holiday periods +hol_t <- c(0, 38, 85, 140, 147) # Holiday timings +hol_v <- c(1, 0.70, 1, 0.85, 1) # Contact reduction + +# Summer holidays +rect(xleft = hol_t[2], ybottom = 0, xright = hol_t[3], + ytop = max(data$Cases) * 1.2, + col = adjustcolor("skyblue", alpha.f = 0.3), border = NA) +text(x = mean(hol_t[2:3]), y = max(data$Cases) * 1.25, + labels = "Summer Holiday", col = "blue", cex = 0.8) + +# Half-term holiday +rect(xleft = hol_t[4], ybottom = 0, xright = hol_t[5], + ytop = max(data$Cases) * 1.2, + col = adjustcolor("lightgreen", alpha.f = 0.3), border = NA) +text(x = mean(hol_t[4:5]), y = max(data$Cases) * 1.25, + labels = "Half-Term", col = "darkgreen", cex = 0.8) + +# Legend +legend("topright", legend = c("Cases", "Summer Holiday", "Half-Term"), + pch = c(19, NA, NA), col = c("red", "skyblue", "lightgreen"), + pt.bg = c(NA, "skyblue", "lightgreen"), + lty = c(NA, 1, 1), lwd = c(NA, 10, 10), + bg = "white", inset = 0.02, bty = "n") + ``` + + ## Model design ### Transmission model @@ -106,14 +129,24 @@ Additionally, the model incorporates time-varying transmission rates to account The SEIR model is governed by the following system of ordinary differential equations: $$ +\left\{ \begin{aligned} \frac{dS}{dt} &= - \beta(t) \frac{S I}{N} \\ \frac{dE}{dt} &= \beta(t) \frac{S I}{N} - \gamma E \\ \frac{dI}{dt} &= \gamma - \sigma I \\ \frac{dR}{dt} &= \sigma I \end{aligned} +\right. $$ +where: + +- $\beta(t)$ is the time-dependent transmission rate, representing the rate at which susceptible individuals become exposed through contact with infectious individuals. +- $\gamma$ is the rate at which exposed individuals transition to the infectious stage (the inverse of the incubation period). +- $\sigma$ is the recovery rate, indicating the rate at which infectious individuals recover or are removed from the infectious population. +- $N$ is the total population size, assumed to remain constant. +- $S(t)$, $E(t)$, $I(t)$, and $R(t)$ represent the number of susceptible, exposed, infectious, and recovered individuals, respectively, at time $t$. + The parameters for the transmission model are defined as follows: $$ \begin{aligned} @@ -149,14 +182,14 @@ $$\frac{dZ}{dt} = \gamma E$$ Then we get for each $t$ that is a multiple of 7: -$$Z(t) = \int_{t-7}^{t} \gamma E$$ +$$Z(t) = \int_{t-7}^{t} \gamma E(s) \ ds$$ i.e. the cumulative incidence over a week. 
Finally, the observation model is defined as: $$ \text{Cases(t)} \sim \text{NegativeBinomial}(\mu = \rho Z(t), \text{size} = \eta), $$ -where \(\rho\) is the ascertainment probability, and $\eta$ is the dispersion parameter. +where $\rho$ is the ascertainment probability, and $\eta$ is the dispersion parameter. ### Odin code of the model @@ -210,7 +243,6 @@ Cases ~ NegativeBinomial(size = eta, mu = rho*incidence) Once the model is compiled, it is possible to generate one (or more!) instance of your model by using the `dust_system_create()` and your model generator. ```{r} -#mod <- seir$new(h_times = hol_t, h_values = hol_v) mod <- dust_system_create(seir, pars = list(h_times = hol_t, h_values = hol_v)) ``` @@ -271,7 +303,7 @@ This is the likelihood associated with the dust model written using odin. We kno ```{r} packer <- monty_packer(c("alpha", "R_0", "rho", "eta"), - fixed = list(h_times = hol_t,h_values= hol_v)) + fixed = list(h_times = hol_t, h_values= hol_v)) ``` This defines two functions that allows to 'pack' (define) and 'unpack' (define) our parameters. From 13fef496e130e178dc71099a995b039333587ed3 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Fri, 29 Nov 2024 11:13:21 +0000 Subject: [PATCH 14/17] Add text and restructure --- 2009-flu-uk.qmd | 72 +++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 299ebcb..9847194 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -33,22 +33,18 @@ The chapter is structured to guide the reader through the entire modelling pipel This chapter illustrates how to: -1. Construct an SEIR model with time-varying transmission using `odin`. -2. Implement an observation model, derive a likelihood based on observations using `odin`. -3. Run Bayesian inference using Markov chain Monte Carlo (MCMC) with `monty`. +1. Construct an SEIR transmission model with time-varying contact rate using `odin`. +2. Implement a (probabilistic) observation model in `odin` +3. Derive a likelihood based on observations using `dust` and convert it to `monty`. +4. Built a prior distribution for you model parameters using the `monty` DSL. +3. Run Bayesian inference using adaptive Markov chain Monte Carlo (MCMC) with `monty`. 4. Analyse and interpret MCMC results. ### Key parameters of interest - - - The basic reproduction number $R_0$. - - The ascertainment probability, i.e., the proportion of symptomatic infections reported as cases. - -```{r} -library(odin2) -library(dust2) -library(monty) -``` +We are interested in particular in two epidemiological parameters: + - The basic reproduction number $R_0$ of the novel influenza strain; + - The ascertainment probability, i.e., the proportion of total infections reported as cases. ## Data preparation and visualisation @@ -147,19 +143,13 @@ where: - $N$ is the total population size, assumed to remain constant. - $S(t)$, $E(t)$, $I(t)$, and $R(t)$ represent the number of susceptible, exposed, infectious, and recovered individuals, respectively, at time $t$. 
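As a quick cross-check of this system, independent of `odin`, here is a minimal fixed-step (forward Euler) integration sketch, using a constant $\beta$ and the illustrative parameter values from this chapter; it ignores the holiday effect and the incidence counter:

```{r}
# Forward-Euler sketch of the SEIR equations above (constant beta).
# Illustrative only; the compiled odin model is what we actually use.
seir_step <- function(state, beta, gamma, sigma, N, dt) {
  with(as.list(state), {
    infections <- beta * S * I / N
    c(S = S - dt * infections,
      E = E + dt * (infections - gamma * E),
      I = I + dt * (gamma * E - sigma * I),
      R = R + dt * sigma * I)
  })
}
state <- c(S = (1 - 2e-4) * 55e6, E = 1e-4 * 55e6, I = 1e-4 * 55e6, R = 0)
for (i in seq_len(1000)) {
  state <- seir_step(state, beta = 1.5 / 1.25, gamma = 1, sigma = 1 / 1.25,
                     N = 55e6, dt = 0.1)
}
round(state)  # compartment sizes after 100 days
```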
-The parameters for the transmission model are defined as follows: -$$ -\begin{aligned} - R_0 &\text{ (basic reproduction number): } R_0 = 1.5, \\ - \gamma &\text{ (rate of transition from exposed to infectious): } \gamma = \frac{1}{L}, \\ - \sigma &\text{ (recovery rate): } \sigma = \frac{1}{D}, \\ - \beta &\text{ (transmission rate): } \beta = R_0 \cdot \sigma, \\ - N &\text{ (total population): } N = 55,000,000, \\ - \text{hol} &\text{ (time-varying factor): interpolated from } h_{\text{times}} \text{ and } h_{\text{values}}. -\end{aligned} -$$ +We assume that the contact rate is constant outside of the holiday period, and that it drops by 30% during the summer and 15% during the half-term. We have thus: + +$$\beta(t) = R_{0}*\sigma * h(t)$$ + +with $h(t)$ a piece-wise constant function equal to 0.7 during the summer, 0.85 during the half-term and 1 otherwise. -The initial conditions are: +To seed the model we assume that a small proportion of the population is infected - equally shared between the E and I compartments. The initial conditions are thus: $$ \begin{aligned} S(0) &= (1 - 2 \alpha) N \\ @@ -178,21 +168,45 @@ In our `odin` model we can track the weekly cumulative incidence $Z(t)$ by: 1. adding a differential equation integrating the instantaneous incidence: $$\frac{dZ}{dt} = \gamma E$$ -2. reseting $Z(t) = 0$ at the beginning of each week. +2. reseting $Z(t) = 0$ at the beginning of each week to only account for the new cases from the current week. -Then we get for each $t$ that is a multiple of 7: +Then we get for each $t$ that is a multiple of 7 (i.e. when we change week): $$Z(t) = \int_{t-7}^{t} \gamma E(s) \ ds$$ i.e. the cumulative incidence over a week. -Finally, the observation model is defined as: +Finally, we need to account for the imperfection of the surveillance system, assuming that only a fraction $rho$ of the infections are captured by the surveillance system and accounting for potential overdispersion we define the following observation model using a Negative-Binomial distribution with mean $\mu = \rho Z(t)$ and size $\eta$: $$ \text{Cases(t)} \sim \text{NegativeBinomial}(\mu = \rho Z(t), \text{size} = \eta), $$ where $\rho$ is the ascertainment probability, and $\eta$ is the dispersion parameter. +::: {.callout-note} +Even if the surveillance system was perfect we would expect a difference between the number of *symptomatic* cases and the number of infections as a proportion of flu cases are *asymptomatic* or 'pauci-symptomatic'. +::: + +### Summary of model parameters + +The parameters for the transmission model are defined as follows: +$$ +\begin{aligned} + R_0 &\text{ (basic reproduction number): } R_0 = 1.5, \\ + \gamma &\text{ (rate of transition from exposed to infectious): } \gamma = \frac{1}{L}, \\ + \sigma &\text{ (recovery rate): } \sigma = \frac{1}{D}, \\ + \beta &\text{ (transmission rate): } \beta = R_0 \cdot \sigma, \\ + N &\text{ (total population): } N = 55,000,000, \\ + \text{hol} &\text{ (time-varying factor): interpolated from } h_{\text{times}} \text{ and } h_{\text{values}}. +\end{aligned} +$$ + ### Odin code of the model +```{r} +library(odin2) +library(dust2) +library(monty) +``` + Below is the `odin` code for our model - the structure matches closely the mathematical formulation. Note the `Cases <- data()` lines telling that `Cases` are observations. The model will compile and work even when it is not linked with any dataset. 
However providing data is essentiel to compute a likelihood with `dust` or derive a `monty` statistical model out of it. This step takes time. The model needs to be transpiled (e.g. translated from one programming language to another) in order to create some C++ code and then that code is compiled and loaded in the R environnment. After this, a "fast" model can be called from the R environment and used to simulate scenarios or perform inference. @@ -275,10 +289,10 @@ Let's plot the I compartment over time. plot(t, dust_unpack_state(mod, y)$incidence, type = "l", col = "red", lwd = 2) ``` -It looks a bit like the actual epidemic; a good sign, but let compare our model with the data. +It looks a bit like the actual epidemic; a good sign, but let's compare our model with the data. ```{r} -plot(t, dust_unpack_state(mod, y)$incidence, type = "l", col = "red", lwd = 2) +plot(t, dust_unpack_state(mod, y)$incidence*par0[3], type = "l", col = "red", lwd = 2) points(data$Days, data$Cases) ``` From e7c14cae076b6e568534864e57db7ceb04dfcfa2 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Sun, 1 Dec 2024 20:40:38 +0000 Subject: [PATCH 15/17] More amendments --- 2009-flu-uk.qmd | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 9847194..8124473 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -23,11 +23,11 @@ knitr::knit_hooks$set(small_margins = function(before, options, envir) { ``` ## Introduction -### Objective of this chapter - The 2009 A/H1N1 influenza pandemic posed a significant public health challenge in England and Wales, requiring rapid and evidence-based decision-making to mitigate its impact. This chapter explores the use of `odin` and `monty` to model the dynamics of the 2009 A/H1N1 influenza pandemic in England and Wales incorporating changes in social contacts during holiday periods. We demonstrate how to construct, fit, and analyse a compartmental model to infer key epidemiological parameters. -The chapter is structured to guide the reader through the entire modelling pipeline, from data preparation and visualisation to model construction, parameter inference, and validation. Along the way, we highlight the integration of real-world data, discuss challenges in linking models to observed cases, and showcase the use of Bayesian methods for robust parameter estimation. +### Objective of this chapter + +The chapter is structured to guide the reader through an entire modelling pipeline, from data preparation and visualisation to model construction, parameter inference, and validation. Along the way, we highlight the integration of real-world data, discuss challenges in linking models to observed cases, and showcase the use of Bayesian methods for robust parameter estimation. ### Overview of the pipeline @@ -43,6 +43,7 @@ This chapter illustrates how to: ### Key parameters of interest We are interested in particular in two epidemiological parameters: + - The basic reproduction number $R_0$ of the novel influenza strain; - The ascertainment probability, i.e., the proportion of total infections reported as cases. 
@@ -87,8 +88,9 @@ hol_v <- c(1, 0.70, 1, 0.85, 1) # Contact reduction # Summer holidays rect(xleft = hol_t[2], ybottom = 0, xright = hol_t[3], - ytop = max(data$Cases) * 1.2, - col = adjustcolor("skyblue", alpha.f = 0.3), border = NA) + ytop = max(data$Cases) * 1.2, + col = "#87CEEB77", border = NA) + #col = adjustcolor("skyblue", alpha.f = 0.3), border = NA) text(x = mean(hol_t[2:3]), y = max(data$Cases) * 1.25, labels = "Summer Holiday", col = "blue", cex = 0.8) @@ -101,8 +103,8 @@ text(x = mean(hol_t[4:5]), y = max(data$Cases) * 1.25, # Legend legend("topright", legend = c("Cases", "Summer Holiday", "Half-Term"), - pch = c(19, NA, NA), col = c("red", "skyblue", "lightgreen"), - pt.bg = c(NA, "skyblue", "lightgreen"), + pch = c(19, NA, NA), col = c("red", "#87CEEB77", "lightgreen"), + fill = c(NA, "skyblue", "lightgreen"), lty = c(NA, 1, 1), lwd = c(NA, 10, 10), bg = "white", inset = 0.02, bty = "n") From a8097eb18cba245efc0fc2b86cdb724bd6c4fec4 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Mon, 2 Dec 2024 11:02:55 +0000 Subject: [PATCH 16/17] Some amendment and add reference --- 2009-flu-uk.qmd | 47 +++++++++++++++++++++++------------------------ references.bib | 14 ++++++++++++++ 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/2009-flu-uk.qmd b/2009-flu-uk.qmd index 8124473..eb3042d 100644 --- a/2009-flu-uk.qmd +++ b/2009-flu-uk.qmd @@ -33,12 +33,12 @@ The chapter is structured to guide the reader through an entire modelling pipeli This chapter illustrates how to: -1. Construct an SEIR transmission model with time-varying contact rate using `odin`. +1. Construct an SEIR transmission model with time-varying contact rate using `odin` 2. Implement a (probabilistic) observation model in `odin` -3. Derive a likelihood based on observations using `dust` and convert it to `monty`. -4. Built a prior distribution for you model parameters using the `monty` DSL. -3. Run Bayesian inference using adaptive Markov chain Monte Carlo (MCMC) with `monty`. -4. Analyse and interpret MCMC results. +3. Derive a likelihood based on observations using `dust` and convert it to `monty` +4. Built a prior distribution for the model parameters using the `monty` DSL +3. Run Bayesian inference using adaptive Markov chain Monte Carlo (MCMC) with `monty` +4. Analyse and interpret the MCMC results. ### Key parameters of interest @@ -49,7 +49,7 @@ We are interested in particular in two epidemiological parameters: ## Data preparation and visualisation -Data is the number of weekly cases of A/H1N1 pandemic cases from June 2009, and initially broken in seven age-groups. For simplicity, we aggregate the data into a single time series for the analysis. +Data is the number of weekly cases of A/H1N1 pandemic cases from June 2009, and initially broken in seven age-groups. For simplicity, we aggregate the data into a single time series for this analysis. 
### Loading data @@ -97,14 +97,13 @@ text(x = mean(hol_t[2:3]), y = max(data$Cases) * 1.25, # Half-term holiday rect(xleft = hol_t[4], ybottom = 0, xright = hol_t[5], ytop = max(data$Cases) * 1.2, - col = adjustcolor("lightgreen", alpha.f = 0.3), border = NA) + col = "#90EE9077", border = NA) text(x = mean(hol_t[4:5]), y = max(data$Cases) * 1.25, labels = "Half-Term", col = "darkgreen", cex = 0.8) # Legend legend("topright", legend = c("Cases", "Summer Holiday", "Half-Term"), - pch = c(19, NA, NA), col = c("red", "#87CEEB77", "lightgreen"), - fill = c(NA, "skyblue", "lightgreen"), + pch = c(19, NA, NA), col = c("red", "#87CEEB77", "#90EE9077"), lty = c(NA, 1, 1), lwd = c(NA, 10, 10), bg = "white", inset = 0.02, bty = "n") @@ -177,29 +176,27 @@ Then we get for each $t$ that is a multiple of 7 (i.e. when we change week): $$Z(t) = \int_{t-7}^{t} \gamma E(s) \ ds$$ i.e. the cumulative incidence over a week. -Finally, we need to account for the imperfection of the surveillance system, assuming that only a fraction $rho$ of the infections are captured by the surveillance system and accounting for potential overdispersion we define the following observation model using a Negative-Binomial distribution with mean $\mu = \rho Z(t)$ and size $\eta$: +Finally, we need to account for the imperfection of the surveillance system, assuming that only a fraction $\rho$ of the infections are captured by the surveillance system and accounting for potential overdispersion we define the following observation model using a Negative-Binomial distribution with mean $\mu = \rho Z(t)$ and size $\eta$: $$ \text{Cases(t)} \sim \text{NegativeBinomial}(\mu = \rho Z(t), \text{size} = \eta), $$ where $\rho$ is the ascertainment probability, and $\eta$ is the dispersion parameter. ::: {.callout-note} -Even if the surveillance system was perfect we would expect a difference between the number of *symptomatic* cases and the number of infections as a proportion of flu cases are *asymptomatic* or 'pauci-symptomatic'. +Even if the surveillance system was perfect we would expect a difference between the number of *symptomatic* cases and the number of infections as a proportion of flu cases are *asymptomatic* or 'pauci-symptomatic' (showing very few symptoms). ::: ### Summary of model parameters -The parameters for the transmission model are defined as follows: -$$ -\begin{aligned} - R_0 &\text{ (basic reproduction number): } R_0 = 1.5, \\ - \gamma &\text{ (rate of transition from exposed to infectious): } \gamma = \frac{1}{L}, \\ - \sigma &\text{ (recovery rate): } \sigma = \frac{1}{D}, \\ - \beta &\text{ (transmission rate): } \beta = R_0 \cdot \sigma, \\ - N &\text{ (total population): } N = 55,000,000, \\ - \text{hol} &\text{ (time-varying factor): interpolated from } h_{\text{times}} \text{ and } h_{\text{values}}. -\end{aligned} -$$ +A summary of the parameters of our model: + +- $R_{0}$, the basic reproduction number, +- $\gamma$, the inverse of the mean incubation period set to 1 day, +- $\sigma$, the recovery rate, the inverse of the mean recovery time assumed to be 1.25 days, +- $h(t)$, the reduction of contact due to holidays, assumed piecewise constant, +- $\rho$, the ascertainment probability i.e. the proportion of infections we expect to ascert as cases, +- $\eta$, the size parameter of our Negative Binomial observation model, +- $N$ the total size of the population in England and Wales in 2009. 
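To build some intuition for the overdispersion parameter $\eta$: a negative binomial with mean $\mu$ and size $\eta$ has variance $\mu + \mu^2/\eta$, so smaller $\eta$ means far more week-to-week variability than the Poisson model it replaced. A quick numerical illustration:

```{r}
# Variance of NegBin(mu, size = eta) versus Poisson(mu): mu + mu^2 / eta
mu <- 1000
eta <- c(10, 100, 1000)
rbind(poisson_var = rep(mu, length(eta)), negbin_var = mu + mu^2 / eta)
```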
### Odin code of the model @@ -209,9 +206,9 @@ library(dust2) library(monty) ``` -Below is the `odin` code for our model - the structure matches closely the mathematical formulation. Note the `Cases <- data()` lines telling that `Cases` are observations. The model will compile and work even when it is not linked with any dataset. However providing data is essentiel to compute a likelihood with `dust` or derive a `monty` statistical model out of it. +Below is the `odin` code for our model - the structure matches closely the mathematical formulation we derived in the previous section. Note the `Cases <- data()` lines telling that `Cases` are observations though the model will compile and work even if not linked with any dataset. However providing data is essential to compute a likelihood with `dust` or derive a `monty` likelihood statistical model of our system. -This step takes time. The model needs to be transpiled (e.g. translated from one programming language to another) in order to create some C++ code and then that code is compiled and loaded in the R environnment. After this, a "fast" model can be called from the R environment and used to simulate scenarios or perform inference. +The compilation step takes time. The model needs to be first transpiled (e.g. translated from one programming language to another) in order to create some C++ code and then that code is compiled and loaded in the R environnment. After this, a "fast" model can be called from the R environment and used to simulate scenarios or perform inference. ```{r, echo = TRUE} seir <- odin2::odin({ @@ -298,6 +295,8 @@ plot(t, dust_unpack_state(mod, y)$incidence*par0[3], type = "l", col = "red", lw points(data$Days, data$Cases) ``` +We seem to be overpredicted the expected number of cases by a factor two or so, but the MCMC pipeline should allow us to infer the distribution of the parameters consistent with the observations. + ## The MCMC pipeline We now have a dataset and a working model, all loaded in our R environment. We need to link the output of this model with the data in order to derive a likelihood function. As we are working within a Bayesian framework we also need to define the prior distribution for our parameters. diff --git a/references.bib b/references.bib index e07eb38..5fff51e 100644 --- a/references.bib +++ b/references.bib @@ -15,3 +15,17 @@ @article{knuth84 pages = {97–111}, numpages = {15} } + +@article{endo_introduction_2019, + title = {Introduction to particle {Markov}-chain {Monte} {Carlo} for disease dynamics modellers}, + volume = {29}, + urldate = {2021-08-27}, + journal = {Epidemics}, + author = {Endo, Akira and van Leeuwen, Edwin and Baguelin, Marc}, + month = dec, + year = {2019}, + note = {Publisher: Elsevier}, + keywords = {Hidden Markov process, Particle filter, Particle Markov-chain Monte Carlo, Sequential Monte Carlo, State-space models}, + pages = {100363}, + file = {PDF:/Users/mbagueli/Zotero/storage/Y3IHN6XR/Endo, van Leeuwen, Baguelin - 2019 - Introduction to particle Markov-chain Monte Carlo for disease dynamics modellers.pdf:application/pdf}, +} From 5cc5f4cdce30f85080725031a924a7da911905b1 Mon Sep 17 00:00:00 2001 From: Marc Baguelin Date: Mon, 2 Dec 2024 12:27:04 +0000 Subject: [PATCH 17/17] add gridExtra to imports in DESCRIPTION --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index be18c19..9f1688d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,6 +14,7 @@ Imports: devtools, dust2, fs, + gridExtra, knitr, monty, odin2,