library(MASS)
library(rgdal)

1 Introduction

The aim of this document is to put together some data and functions for calibrating COVID-19 simulations from the MetaWards model.

2 MetaWards simulations of COVID-19

Let’s start by importing some of MetaWards’ output. We’ll take this from the uq4covid GitHub repository.

We’ll assume that we’re going to calibrate to cumulative counts between given dates

calib_starts <- as.Date(c("2020/01/01", "2020/04/01"))
calib_ends <- as.Date(c("2020/03/30", "2020/05/31"))

and then put together a function to invert the MetaWards inputs, and then tidy the MetaWards output.

invert_inputs <- function(x) {
cbind(1 / x[, 3], 1 / x[, 5] + 1, x[, 1] * (1 / x[, 5] + 1), x[, 6], x[, 7])
}

raw2tidy <- function(x, starts, ends) {
x <- read.csv(x)
n_starts <- length(starts)
x_date <- as.Date(x$date)
ranges <- sapply(seq_along(starts), function(i) starts[i]:ends[i])
inds <- sapply(ranges, function(x) x_date %in% x)
xs <- lapply(seq_len(n_starts), function(i) x[inds[, i], , drop = FALSE])
xs <- lapply(xs, function(z) as.matrix(z[, which(substr(names(z), 1, 4) == "ward")[-1]]))
cum_cases <- sapply(xs, function(z) crossprod(rep(1, nrow(z)), z)[1,])
inputs <- invert_inputs(as.matrix(x[1, 3:9, drop = FALSE]))
list(input = inputs, cases = cum_cases)
}
output_path <- paste(path_to_uq4covid, "data/metawards/initial_ensemble/raw_outputs", sep = "/")
trajectories <- list.files(output_path, pattern = "wards_trajectory_I.csv.bz2", 
                           recursive = TRUE, full.names = TRUE)
metawards_output <- lapply(trajectories, raw2tidy, starts = calib_starts, ends = calib_ends)

MetaWards gives us metadata on wards, which can be download here. We’ll read them in, and also aggregate to unitary authority (UA) level.

wards <- read.csv("Ward_Lookup.csv")
lads <- subset(wards, !duplicated(LAD11CD))

Then we’ll separate the inputs and outputs, and convert inputs to \([-1, 1]\).

inputs <- t(sapply(metawards_output, function(x) unlist(x[[1]])))
limits <- read.csv(paste(path_to_uq4covid, "model_config/metawards/limits.csv", sep = "/"), nrows = 2)
for (i in seq_len(ncol(inputs))) {
  inputs[, i] <- 2 * ((inputs[, i] - limits[1, i]) / diff(limits[, i]) - .5)
}
names(inputs) <- c("incubation_time", "infectious_time", "r_zero", "lock_1_restrict", "lock_2_release")
apply(inputs, 2, range)
##            [,1]       [,2]       [,3]      [,4]      [,5]
## [1,] -0.9865469 -0.9969497 -0.9809618 -0.998374 -0.983834
## [2,]  0.9922580  0.9919337  0.9800751  0.986866  0.981626
outputs_by_ward <- lapply(metawards_output, "[[", 2)
outputs <- lapply(outputs_by_ward, function(z) apply(z, 2, function(x) tapply(x, wards$LAD11CD, sum)))

3 Census 2011 for population data

Then we’ll download population data from the 2011 Census, and add it to the UA data.

census <- read.csv("census2011_population.csv", skip = 14)
# then drop anything that isn't a UA
is_UA <- substr(census[, 1], 1, 2) %in% c("E0", "W0")
census <- subset(census, is_UA)
# tidy up a bit by stripping ` UA' from any names containing that
for (i in 2:3) {
  untidy <- grep("UA", census[, i])
  census[untidy, i] <- sapply(strsplit(census[untidy, i], " UA"), "[[", 1)
}
# then collect names, irrespective of UA type
census$name <- apply(census[, 2:4], 1, function(x) x[x != ""])
# and then identify UA types, tidy population numbers, and get IDs
census$type <- ifelse(census[, 4] == "", "UTLA", "LTLA")
census$population <- as.integer(gsub(",", "", census[, 6], fixed = TRUE))
census$id <- census[,1]
# we'll drop the rubbish, and order alphabetically, as in MetaWards output
census <- census[, c("id", "name", "type", "population")]
census <- census[order(census$name), ]
# finally we'll add these to the UA data
lads$population <- census$population[match(lads$LAD11CD, census$id)]

4 Public Health England observations

Next we’ll load the latest PHE data on COVID-19 cases, available from https://coronavirus.data.gov.uk/, a website with some surprisingly good graphics.

obs_csv <- read.csv("https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv")

Let’s tidy these up a bit.

# first we need to make sure the `Area.code' in is census$id
obs_csv <- subset(obs_csv, Area.code %in% census$id)
# then we'll put the daily cases into a matrix
# by first creating factors of the dates of UA codes
obs_dts <- as.factor(obs_csv$Specimen.date)
obs_id <- as.factor(obs_csv$Area.code)
obs_dts0 <- levels(obs_dts)
obs_locs0 <- levels(obs_id)
# and then put them in the right place
# with locations along rows and dates along columns
obs_daily <- matrix(0, length(obs_locs0), length(obs_dts0))
dimnames(obs_daily) <- list(obs_locs0, obs_dts0)
places <- cbind(as.integer(obs_id), as.integer(obs_dts))
obs_daily[places] <- obs_csv$Daily.lab.confirmed.cases
# and finally swap non-finites for zeros
obs_daily[!is.finite(obs_daily)] <- 0

5 Ensuring data match

all(rownames(obs_daily) %in% rownames(outputs[[1]]))
## [1] TRUE
all(rownames(outputs)[[1]] %in% rownames(obs_daily))
## [1] TRUE
output_keep <- rownames(outputs[[1]]) %in% rownames(obs_daily)
outputs <- lapply(outputs, function(x) x[output_keep, ])
lads <- subset(lads, output_keep)
phe_population <- read.csv("ukmidyearestimates20182019ladcodes.csv", skip = 5)
phe_match <- match(lads$LAD11CD, phe_population$Code)
lads$phe_population <- as.integer(gsub(",", "", phe_population$All.ages[phe_match], fixed = TRUE))
lads$phe_population[!is.finite(lads$phe_population)] <- lads$population[!is.finite(lads$phe_population)]
all.equal(rownames(obs_daily), rownames(outputs[[1]]))
## [1] TRUE

Finally, we’ll put accumulate the daily cases between the calibration ranges, and cache some calibration data and functions.

obs <- sapply(seq_along(calib_starts), function(i)
  rowSums(obs_daily[, as.Date(obs_dts0) %in% calib_starts[i]:calib_ends[i]]))
save(inputs, outputs, obs, lads, output_keep, calib_starts, calib_ends, file = "calibration_data.rda")
save(raw2tidy, invert_inputs, file = "calibration_functions.rda")