library(MASS)
library(rgdal)
The aim of this document is to put together some data and functions for calibrating COVID-19 simulations from the MetaWards model.
Let’s start by importing some of MetaWards’ output. We’ll take this from the uq4covid GitHub repository.
We’ll assume that we’re going to calibrate to cumulative counts between given dates
calib_starts <- as.Date(c("2020/01/01", "2020/04/01"))
calib_ends <- as.Date(c("2020/03/30", "2020/05/31"))
and then put together a function to invert the MetaWards inputs, and then tidy the MetaWards output.
invert_inputs <- function(x) {
cbind(1 / x[, 3], 1 / x[, 5] + 1, x[, 1] * (1 / x[, 5] + 1), x[, 6], x[, 7])
}
raw2tidy <- function(x, starts, ends) {
x <- read.csv(x)
n_starts <- length(starts)
x_date <- as.Date(x$date)
ranges <- sapply(seq_along(starts), function(i) starts[i]:ends[i])
inds <- sapply(ranges, function(x) x_date %in% x)
xs <- lapply(seq_len(n_starts), function(i) x[inds[, i], , drop = FALSE])
xs <- lapply(xs, function(z) as.matrix(z[, which(substr(names(z), 1, 4) == "ward")[-1]]))
cum_cases <- sapply(xs, function(z) crossprod(rep(1, nrow(z)), z)[1,])
inputs <- invert_inputs(as.matrix(x[1, 3:9, drop = FALSE]))
list(input = inputs, cases = cum_cases)
}
output_path <- paste(path_to_uq4covid, "data/metawards/initial_ensemble/raw_outputs", sep = "/")
trajectories <- list.files(output_path, pattern = "wards_trajectory_I.csv.bz2",
recursive = TRUE, full.names = TRUE)
metawards_output <- lapply(trajectories, raw2tidy, starts = calib_starts, ends = calib_ends)
MetaWards gives us metadata on wards, which can be download here. We’ll read them in, and also aggregate to unitary authority (UA) level.
wards <- read.csv("Ward_Lookup.csv")
lads <- subset(wards, !duplicated(LAD11CD))
Then we’ll separate the inputs and outputs, and convert inputs to \([-1, 1]\).
inputs <- t(sapply(metawards_output, function(x) unlist(x[[1]])))
limits <- read.csv(paste(path_to_uq4covid, "model_config/metawards/limits.csv", sep = "/"), nrows = 2)
for (i in seq_len(ncol(inputs))) {
inputs[, i] <- 2 * ((inputs[, i] - limits[1, i]) / diff(limits[, i]) - .5)
}
names(inputs) <- c("incubation_time", "infectious_time", "r_zero", "lock_1_restrict", "lock_2_release")
apply(inputs, 2, range)
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.9865469 -0.9969497 -0.9809618 -0.998374 -0.983834
## [2,] 0.9922580 0.9919337 0.9800751 0.986866 0.981626
outputs_by_ward <- lapply(metawards_output, "[[", 2)
outputs <- lapply(outputs_by_ward, function(z) apply(z, 2, function(x) tapply(x, wards$LAD11CD, sum)))
Then we’ll download population data from the 2011 Census, and add it to the UA data.
census <- read.csv("census2011_population.csv", skip = 14)
# then drop anything that isn't a UA
is_UA <- substr(census[, 1], 1, 2) %in% c("E0", "W0")
census <- subset(census, is_UA)
# tidy up a bit by stripping ` UA' from any names containing that
for (i in 2:3) {
untidy <- grep("UA", census[, i])
census[untidy, i] <- sapply(strsplit(census[untidy, i], " UA"), "[[", 1)
}
# then collect names, irrespective of UA type
census$name <- apply(census[, 2:4], 1, function(x) x[x != ""])
# and then identify UA types, tidy population numbers, and get IDs
census$type <- ifelse(census[, 4] == "", "UTLA", "LTLA")
census$population <- as.integer(gsub(",", "", census[, 6], fixed = TRUE))
census$id <- census[,1]
# we'll drop the rubbish, and order alphabetically, as in MetaWards output
census <- census[, c("id", "name", "type", "population")]
census <- census[order(census$name), ]
# finally we'll add these to the UA data
lads$population <- census$population[match(lads$LAD11CD, census$id)]
Next we’ll load the latest PHE data on COVID-19 cases, available from https://coronavirus.data.gov.uk/, a website with some surprisingly good graphics.
obs_csv <- read.csv("https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv")
Let’s tidy these up a bit.
# first we need to make sure the `Area.code' in is census$id
obs_csv <- subset(obs_csv, Area.code %in% census$id)
# then we'll put the daily cases into a matrix
# by first creating factors of the dates of UA codes
obs_dts <- as.factor(obs_csv$Specimen.date)
obs_id <- as.factor(obs_csv$Area.code)
obs_dts0 <- levels(obs_dts)
obs_locs0 <- levels(obs_id)
# and then put them in the right place
# with locations along rows and dates along columns
obs_daily <- matrix(0, length(obs_locs0), length(obs_dts0))
dimnames(obs_daily) <- list(obs_locs0, obs_dts0)
places <- cbind(as.integer(obs_id), as.integer(obs_dts))
obs_daily[places] <- obs_csv$Daily.lab.confirmed.cases
# and finally swap non-finites for zeros
obs_daily[!is.finite(obs_daily)] <- 0
all(rownames(obs_daily) %in% rownames(outputs[[1]]))
## [1] TRUE
all(rownames(outputs)[[1]] %in% rownames(obs_daily))
## [1] TRUE
output_keep <- rownames(outputs[[1]]) %in% rownames(obs_daily)
outputs <- lapply(outputs, function(x) x[output_keep, ])
lads <- subset(lads, output_keep)
phe_population <- read.csv("ukmidyearestimates20182019ladcodes.csv", skip = 5)
phe_match <- match(lads$LAD11CD, phe_population$Code)
lads$phe_population <- as.integer(gsub(",", "", phe_population$All.ages[phe_match], fixed = TRUE))
lads$phe_population[!is.finite(lads$phe_population)] <- lads$population[!is.finite(lads$phe_population)]
all.equal(rownames(obs_daily), rownames(outputs[[1]]))
## [1] TRUE
Finally, we’ll put accumulate the daily cases between the calibration ranges, and cache some calibration data and functions.
obs <- sapply(seq_along(calib_starts), function(i)
rowSums(obs_daily[, as.Date(obs_dts0) %in% calib_starts[i]:calib_ends[i]]))
save(inputs, outputs, obs, lads, output_keep, calib_starts, calib_ends, file = "calibration_data.rda")
save(raw2tidy, invert_inputs, file = "calibration_functions.rda")