In this vignette I will apply the some of the sensitivity analysis techniques given in the vignette by Jeremy Oakley to the hospital prevalances in Ensemble Zero.
this link
library(readr)
library(dplyr)
library(tidyr)
#Load and manipulate look up tables
Ward_Lookup <- read_csv("data/EnsembleZero/Ward_Lookup.csv")
names(Ward_Lookup)[11] <- "ward"
Ward_Lookup <- Ward_Lookup[,-c(3:7,10)]
WD11ToAcuteTrustIncWalesHB <- read_csv("data/EnsembleZero/WD11ToAcuteTrustIncWalesHB.csv")
#Load the ward level ensemble zero hospital prevalances for week 25 (13GB ram)
output_Hprev <- readRDS("~/Dropbox/BayesExeter/NewEnsemble/EnsembleZero/Hprev_25.rds")
#Add ward names for aggregating to trust level
NewOut <- inner_join(Ward_Lookup, output_Hprev, by="ward")
rm(output_Hprev)
#Add and then group be hospital trust
NewWithHosptial <- inner_join(WD11ToAcuteTrustIncWalesHB, NewOut, by=c("WD11CD", "WD11NM"))
#NewWithHosptial
rm(NewOut)
TrustData <- group_by(NewWithHosptial, trustId, trustName, output, replicate) %>%
summarise(Hmean=sum(Hprev)) %>%
ungroup()
#TrustData
rm(NewWithHosptial)
# Find quantiles for each parameter set and save the data
TrustQuantiles <- group_by(TrustData, trustId, trustName, output) %>%
summarise("0.05" = quantile(Hmean, 0.05), "0.25" = quantile(Hmean, 0.25), "0.5" = quantile(Hmean, 0.5), "0.75" = quantile(Hmean, 0.75), "0.95"=quantile(Hmean, 0.95)) %>%
ungroup() %>%
pivot_longer(cols="0.05":"0.95", names_to = "Quantile", values_to = "Hprev") %>%
mutate(Quantile = as.numeric(Quantile))
#Save TrustQuantiles for analysis
saveRDS(TrustQuantiles,file="data/EnsembleZero/TrustQuantilesW25.rds")
Let’s look at week 12 first
WeekNum=12
tfile <- paste0("~/Dropbox/BayesExeter/NewEnsemble/Analysis1/TrustQuantilesW",WeekNum,".rds")
TrustQuantiles <- readRDS(tfile)
head(TrustQuantiles)
## # A tibble: 6 x 5
## trustId trustName output Quantile Hprev
## <chr> <chr> <chr> <dbl> <dbl>
## 1 7A1 Betsi Cadwaladr University Local Healt… Ens0000 0.05 0.
## 2 7A1 Betsi Cadwaladr University Local Healt… Ens0000 0.25 0.
## 3 7A1 Betsi Cadwaladr University Local Healt… Ens0000 0.5 8.57e-1
## 4 7A1 Betsi Cadwaladr University Local Healt… Ens0000 0.75 4.71e+0
## 5 7A1 Betsi Cadwaladr University Local Healt… Ens0000 0.95 8.74e+0
## 6 7A1 Betsi Cadwaladr University Local Healt… Ens0001 0.05 4.13e+4
As the trust names are a little long, we do a bit of editing
TrustNamesOnly <- filter(TrustQuantiles, output=="Ens0000"&Quantile==0.5) %>%
dplyr::select(trustId,trustName)
TrustNames <- tools::toTitleCase(tolower(TrustNamesOnly$trustName))
#now automatically remove certain words
require("tm")
#have to do this iteratively, as some words only are removed in chunks
removewords1 <- c("Nhs", "Trust", "Foundation", "University Hospitals of ", "University Hospital of", "and District","University Local Board")
removewords2 <- c("University Hospitals", "University Hospital", "Teaching Hospitals", "Teaching Hospitals")
removewords3 <- c("Hospitals", "Hospital", "Teaching", "District", "Health", "Healthcare", "Services", "General", "Countess of ", "The", "Group", "Acute")
TrustNames <- strTrim(stripWhitespace(removeWords(TrustNames,removewords1)))
TrustNames <- strTrim(stripWhitespace(removeWords(TrustNames,removewords2)))
TrustNames <- strTrim(stripWhitespace(removeWords(TrustNames,removewords3)))
TrustNames <- strTrim(stripWhitespace(removeWords(TrustNames,removewords1)))
TrustNamesOnly <- mutate(TrustNamesOnly, ShortName = TrustNames)
TrustNamesOnly
## # A tibble: 134 x 3
## trustId trustName ShortName
## <chr> <chr> <chr>
## 1 7A1 Betsi Cadwaladr University Local Health… Betsi Cadwaladr
## 2 7A2 Hywel Dda University Local Health Board Hywel Dda
## 3 7A3 Swansea Bay University Local Health Boa… Swansea Bay
## 4 7A4 Cardiff and Vale University Local Healt… Cardiff and Vale
## 5 7A5 Cwm Taf Morgannwg University Local Heal… Cwm Taf Morgannwg
## 6 7A6 Aneurin Bevan University Local Health B… Aneurin Bevan
## 7 R0B South Tyneside And Sunderland NHS Found… South Tyneside and Sun…
## 8 R1F Isle Of Wight NHS Trust Isle of Wight
## 9 R1H Barts Health NHS Trust Barts
## 10 RA2 Royal Surrey NHS Foundation Trust Royal Surrey
## # … with 124 more rows
#Then to change your data names....
TrustQuantiles <- inner_join(TrustQuantiles, TrustNamesOnly, by=c("trustId", "trustName"),keep=TRUE) %>%
select(-trustName) %>%
mutate(trustName=ShortName) %>%
select(-ShortName)
The following code prepares the ensemble for emulation for all the English trusts with mogp_emulator
which I do here anyway a more formal sensitivity analysis would need one.
AllEnglandIDs <- TrustNamesOnly$trustId[7:134]
Englandtrusts <- filter(TrustQuantiles, trustId %in% AllEnglandIDs)
Englandtrusts <- select(Englandtrusts, trustName, Quantile, output, Hprev)
EmulateEngland <- dplyr::select(Englandtrusts, output, trustName, Quantile, Hprev) %>%
pivot_wider(names_from = trustName, values_from = Hprev)
design <- readRDS("~/Dropbox/BayesExeter/uq4covid.github.io/vignettes/data/EnsembleZero/inputs/design.rds")
#design <- dplyr::select(design, -repeats, -lock_2_release, -pCR, -pHRprime) %>%
design <- dplyr::select(design, -repeats, -pCR, -pHRprime) %>%
mutate(Noise = runif(dim(design)[1],0,1))
ToEmulateEngland <- inner_join(design, EmulateEngland, by="output")
ToEmulateEngland
## # A tibble: 1,000 x 145
## r_zero incubation_time infectious_time hospital_time critical_time
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.801 0.746 0.952 0.574 0.940
## 2 0.801 0.746 0.952 0.574 0.940
## 3 0.801 0.746 0.952 0.574 0.940
## 4 0.801 0.746 0.952 0.574 0.940
## 5 0.801 0.746 0.952 0.574 0.940
## 6 0.181 0.191 0.443 0.578 0.689
## 7 0.181 0.191 0.443 0.578 0.689
## 8 0.181 0.191 0.443 0.578 0.689
## 9 0.181 0.191 0.443 0.578 0.689
## 10 0.181 0.191 0.443 0.578 0.689
## # … with 990 more rows, and 140 more variables: lock_1_restrict <dbl>,
## # lock_2_release <dbl>, pEA <dbl>, pIH <dbl>, pIRprime <dbl>, pHC <dbl>,
## # GP_A <dbl>, GP_H <dbl>, GP_C <dbl>, output <chr>, Noise <dbl>,
## # Quantile <dbl>, `South Tyneside and Sunderland` <dbl>, `Isle of
## # Wight` <dbl>, Barts <dbl>, `Royal Surrey` <dbl>, `Weston Area` <dbl>,
## # Yeovil <dbl>, Bristol <dbl>, `Torbay and South Devon` <dbl>,
## # Bradford <dbl>, Southend <dbl>, `Royal Free London` <dbl>, `North
## # Middlesex` <dbl>, Hillingdon <dbl>, Kingston <dbl>, `Dorset
## # County` <dbl>, Walsall <dbl>, `Wirral University` <dbl>, `St Helens
## # and Knowsley` <dbl>, `Mid Cheshire` <dbl>, `Northern Devon` <dbl>,
## # Bedford <dbl>, `Luton and Dunstable` <dbl>, York <dbl>,
## # Harrogate <dbl>, Airedale <dbl>, `Sheffield Children's` <dbl>, `Queen
## # Elizabeth , King's Lynn.` <dbl>, `Royal United Bath` <dbl>,
## # Poole <dbl>, `Milton Keynes` <dbl>, `Basildon and Thurrock` <dbl>,
## # `East Suffolk and North Essex` <dbl>, Frimley <dbl>, `Royal
## # Bournemouth and Christchurch` <dbl>, `Royal Cornwall` <dbl>,
## # Liverpool <dbl>, `Barking, Havering and Redbridge` <dbl>,
## # Barnsley <dbl>, Rotherham <dbl>, `Chesterfield Royal` <dbl>, `North
## # West Anglia` <dbl>, `James Paget` <dbl>, `West Suffolk` <dbl>,
## # Cambridge <dbl>, Somerset <dbl>, `Royal Devon and Exeter` <dbl>,
## # Southampton <dbl>, Sheffield <dbl>, Portsmouth <dbl>, `Royal
## # Berkshire` <dbl>, `Guy's and St Thomas'` <dbl>, `Lewisham and
## # Greenwich` <dbl>, Croydon <dbl>, `St George's` <dbl>, `South
## # Warwickshire` <dbl>, `North Midlands` <dbl>, `Northern Lincolnshire
## # and Goole` <dbl>, `East Cheshire` <dbl>, Chester <dbl>, `King's
## # College` <dbl>, `Sherwood Forest` <dbl>, Plymouth <dbl>, `Coventry and
## # Warwickshire` <dbl>, Whittington <dbl>, `Royal Wolverhampton` <dbl>,
## # `Wye Valley` <dbl>, `George Eliot` <dbl>, `Norfolk and Norwich` <dbl>,
## # `Salford Royal` <dbl>, Bolton <dbl>, Tameside <dbl>, `Great
## # Western` <dbl>, Hampshire <dbl>, `Dartford and Gravesham` <dbl>,
## # Dudley <dbl>, `North Cumbria` <dbl>, Kettering <dbl>,
## # Northampton <dbl>, Salisbury <dbl>, `Doncaster and Bassetlaw` <dbl>,
## # Medway <dbl>, `Birmingham Women's and Children's` <dbl>, `Mid
## # Essex` <dbl>, `Chelsea and Westminster` <dbl>, `Princess
## # Alexandra` <dbl>, Homerton <dbl>, Gateshead <dbl>, Leeds <dbl>, …
Now I will perform a GAM-based sensitivity analysis for Cambridge (as an example I prepared for a talk at the Isaac Newton Institute)
Sensitivity1 <- dplyr::select(ToEmulateEngland, "r_zero":"GP_C","Quantile", "Cambridge") %>%
mutate(Quantile=jitter(Quantile))
Sensitivity1 <- select(Sensitivity1, -lock_2_release)
Sensitivity1 %>%
as_tibble %>%
gather('parameter', 'value', -"Cambridge") %>%
ggplot(aes(x=value, y=`Cambridge`)) +
geom_point(alpha = 0.5) +
facet_wrap(~parameter) +
labs(y="Cambridge HPrev", x='input') +
geom_smooth(method = mgcv::gam,
formula = y ~ s(x, bs = "tp"),
fill = "red",
method.args = list(method="GCV.Cp"))
We can see the main effects patterns from this plot but it’s difficult to see what is really driving the variability. The following (described in Jeremy’s vignette) is much clearer
mainEffects <- c()
for(i in c(1:14)){
gam1 <- mgcv::gam(Sensitivity1$"Cambridge"~s(as.matrix(Sensitivity1)[,i]))
mainEffects <- c(mainEffects,var(gam1$fitted)/var(Sensitivity1$"Cambridge"))
}
par(mar=c(7,4,4,2),mfrow=c(1,1))
barplot(mainEffects,names.arg = names(Sensitivity1)[c(1:14)],las=2,main= "Sensitivity Week12: Cambridge")
Essentially at week 12 (the week of lockdown in our model) we’re seeing that the ensemble variability is largely explained by pEA
the probability that exposed are asymptomatic, and pIH
the probability that infecteds are sick enough to be admitted to hospital. We also see that GP_A
, the rate asymptomatics are infecting the general population, and r_zero
are important. What happens is that before lockdown, when most exposed individuals are symptomatic and when most symptomatics end up in hospital, we see very big pandemics in hospital (makes sense so far).
I will repeat the analyses (not including the setup code) for later weeks to see what happens. First, for week 20:
Sensitivity1 %>%
as_tibble %>%
gather('parameter', 'value', -"Cambridge") %>%
ggplot(aes(x=value, y=`Cambridge`)) +
geom_point(alpha = 0.5) +
facet_wrap(~parameter) +
labs(y="Cambridge HPrev", x='input') +
geom_smooth(method = mgcv::gam,
formula = y ~ s(x, bs = "tp"),
fill = "red",
method.args = list(method="GCV.Cp"))
#Sensitivity1$Quantile <- jitter(Sensitivity1$Quantile)
mainEffects <- c()
for(i in c(1:15)){
gam1 <- mgcv::gam(Sensitivity1$"Cambridge"~s(as.matrix(Sensitivity1)[,i]))
mainEffects <- c(mainEffects,var(gam1$fitted)/var(Sensitivity1$"Cambridge"))
}
par(mar=c(7,4,4,2),mfrow=c(1,1))
barplot(mainEffects,names.arg = names(Sensitivity1)[c(1:15)],las=2,main= "Sensitivity Week 20: Cambridge")
This is the point in the simulation where lockdown is released somewhat. As we can see now, a number of other parameters have become important, including the average stay in hospital and the rate at which those in hospitals infect the general population. The scaling of the force of infection for lockdown is also important and we see rates and stays in critical care having an impact.
For week 25, where we might see the effects of relaxing lockdown, we see
Sensitivity1 %>%
as_tibble %>%
gather('parameter', 'value', -"Cambridge") %>%
ggplot(aes(x=value, y=`Cambridge`)) +
geom_point(alpha = 0.5) +
facet_wrap(~parameter) +
labs(y="Cambridge HPrev", x='input') +
geom_smooth(method = mgcv::gam,
formula = y ~ s(x, bs = "tp"),
fill = "red",
method.args = list(method="GCV.Cp"))
#Sensitivity1$Quantile <- jitter(Sensitivity1$Quantile)
mainEffects <- c()
for(i in c(1:15)){
gam1 <- mgcv::gam(Sensitivity1$"Cambridge"~s(as.matrix(Sensitivity1)[,i]))
mainEffects <- c(mainEffects,var(gam1$fitted)/var(Sensitivity1$"Cambridge"))
}
par(mar=c(7,4,4,2),mfrow=c(1,1))
barplot(mainEffects,names.arg = names(Sensitivity1)[c(1:15)],las=2,main= "Sensitivity Week 25: Cambridge")
It becomes clear that the effect of parameters is changing as the simulation continues, which suggests that even sparse data, for example on hospital prevalence should enable a lot of discrimination amongst the different parameters.