Geostatistics practical

Data from the Million Deaths Study in India can be found as india.rds in the data folder of the course web site.

library('terra')

## terra 1.8.54

x = readRDS('../data/india.rds')
values(x)[100000+1:2,]

##            yearCut  ageCut    sex     srs_id urban_rural age year Tuberculosis
## 100001 [2004,2005) [30,35) Female 1811052015       Rural  30 2004            0
## 100002 [2004,2005) [30,35) Female 1811061005       Rural  30 2004            0
##        Venomous.snakes languageGroup      pop
## 100001               0      Magadhan 43.37914
## 100002               0      Magadhan 15.59410

srs_id: geographic identifier of sampling unit (village or census area)
yearCut and year, year of survey, the former is a factor the latter numeric
ageCut and age, age group, the former is a factor
sex like it says
pop population of age-sex group in sampling unit. It’s an estimate (and not always an integer)
Tuburculosis, Venemous.snakes number of deaths in the relevant age-sex group
urban_rural: area type of the sampling unit
languageGroup:broad category of dominant language in the sampling unit

xUnique = x[!duplicated(x$srs_id),]
india = geodata::gadm("IND", level=0, path=normalizePath("~/Downloads"))
indiaT = project(india, crs(xUnique))
plot(indiaT)
points(xUnique, col='red', cex=0.5)

plot of chunk amap

indiaElev = geodata::elevation_30s('IND', path=normalizePath("~/Downloads"))
plot(indiaElev)

plot of chunk covariate

Write down the model! \[ \begin{aligned} Y_{itj} \sim & \text{Poisson}(\lambda_{itj}) \\ \end{aligned} \]

xSub = x[which(x$pop > 0 & x$age >= 40),]
xSub$logPop = log(xSub$pop)
res = geostatsp::glgm(
    Tuberculosis ~ sex + ageCut + yearCut + elevation + 
        offset(logPop) +
        f(srs_id, model='iid', prior='pc.prec', param=c(1, 0.5)),
    covariates = list(elevation = indiaElev),
    data = xSub,
    family='poisson',
    grid = 50,
    prior = list(sd = 1, range = 500*1000)
    )

knitr::kable(res$parameters$summary[,3:5], digits=2)

	0.025quant	0.5quant	0.975quant
(Intercept)	-27.30	-6.63	14.04
sexFemale	-0.88	-0.84	-0.81
ageCut[5,10)	-62.01	0.00	62.01
ageCut[10,15)	-62.01	0.00	62.01
ageCut[15,20)	-62.01	0.00	62.01
ageCut[20,25)	-62.01	0.00	62.01
ageCut[25,30)	-62.01	0.00	62.01
ageCut[30,35)	-62.01	0.00	62.01
ageCut[35,40)	-62.01	0.00	62.01
ageCut[40,45)	-21.60	-0.93	19.74
ageCut[45,50)	-21.41	-0.74	19.93
ageCut[50,55)	-21.08	-0.41	20.26
ageCut[55,60)	-20.75	-0.08	20.59
ageCut[60,65)	-20.61	0.06	20.73
ageCut[65,70)	-20.31	0.36	21.03
ageCut[70,75)	-20.15	0.52	21.19
ageCut[75,80)	-19.93	0.74	21.41
ageCut[80,130]	-20.13	0.54	21.21
yearCut[2005,2006)	0.06	0.13	0.20
yearCut[2006,2007)	0.00	0.07	0.14
yearCut[2007,2008)	-0.06	0.01	0.08
yearCut[2008,2009)	-0.10	-0.03	0.04
yearCut[2009,2010)	-0.19	-0.12	-0.05
yearCut[2010,2011)	-0.29	-0.22	-0.14
yearCut[2011,2012)	-0.29	-0.22	-0.14
yearCut[2012,2013)	-0.33	-0.25	-0.18
yearCut[2013,2013]	-0.41	-0.33	-0.26
elevation	0.00	0.00	0.00
range/1000	213.32	269.11	347.25
sd	0.40	0.46	0.54
sd srs id	0.42	0.45	0.48

toPlot = mask(res$raster[['predict.0.5quant']], indiaT)
toPlot = exp(toPlot)*100000
thecol = mapmisc::colourScale(toPlot, col='Reds', breaks=6, dec=-2, style='equal')
mapmisc::map.new(indiaT)
plot(toPlot, col=thecol$col, breaks=thecol$breaks, legend=FALSE, add=TRUE)
plot(indiaT, add=TRUE)
mapmisc::legendBreaks("bottomleft", thecol, bty='n')

plot of chunk plotFit

caluculate observed and expected

xHavePop = x[which(x$pop>0),]
xHavePop$logPop = log(xHavePop$pop)
xHavePop$ageRelevel = relevel(xHavePop$ageCut, '[60,65)')
theRates = glm(Tuberculosis ~ ageRelevel*sex + offset(logPop),family='poisson', data=values(xHavePop))
xHavePop$pred = predict.glm(theRates, values(xHavePop), type='response')

sum(xHavePop$pred)

## [1] 19121

sum(xHavePop$Tuberculosis)

## [1] 19121

byvars = c("yearCut","srs_id",'urban_rural',"year",'languageGroup')
xAggDf = aggregate(
    values(xHavePop)[,c('pred','pop','Tuberculosis','Venomous.snakes')], 
    values(xHavePop)[,byvars],
    FUN='sum')
xUnique = xHavePop[!duplicated(values(xHavePop)[, byvars]), 
    byvars] 
xAgg = merge(xUnique, xAggDf)

values(xAgg)[24060+1:4,]

##           yearCut     srs_id urban_rural year languageGroup      pred      pop
## 24061 [2007,2008) 0914691014       Rural 2007         Hindi 0.4189380 1711.978
## 24062 [2007,2008) 0914692013       Rural 2007         Hindi 0.4440820 1827.714
## 24063 [2007,2008) 0914692026       Rural 2007         Hindi 0.3439542 1415.680
## 24064 [2007,2008) 0914701004       Rural 2007         Hindi 0.2696260 1156.985
##       Tuberculosis Venomous.snakes
## 24061            1               0
## 24062           10               0
## 24063            0               0
## 24064            0               0

xAgg$logExpected = log(xAgg$pred)
res2 = geostatsp::glgm(
    Tuberculosis ~  yearCut + elevation + 
        offset(logExpected) +
        f(srs_id, model='iid', prior='pc.prec', param=c(1, 0.5)),
    covariates = list(elevation = indiaElev/1000),
    data = xAgg,
    family='poisson',
    grid = 50,
    prior = list(sd = 1, range = 500*1000)
    )
knitr::kable(res2$parameters$summary[,3:5], digits=3)

	0.025quant	0.5quant	0.975quant
(Intercept)	-0.273	-0.086	0.079
yearCut[2005,2006)	0.094	0.154	0.214
yearCut[2006,2007)	0.011	0.072	0.133
yearCut[2007,2008)	-0.013	0.048	0.109
yearCut[2008,2009)	-0.072	-0.010	0.052
yearCut[2009,2010)	-0.199	-0.135	-0.072
yearCut[2010,2011)	-0.291	-0.226	-0.161
yearCut[2011,2012)	-0.305	-0.240	-0.175
yearCut[2012,2013)	-0.369	-0.303	-0.237
yearCut[2013,2013]	-0.449	-0.382	-0.315
elevation	-0.133	-0.040	0.053
range/1000	253.973	325.412	428.757
sd	0.405	0.474	0.567
sd srs id	0.467	0.491	0.515

Your Task: do something interesting with either tuberculosis or snake bites.