Data from the Million Deaths Study in India can be found as india.rds in the data folder of the course web site.

library('terra')
## terra 1.8.54
x = readRDS('../data/india.rds')
values(x)[100000+1:2,] 
##            yearCut  ageCut    sex     srs_id urban_rural age year Tuberculosis
## 100001 [2004,2005) [30,35) Female 1811052015       Rural  30 2004            0
## 100002 [2004,2005) [30,35) Female 1811061005       Rural  30 2004            0
##        Venomous.snakes languageGroup      pop
## 100001               0      Magadhan 43.37914
## 100002               0      Magadhan 15.59410
xUnique = x[!duplicated(x$srs_id),]
india = geodata::gadm("IND", level=0, path=normalizePath("~/Downloads"))
indiaT = project(india, crs(xUnique))
plot(indiaT)
points(xUnique, col='red', cex=0.5)
plot of chunk amap
plot of chunk amap
indiaElev = geodata::elevation_30s('IND', path=normalizePath("~/Downloads"))
plot(indiaElev)
plot of chunk covariate
plot of chunk covariate

Write down the model! \[ \begin{aligned} Y_{itj} \sim & \text{Poisson}(\lambda_{itj}) \\ \end{aligned} \]

xSub = x[which(x$pop > 0 & x$age >= 40),]
xSub$logPop = log(xSub$pop)
res = geostatsp::glgm(
    Tuberculosis ~ sex + ageCut + yearCut + elevation + 
        offset(logPop) +
        f(srs_id, model='iid', prior='pc.prec', param=c(1, 0.5)),
    covariates = list(elevation = indiaElev),
    data = xSub,
    family='poisson',
    grid = 50,
    prior = list(sd = 1, range = 500*1000)
    )
knitr::kable(res$parameters$summary[,3:5], digits=2)
0.025quant 0.5quant 0.975quant
(Intercept) -27.30 -6.63 14.04
sexFemale -0.88 -0.84 -0.81
ageCut[5,10) -62.01 0.00 62.01
ageCut[10,15) -62.01 0.00 62.01
ageCut[15,20) -62.01 0.00 62.01
ageCut[20,25) -62.01 0.00 62.01
ageCut[25,30) -62.01 0.00 62.01
ageCut[30,35) -62.01 0.00 62.01
ageCut[35,40) -62.01 0.00 62.01
ageCut[40,45) -21.60 -0.93 19.74
ageCut[45,50) -21.41 -0.74 19.93
ageCut[50,55) -21.08 -0.41 20.26
ageCut[55,60) -20.75 -0.08 20.59
ageCut[60,65) -20.61 0.06 20.73
ageCut[65,70) -20.31 0.36 21.03
ageCut[70,75) -20.15 0.52 21.19
ageCut[75,80) -19.93 0.74 21.41
ageCut[80,130] -20.13 0.54 21.21
yearCut[2005,2006) 0.06 0.13 0.20
yearCut[2006,2007) 0.00 0.07 0.14
yearCut[2007,2008) -0.06 0.01 0.08
yearCut[2008,2009) -0.10 -0.03 0.04
yearCut[2009,2010) -0.19 -0.12 -0.05
yearCut[2010,2011) -0.29 -0.22 -0.14
yearCut[2011,2012) -0.29 -0.22 -0.14
yearCut[2012,2013) -0.33 -0.25 -0.18
yearCut[2013,2013] -0.41 -0.33 -0.26
elevation 0.00 0.00 0.00
range/1000 213.32 269.11 347.25
sd 0.40 0.46 0.54
sd srs id 0.42 0.45 0.48
toPlot = mask(res$raster[['predict.0.5quant']], indiaT)
toPlot = exp(toPlot)*100000
thecol = mapmisc::colourScale(toPlot, col='Reds', breaks=6, dec=-2, style='equal')
mapmisc::map.new(indiaT)
plot(toPlot, col=thecol$col, breaks=thecol$breaks, legend=FALSE, add=TRUE)
plot(indiaT, add=TRUE)
mapmisc::legendBreaks("bottomleft", thecol, bty='n')
plot of chunk plotFit
plot of chunk plotFit

caluculate observed and expected

xHavePop = x[which(x$pop>0),]
xHavePop$logPop = log(xHavePop$pop)
xHavePop$ageRelevel = relevel(xHavePop$ageCut, '[60,65)')
theRates = glm(Tuberculosis ~ ageRelevel*sex + offset(logPop),family='poisson', data=values(xHavePop))
xHavePop$pred = predict.glm(theRates, values(xHavePop), type='response')

sum(xHavePop$pred)
## [1] 19121
sum(xHavePop$Tuberculosis)
## [1] 19121
byvars = c("yearCut","srs_id",'urban_rural',"year",'languageGroup')
xAggDf = aggregate(
    values(xHavePop)[,c('pred','pop','Tuberculosis','Venomous.snakes')], 
    values(xHavePop)[,byvars],
    FUN='sum')
xUnique = xHavePop[!duplicated(values(xHavePop)[, byvars]), 
    byvars] 
xAgg = merge(xUnique, xAggDf)

values(xAgg)[24060+1:4,]
##           yearCut     srs_id urban_rural year languageGroup      pred      pop
## 24061 [2007,2008) 0914691014       Rural 2007         Hindi 0.4189380 1711.978
## 24062 [2007,2008) 0914692013       Rural 2007         Hindi 0.4440820 1827.714
## 24063 [2007,2008) 0914692026       Rural 2007         Hindi 0.3439542 1415.680
## 24064 [2007,2008) 0914701004       Rural 2007         Hindi 0.2696260 1156.985
##       Tuberculosis Venomous.snakes
## 24061            1               0
## 24062           10               0
## 24063            0               0
## 24064            0               0
xAgg$logExpected = log(xAgg$pred)
res2 = geostatsp::glgm(
    Tuberculosis ~  yearCut + elevation + 
        offset(logExpected) +
        f(srs_id, model='iid', prior='pc.prec', param=c(1, 0.5)),
    covariates = list(elevation = indiaElev/1000),
    data = xAgg,
    family='poisson',
    grid = 50,
    prior = list(sd = 1, range = 500*1000)
    )
knitr::kable(res2$parameters$summary[,3:5], digits=3)
0.025quant 0.5quant 0.975quant
(Intercept) -0.273 -0.086 0.079
yearCut[2005,2006) 0.094 0.154 0.214
yearCut[2006,2007) 0.011 0.072 0.133
yearCut[2007,2008) -0.013 0.048 0.109
yearCut[2008,2009) -0.072 -0.010 0.052
yearCut[2009,2010) -0.199 -0.135 -0.072
yearCut[2010,2011) -0.291 -0.226 -0.161
yearCut[2011,2012) -0.305 -0.240 -0.175
yearCut[2012,2013) -0.369 -0.303 -0.237
yearCut[2013,2013] -0.449 -0.382 -0.315
elevation -0.133 -0.040 0.053
range/1000 253.973 325.412 428.757
sd 0.405 0.474 0.567
sd srs id 0.467 0.491 0.515

Your Task: do something interesting with either tuberculosis or snake bites.