Data from the Million Deaths Study in India can be found as
india.rds
in the data
folder of the course web
site.
library('terra')
## terra 1.8.54
x = readRDS('../data/india.rds')
values(x)[100000+1:2,]
## yearCut ageCut sex srs_id urban_rural age year Tuberculosis
## 100001 [2004,2005) [30,35) Female 1811052015 Rural 30 2004 0
## 100002 [2004,2005) [30,35) Female 1811061005 Rural 30 2004 0
## Venomous.snakes languageGroup pop
## 100001 0 Magadhan 43.37914
## 100002 0 Magadhan 15.59410
srs_id
: geographic identifier of sampling unit (village
or census area)yearCut
and year
, year of survey, the
former is a factor the latter numericageCut
and age
, age group, the former is a
factorsex
like it sayspop
population of age-sex group in sampling unit. It’s
an estimate (and not always an integer)Tuburculosis
, Venemous.snakes
number of
deaths in the relevant age-sex groupurban_rural
: area type of the sampling unitlanguageGroup
:broad category of dominant language in
the sampling unitxUnique = x[!duplicated(x$srs_id),]
india = geodata::gadm("IND", level=0, path=normalizePath("~/Downloads"))
indiaT = project(india, crs(xUnique))
plot(indiaT)
points(xUnique, col='red', cex=0.5)
indiaElev = geodata::elevation_30s('IND', path=normalizePath("~/Downloads"))
plot(indiaElev)
Write down the model! \[ \begin{aligned} Y_{itj} \sim & \text{Poisson}(\lambda_{itj}) \\ \end{aligned} \]
xSub = x[which(x$pop > 0 & x$age >= 40),]
xSub$logPop = log(xSub$pop)
res = geostatsp::glgm(
Tuberculosis ~ sex + ageCut + yearCut + elevation +
offset(logPop) +
f(srs_id, model='iid', prior='pc.prec', param=c(1, 0.5)),
covariates = list(elevation = indiaElev),
data = xSub,
family='poisson',
grid = 50,
prior = list(sd = 1, range = 500*1000)
)
knitr::kable(res$parameters$summary[,3:5], digits=2)
0.025quant | 0.5quant | 0.975quant | |
---|---|---|---|
(Intercept) | -27.30 | -6.63 | 14.04 |
sexFemale | -0.88 | -0.84 | -0.81 |
ageCut[5,10) | -62.01 | 0.00 | 62.01 |
ageCut[10,15) | -62.01 | 0.00 | 62.01 |
ageCut[15,20) | -62.01 | 0.00 | 62.01 |
ageCut[20,25) | -62.01 | 0.00 | 62.01 |
ageCut[25,30) | -62.01 | 0.00 | 62.01 |
ageCut[30,35) | -62.01 | 0.00 | 62.01 |
ageCut[35,40) | -62.01 | 0.00 | 62.01 |
ageCut[40,45) | -21.60 | -0.93 | 19.74 |
ageCut[45,50) | -21.41 | -0.74 | 19.93 |
ageCut[50,55) | -21.08 | -0.41 | 20.26 |
ageCut[55,60) | -20.75 | -0.08 | 20.59 |
ageCut[60,65) | -20.61 | 0.06 | 20.73 |
ageCut[65,70) | -20.31 | 0.36 | 21.03 |
ageCut[70,75) | -20.15 | 0.52 | 21.19 |
ageCut[75,80) | -19.93 | 0.74 | 21.41 |
ageCut[80,130] | -20.13 | 0.54 | 21.21 |
yearCut[2005,2006) | 0.06 | 0.13 | 0.20 |
yearCut[2006,2007) | 0.00 | 0.07 | 0.14 |
yearCut[2007,2008) | -0.06 | 0.01 | 0.08 |
yearCut[2008,2009) | -0.10 | -0.03 | 0.04 |
yearCut[2009,2010) | -0.19 | -0.12 | -0.05 |
yearCut[2010,2011) | -0.29 | -0.22 | -0.14 |
yearCut[2011,2012) | -0.29 | -0.22 | -0.14 |
yearCut[2012,2013) | -0.33 | -0.25 | -0.18 |
yearCut[2013,2013] | -0.41 | -0.33 | -0.26 |
elevation | 0.00 | 0.00 | 0.00 |
range/1000 | 213.32 | 269.11 | 347.25 |
sd | 0.40 | 0.46 | 0.54 |
sd srs id | 0.42 | 0.45 | 0.48 |
toPlot = mask(res$raster[['predict.0.5quant']], indiaT)
toPlot = exp(toPlot)*100000
thecol = mapmisc::colourScale(toPlot, col='Reds', breaks=6, dec=-2, style='equal')
mapmisc::map.new(indiaT)
plot(toPlot, col=thecol$col, breaks=thecol$breaks, legend=FALSE, add=TRUE)
plot(indiaT, add=TRUE)
mapmisc::legendBreaks("bottomleft", thecol, bty='n')
caluculate observed and expected
xHavePop = x[which(x$pop>0),]
xHavePop$logPop = log(xHavePop$pop)
xHavePop$ageRelevel = relevel(xHavePop$ageCut, '[60,65)')
theRates = glm(Tuberculosis ~ ageRelevel*sex + offset(logPop),family='poisson', data=values(xHavePop))
xHavePop$pred = predict.glm(theRates, values(xHavePop), type='response')
sum(xHavePop$pred)
## [1] 19121
sum(xHavePop$Tuberculosis)
## [1] 19121
byvars = c("yearCut","srs_id",'urban_rural',"year",'languageGroup')
xAggDf = aggregate(
values(xHavePop)[,c('pred','pop','Tuberculosis','Venomous.snakes')],
values(xHavePop)[,byvars],
FUN='sum')
xUnique = xHavePop[!duplicated(values(xHavePop)[, byvars]),
byvars]
xAgg = merge(xUnique, xAggDf)
values(xAgg)[24060+1:4,]
## yearCut srs_id urban_rural year languageGroup pred pop
## 24061 [2007,2008) 0914691014 Rural 2007 Hindi 0.4189380 1711.978
## 24062 [2007,2008) 0914692013 Rural 2007 Hindi 0.4440820 1827.714
## 24063 [2007,2008) 0914692026 Rural 2007 Hindi 0.3439542 1415.680
## 24064 [2007,2008) 0914701004 Rural 2007 Hindi 0.2696260 1156.985
## Tuberculosis Venomous.snakes
## 24061 1 0
## 24062 10 0
## 24063 0 0
## 24064 0 0
xAgg$logExpected = log(xAgg$pred)
res2 = geostatsp::glgm(
Tuberculosis ~ yearCut + elevation +
offset(logExpected) +
f(srs_id, model='iid', prior='pc.prec', param=c(1, 0.5)),
covariates = list(elevation = indiaElev/1000),
data = xAgg,
family='poisson',
grid = 50,
prior = list(sd = 1, range = 500*1000)
)
knitr::kable(res2$parameters$summary[,3:5], digits=3)
0.025quant | 0.5quant | 0.975quant | |
---|---|---|---|
(Intercept) | -0.273 | -0.086 | 0.079 |
yearCut[2005,2006) | 0.094 | 0.154 | 0.214 |
yearCut[2006,2007) | 0.011 | 0.072 | 0.133 |
yearCut[2007,2008) | -0.013 | 0.048 | 0.109 |
yearCut[2008,2009) | -0.072 | -0.010 | 0.052 |
yearCut[2009,2010) | -0.199 | -0.135 | -0.072 |
yearCut[2010,2011) | -0.291 | -0.226 | -0.161 |
yearCut[2011,2012) | -0.305 | -0.240 | -0.175 |
yearCut[2012,2013) | -0.369 | -0.303 | -0.237 |
yearCut[2013,2013] | -0.449 | -0.382 | -0.315 |
elevation | -0.133 | -0.040 | 0.053 |
range/1000 | 253.973 | 325.412 | 428.757 |
sd | 0.405 | 0.474 | 0.567 |
sd srs id | 0.467 | 0.491 | 0.515 |
Your Task: do something interesting with either tuberculosis or snake bites.