Use the simulation approach to generating predicted probabilities to obtain 1000 simulated predicted probabilities for all demographic-state combinations of supporting marriage equality. Use these simulations to generate point estimates and 95% confidence intervals for the proportion of people within each state supporting marriage equality. Use at least 10 simulations.
Hint: the simulate()
function is your friend because it has a method for objects of class merMod
, which allows it to simulate new responses from the fitted model object – but be sure to account for the random effects in the model. You want to carry out the MRP process multiple times, substituting in a different simulated outcome vector each time. You can accomplish this either with a loop, or through a series of calls to apply()
with anonymous functions.
library(foreign) # load .dta files
library(lme4) # multilevel models
# read in megapoll
marriage_data <- read.dta('https://www.princeton.edu/~jkastell/MRP_primer/gay_marriage_megapoll.dta', convert.underscore = TRUE)
# read in state-level dataset
Statelevel <- read.dta('https://www.princeton.edu/~jkastell/MRP_primer/state_level_update.dta', convert.underscore = TRUE)
Statelevel <- Statelevel[order(Statelevel$sstate.initnum), ]
# read in sensus data
Census <- read.dta('https://www.princeton.edu/~jkastell/MRP_primer/poststratification%202000.dta', convert.underscore = TRUE)
Census <- Census[order(Census$cstate), ]
Census$cstate.initnum <- match(Census$cstate, Statelevel$sstate)
# from 1 for white males to 6 for hispanic females
marriage_data$race.female <- (marriage_data$female *3) + marriage_data$race.wbh
# from 1 for 18-29 with low edu to 16 for 65+ with high edu
marriage_data$age.edu.cat <- 4 * (marriage_data$age.cat -1) + marriage_data$edu.cat
# proportion of evangelicals in respondent's state
marriage_data$p.evang.full <- Statelevel$p.evang[marriage_data$state.initnum]
# proportion of mormon's in respondent's state
marriage_data$p.mormon.full <-Statelevel$p.mormon[marriage_data$state.initnum]
# combined evangelical + mormom proportions
marriage_data$p.relig.full <- marriage_data$p.evang.full + marriage_data$p.mormon.full
# kerry's % of 2-party vote in respondent's state in 2004
marriage_data$p.kerry.full <- Statelevel$kerry.04[marriage_data$state.initnum]
# same coding as above
Census$crace.female <- (Census$cfemale *3) + Census$crace.WBH
Census$cage.edu.cat <- 4 * (Census$cage.cat -1) + Census$cedu.cat
Census$cp.evang.full<- Statelevel$p.evang[Census$cstate.initnum]
Census$cp.mormon.full <- Statelevel$p.mormon[Census$cstate.initnum]
Census$cp.relig.full <- Census$cp.evang.full + Census$cp.mormon.full
Census$cp.kerry.full <- Statelevel$kerry.04[Census$cstate.initnum]
individual_model <- glmer(formula = yes.of.all ~ (1 | race.female) + (1 | age.cat)
+ ( 1 |edu.cat) + (1 | age.edu.cat) + (1 | state) + (1 | region)
+ (1 | poll) + p.relig.full + p.kerry.full, data = marriage_data,
family = binomial(link = 'logit'))
library(arm) # inverse logit function
library(doParallel) # parallelize simulation
library(ggplot2) # plots
library(plotly) # interactive plots
# parallel computing setup
registerDoParallel(makeCluster(parallel::detectCores()))
# simulate responses from model
sim <- simulate(individual_model, nsim = 50, re.form = NULL)
# create dataframe to merge with simulated outcome vectors; account for listwise deletion
sim_dat <- na.omit(marriage_data[, c('race.female', 'age.cat', 'edu.cat', 'age.edu.cat',
'state', 'region', 'poll', 'p.relig.full', 'p.kerry.full')])
# check that simulated outcome vectors have same length as dataframe
stopifnot(nrow(sim_dat) == nrow(sim))
# parallel loop for simulations
sim_outcome <- foreach(i = 1:ncol(sim), .packages = c('lme4', 'arm'), .combine = cbind) %dopar% {
# get ith simulated outcome vector
temp_dat <- data.frame(sim_dat, yes_sim = sim[, i])
# run model on simulated outcome vector
temp_mod <- glmer(yes_sim ~ (1 | race.female) + (1 | age.cat) + ( 1 |edu.cat)
+ (1 | age.edu.cat) + (1 | state) + (1 | region) + (1 | poll)
+ p.relig.full + p.kerry.full, data = temp_dat,
family = binomial(link = 'logit'))
# empty vector to hold state random effects
state_ranefs <- array(NA, c(51, 1))
# set state names as row names
dimnames(state_ranefs) <- list(c(Statelevel$sstate), 'effect')
# assign state random effects to array while preserving NAs
for (i in Statelevel$sstate) {
state_ranefs[i, ] <- ranef(temp_mod)$state[i, 1]
}
# set states with missing REs (b/c not in data) to zero
state_ranefs[, 1][is.na(state_ranefs[, 1])] <- 0
# prediction for each cell in census data
temp_pred <- invlogit(fixef(temp_mod)["(Intercept)"]
+ ranef(temp_mod)$race.female[Census$crace.female, 1]
+ ranef(temp_mod)$age.cat[Census$cage.cat, 1]
+ ranef(temp_mod)$edu.cat[Census$cedu.cat, 1]
+ ranef(temp_mod)$age.edu.cat[Census$cage.edu.cat, 1]
+ state_ranefs[Census$cstate, 1]
+ ranef(temp_mod)$region[Census$cregion, 1]
+ (fixef(individual_model)["p.relig.full"] * Census$cp.relig.full)
+ (fixef(temp_mod)["p.kerry.full"] * Census$cp.kerry.full))
# weight prediction by cell frequency
temp_wgt <- temp_pred * Census$cpercent.state
# sum proportion of cells supporting in each state and convert to percent
100 * as.vector(tapply(temp_wgt, Census$cstate, sum))
}
# collect mean, .025 and .975 quantiles, and proportion religious
sim_gg <- data.frame(State = Statelevel$sstate,
Lower = apply(sim_outcome, 1, quantile, probs = .025),
Mean = apply(sim_outcome, 1, mean),
Upper = apply(sim_outcome, 1, quantile, probs = .975),
Religion = Statelevel$p.evang + Statelevel$p.mormon)
# present estimates
sim_gg[, -ncol(sim_gg)]
# plot estimates with uncertainty, colored by religion, label to avoid reorder() in plotly
sim_plot <- ggplot(sim_gg, aes(x = reorder(State, Mean), y = Mean, ymin = Lower,
ymax = Upper, color = Religion, label = State)) +
geom_linerange(alpha = .75, size = 1) +
geom_point() +
labs(x = 'State', y = 'Estimated Support') +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 90),
legend.position = 'right',
plot.background = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank())
# interactive plot; ggplotly needs colour, not color
ggplotly(sim_plot, tooltip = c('label', 'ymax', 'y', 'ymin', 'colour'))