# Longitudinal data analysis and multilevel modelling
# Eric Guntermann, Centre for the Study of Democratic Citizenship, January 8th, 2016.
library(foreign) # foreign lets us read the Stata-format CSES file
dat <- read.dta("cses.dta") # Load data
dim(dat)
# This is a long dataset: each row is a respondent-party combination, i.e. each
# row gives information about one respondent and their rating of one party.
# First, let's try a classic single-level OLS regression. We're interested in
# the relationship between religiosity (no religious beliefs or not very
# religious vs. somewhat or very religious) and like-dislike scores, with a
# few individual-level controls.
mod1 <- lm(ld ~ log(age) + female + income + religious, data = dat)
summary(mod1) # Significant positive relationship between religiosity and
# like-dislike scores: religious people tend to like parties more.
library(xtable) # xtable produces regression tables to export to LaTeX
sink("table1.tex") # Redirect output to the file table1.tex
xtable(summary(mod1)$coef)
sink(NULL) # Close the file connection
# Now graph it. The dependent variable is continuous and the independent
# variable is categorical, so we use a boxplot (box-and-whisker plot).
library(ggplot2)
pdf("singlelevel.pdf") # Open a PDF device; the graph is saved to this file
# Fix: qplot() is deprecated in current ggplot2; use the equivalent ggplot()
# call directly.
ggplot(dat, aes(x = factor(religious), y = ld)) +
  geom_boxplot() +
  xlab("Religious") +
  ylab("Like-dislike score")
dev.off() # Close the PDF device
# Like-dislike scores seem identical across the two groups.
# What's wrong with this model?
# Many things. It assumes, notably, that variation in like-dislike scores is
# only individual (i.e. average scores are the same across all parties) and
# that the relationship between religiosity and like-dislike scores is the
# same for all parties. Both assumptions are doubtful.
# We could estimate a model separately for each party:
library(lme4) # Used for multilevel models and related functions
mod1b <- lmList(ld ~ log(age) + female + income + religious | party, data=dat)
# But then we lose information from the other parties. The average relationship
# between each independent variable and party feelings is useful; estimation
# loses efficiency because that information is not used as a prior.
# Multilevel framework: start by allowing the intercept to vary by party, i.e.
# the average score can differ for each party. We don't yet model that
# intercept (explain why some parties tend to have higher scores than others).
mod2 <- lmer(ld ~ log(age) + female + income + religious + (1 | party), data=dat) # Unmodelled varying intercept
summary(mod2) # Fixed effects (average coefficients) with hypothesis tests
ranef(mod2) # Random effects: how coefficients vary across parties. Only the
# intercept varies here.
library(lattice) # for dotplot()
pdf("varyingintercepts.pdf") # Plot the varying intercepts
# Bug fix: the argument is condVar (capital V); "condvar" was silently
# ignored, so no conditional-variance intervals were computed.
dotplot(ranef(mod2, condVar=TRUE), scales = list(y = list(draw = FALSE)))
dev.off()
# The intercept plot shows averages vary quite a bit across parties.
# Next, model why the intercepts vary (why average scores differ by party).
# The level-2 predictor is a dummy coded 1 when a party sits to the right of
# the election mean (based on an expert survey) and 0 when it is exactly
# centrist or on the left.
mod3 <- lmer(ld ~ log(age) + female + income + religious + lr + (1 | party),
             data = dat) # Modelled varying intercept
summary(mod3) # Right-wing parties are slightly less liked on average, but the
# coefficient is not significant: ideology does not explain why average
# like-dislike scores vary across parties.
sink("table3.tex")
print(xtable(summary(mod3)$coef))
sink(NULL)
# Now let's see whether the relationship between religiosity and like-dislike
# scores varies across parties. Intuitively, it should.
# (NOTE(review): an earlier version of this comment said "income", but the
# varying slope below is on religious.)
mod4 <- lmer(ld ~ log(age) + female + income + religious + (1 + religious | party), data=dat) # Unmodelled varying intercept and slope
pdf("varyingintandsl.pdf")
dotplot(ranef(mod4, condVar=TRUE), scales = list(y = list(draw = FALSE)))
dev.off()
# The relationship between religiosity and like-dislike scores varies across parties.
# Model the slope variation with party ideology: a cross-level interaction.
mod5 <- lmer(ld ~ log(age) + female + income + religious + lr + lr:religious + (1 + religious | party), data=dat)
# Modelled varying intercept and slope
summary(mod5) # For parties of the left, religious people tend to have more negative
# evaluations; for parties of the right, religious people have more positive evaluations.
sink("table5.tex")
xtable(summary(mod5)$coef)
sink(NULL)
# What's wrong with this last model? It still assumes the average score in each
# election (and country) is the same, and that the relationships between
# ideology and the intercepts, and between ideology and the "effect" of
# religiosity, are the same in all countries. Removing these assumptions needs
# a three-level model. Before that, we introduce some Bayesian basics.
## Bayesian versions of these models, starting with the single-level model.
# We use JAGS, run from R via R2jags.
library(R2jags)
set.seed(123) # Random-number seed, so the same results can be reproduced
# JAGS takes data as a list. Drop incomplete rows first: JAGS cannot handle
# missing covariates, and the original code computed N from the complete
# cases while leaving NAs inside the data vectors.
cc <- na.omit(dat[, c("age", "female", "income", "religious", "ld")])
jags.dat <- as.list(cc)
# Recode variables for the model: income-category dummies and 0/1 numerics.
# Bug fix: the income levels are "medium"/"high" (as used everywhere else in
# this script); comparing against "med" made income2 all-FALSE.
jags.dat$income2 <- jags.dat$income=="medium"
jags.dat$income3 <- jags.dat$income=="high"
jags.dat$female <- as.numeric(jags.dat$female=="Female")
jags.dat$religious <- ifelse(jags.dat$religious=="Yes",1,0)
jags.dat$income <- NULL
jags.dat$age <- log(jags.dat$age)
jags.dat$N <- nrow(cc) # Number of complete observations
jags.dat1 <- jags.dat # Keep under a model-specific name
str(jags.dat1) # Look at the structure of the data
# JAGS model for the single-level Bayesian regression (mirrors mod1).
mod1.jags <- function(){
for(i in 1:N){# Likelihood: normal outcome with linear predictor mu[i]
ld[i] ~ dnorm(mu[i], tau) # JAGS parameterises the normal with precision (inverse variance), not variance
mu[i] <- b0 + b.age*age[i] + b.female*female[i] + b.income2*income2[i] + b.income3*income3[i] + b.religious*religious[i]
}
# Vague normal priors for the regression coefficients
b0 ~ dnorm(0,0.001)
b.age ~ dnorm(0,0.001)
b.female ~ dnorm(0,0.001)
b.income2 ~ dnorm(0,0.001)
b.income3 ~ dnorm(0,0.001)
b.religious ~ dnorm(0,0.001)
# Residual precision, derived from a flat prior on sigma.
# NOTE(review): tau <- 1/sigma treats sigma as a VARIANCE; the more common
# convention is tau <- pow(sigma, -2) with sigma a standard deviation —
# confirm which parameterisation was intended (used consistently below).
tau <- 1/sigma
sigma ~ dunif(0,10000)
}
# Parameters to monitor (posterior draws are saved only for these)
mod1.params <- c("b.age", "b.female", "b.income2", "b.income3", "b.religious")
# We start by running the model with few iterations: two parallel chains of
# 100 iterations with a burn-in of 50 (the first 50 iterations are ignored).
results <- jags(data=jags.dat1, parameters.to.save=mod1.params, model.file=mod1.jags, n.chains=2, n.iter=100, n.burnin=50)
# Now let's run it longer: 500 iterations with a burn-in of 200.
results <- jags(data=jags.dat1, parameters.to.save=mod1.params, model.file=mod1.jags, n.chains=2, n.iter=500, n.burnin=200)
# You can quickly view the results by creating a Gelman result plot
plot(results) # Assess convergence by seeing whether the dots for each chain overlap and by looking at R-hat
# Print a summary (mean, sd, 2.5%, 97.5%, Rhat) to the screen
round(results$BUGSoutput$summary[,c(1,2,3,7,8)],2)
# Output results in LaTeX
library(xtable)
xtab <- xtable(round(results$BUGSoutput$summary[,c(1,2,3,7,8)],2), caption="Model 1")
sink("mod1bayes.tex")
print(xtab, caption.placement = getOption("xtable.caption.placement", "top"))
sink(NULL)
# Two-level model with varying intercept: prepare the data.
# Bug fix: drop incomplete rows before building the list. The original kept
# NAs in the covariates (JAGS fails on missing covariates) while N was the
# full nrow(dat).
cc <- na.omit(dat[, c("age", "female", "income", "religious", "party", "ld")])
jags.dat <- as.list(cc)
jags.dat$party <- as.numeric(as.ordered(jags.dat$party)) # Party index 1..N.party
jags.dat$income2 <- jags.dat$income=="medium"
jags.dat$income3 <- jags.dat$income=="high"
jags.dat$income <- NULL
jags.dat$age <- log(jags.dat$age)
jags.dat$female <- as.numeric(jags.dat$female=="Female")
jags.dat$religious <- ifelse(jags.dat$religious=="Yes",1,0)
jags.dat$N <- nrow(cc) # Number of complete observations
jags.dat$N.party <- length(unique(jags.dat$party)) # We also need the number of parties now
jags.dat2 <- jags.dat
# JAGS model: two-level regression with an unmodelled varying intercept
# (mirrors mod2 above).
mod2.jags <- function(){
for(i in 1:N){ # Likelihood
ld[i] ~ dnorm(mu[i], tau)
mu[i] <- b.party[party[i]] + b.age*age[i] + b.female*female[i] + b.income2*income2[i] + b.income3*income3[i] + b.religious*religious[i]
}
# Level 2: each party's intercept is drawn from a common normal distribution
for(j in 1:N.party){
b.party[j] ~ dnorm(party.mu, tau.party)
}
# Vague priors for the individual-level coefficients
b.age ~ dnorm(0,0.001)
b.female ~ dnorm(0,0.001)
b.income2 ~ dnorm(0,0.001) # Bug fix: was dnorm(0,0.0001); every other coefficient uses precision 0.001
b.income3 ~ dnorm(0,0.001)
b.religious ~ dnorm(0,0.001)
# Prior for the mean of the varying intercepts
party.mu ~ dnorm(0,0.001)
# Priors for the precisions (inverse variances)
tau <- 1/sigma
sigma ~ dunif(0,10000) # Level 1
tau.party <- 1/sigma.party
sigma.party ~ dunif(0,10000) # Level 2
}
mod2.params <- c("b.party", "b.age", "b.female", "b.income2", "b.income3", "b.religious")
# Short pilot run, then a longer run
results <- jags(data=jags.dat2, parameters.to.save=mod2.params, model.file=mod2.jags, n.chains=2, n.iter=100, n.burnin=50)
results <- jags(data=jags.dat2, parameters.to.save=mod2.params, model.file=mod2.jags, n.chains=2, n.iter=500, n.burnin=200)
# Let's check for convergence
plot(results) # Gelman plot
print(results) # Rhat (Gelman-Rubin diagnostic) should be close to 1
# If the model has not converged, update it with more iterations
results2 <- update(results, 200)
# The easiest way to present varying coefficients is graphically: this plot
# shows the intercept for each party with a 90% credible interval.
output <- as.data.frame(as.matrix(as.mcmc(results2))) # Data frame of posterior draws
b.party <- output[, grep("b.party", colnames(output), fixed=T)] # Extract intercept columns
# Fix: never call install.packages() unconditionally inside a script; install
# only if the package is actually missing.
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
library(dplyr)
library(reshape2)
ggplot(data = summarise(group_by(melt(b.party), variable), mean = mean(value),
lo = quantile(value, probs = c(0.05)), hi = quantile(value, probs = c(0.95))),
aes(x = reorder(variable, mean), y = mean)) + geom_hline(yintercept = 0, col = "blue") + geom_pointrange(aes(ymin = lo, ymax = hi)) + xlab("") + ylab(expression(beta[party])) + theme_bw() + coord_flip()
# Relabel the axis with party names: the numeric suffix of each b.party[k]
# column indexes unique(dat$party)
labs <- unique(dat$party)[as.numeric(gsub("[^0-9]", "", colnames(b.party)))]
colnames(b.party) <- labs
pdf("bayes_varyingintercepts.pdf")
ggplot(data = summarise(group_by(melt(b.party), variable), mean = mean(value),
lo = quantile(value, probs = c(0.05)), hi = quantile(value, probs = c(0.95))),
aes(x = reorder(variable, mean), y = mean)) + geom_hline(yintercept = 0, col = "blue") + geom_pointrange(aes(ymin = lo, ymax = hi)) + xlab("") + ylab(expression(beta[party])) + theme_bw() + coord_flip()
dev.off()
# To reuse this for other coefficients (from this or other models), change
# b.party in the first line of the ggplot command to the coefficient name.
# Two-level model with modelled varying intercepts: prepare the data.
cc <- na.omit(dat[,c("age", "female", "income", "religious", "party", "ld", "lr")])
jags.dat <- as.list(cc)
jags.dat$party <- as.numeric(as.ordered(jags.dat$party))
jags.dat$income2 <- jags.dat$income=="medium"
jags.dat$income3 <- jags.dat$income=="high"
jags.dat$income <- NULL
jags.dat$age <- log(jags.dat$age)
# Bug fix: female and religious were left as factors here (unlike the other
# data preparations); the model multiplies them by coefficients, so they
# must be numeric 0/1.
jags.dat$female <- as.numeric(jags.dat$female=="Female")
jags.dat$religious <- ifelse(jags.dat$religious=="Yes",1,0)
jags.dat$N <- nrow(cc)
jags.dat$N.party <- length(unique(jags.dat$party))
jags.dat3 <- jags.dat
# JAGS model: two-level regression with varying intercepts modelled by party
# ideology (lr) — mirrors mod3 above.
mod3.jags <- function(){
# Likelihood
for(i in 1:N){
ld[i] ~ dnorm(mu[i], tau)
mu[i] <- b.party[party[i]] + b.age*age[i] + b.female*female[i] + b.income2*income2[i] + b.income3*income3[i] + b.religious*religious[i]
}
# Level 2: party intercepts centred on a linear function of ideology.
# NOTE(review): lr[j] indexes the respondent-level lr vector by the party
# index j; this assumes lr is constant within party and that the first
# N.party rows line up with parties 1..N.party — verify, or pass a
# party-level lr vector instead.
for(j in 1:N.party){
b.party[j] ~ dnorm(party.mu[j], tau.party)
party.mu[j] <- b0 + b.lr*lr[j]
}
# Vague priors for the first-level coefficients
b.age ~ dnorm(0,0.001)
b.female ~ dnorm(0,0.001)
b.income2 ~ dnorm(0,0.001) # Bug fix: was dnorm(0,0.0001); every other coefficient uses precision 0.001
b.income3 ~ dnorm(0,0.001)
b.religious ~ dnorm(0,0.001)
# Priors for the second-level coefficients
b0 ~ dnorm(0,0.001)
b.lr ~ dnorm(0,0.001)
# Priors for the precisions (variance parameterisation, as elsewhere)
sigma ~ dunif(0,10000)
tau <- 1/sigma
sigma.party ~ dunif(0,10000)
tau.party <- 1/sigma.party
}
# Monitor the coefficients of interest; pilot run, then a longer chain.
mod3.params <- c("b.party", "b.age", "b.female", "b.income2", "b.income3", "b.religious", "b.lr")
results <- jags(data=jags.dat3, parameters.to.save=mod3.params, model.file=mod3.jags, n.chains=2, n.iter=100, n.burnin=50)
results <- jags(data=jags.dat3, parameters.to.save=mod3.params, model.file=mod3.jags, n.chains=2, n.iter=500, n.burnin=200)
# Summarise the posterior (mean, sd, 2.5%, 97.5%, Rhat), dropping the many
# per-party intercept rows, and print to screen.
output <- results$BUGSoutput$summary[,c(1,2,3,7,8)]
keep <- -grep("b.party", rownames(output))
round(output[keep,],2)
xtab <- xtable(round(output[keep,],2), caption="Model 3")
# Write the same table out as LaTeX
sink("mod3bayes.tex")
print(xtab, caption.placement = getOption("xtable.caption.placement", "top"))
sink(NULL)
# Two-level model with unmodelled varying intercepts and slopes: data prep.
# Bug fixes: female and religious were left as factors (the model multiplies
# them by coefficients, so they must be numeric 0/1), and N counted all rows
# of dat even though JAGS cannot handle missing covariates — drop incomplete
# rows first, as in the other preparations.
cc <- na.omit(dat[,c("age", "female", "income", "religious", "party", "ld")])
jags.dat <- as.list(cc)
jags.dat$party <- as.numeric(as.ordered(jags.dat$party))
jags.dat$income2 <- jags.dat$income=="medium"
jags.dat$income3 <- jags.dat$income=="high"
jags.dat$income <- NULL
jags.dat$age <- log(jags.dat$age)
jags.dat$female <- as.numeric(jags.dat$female=="Female")
jags.dat$religious <- ifelse(jags.dat$religious=="Yes",1,0)
jags.dat$N <- nrow(cc)
jags.dat$N.party <- length(unique(jags.dat$party))
jags.dat4 <- jags.dat
# JAGS model: two-level regression with unmodelled varying intercepts and a
# varying slope on religious (mirrors mod4 above).
mod4.jags <- function(){
for(i in 1:N){ # Likelihood
ld[i] ~ dnorm(mu[i], tau)
mu[i] <- b.party[party[i]] + b.age*age[i] + b.female*female[i] + b.income2*income2[i] + b.income3*income3[i] + b.religious[party[i]]*religious[i]
}
for(j in 1:N.party){ # Priors for the varying coefficients
b.party[j] ~ dnorm(party.mu, tau.party)
b.religious[j] ~ dnorm(b.religious.mu, tau.religious)
}
# Vague priors for the non-varying coefficients
b.age ~ dnorm(0,0.001)
b.income2 ~ dnorm(0,0.001)
b.income3 ~ dnorm(0,0.001)
b.female ~ dnorm(0,0.001)
# Priors for the means of the varying coefficients
party.mu ~ dnorm(0,0.001)
b.religious.mu ~ dnorm(0,0.001) # Bug fix: was dnorm(0,001), i.e. precision 1 — a far tighter prior than intended
# Priors for the precisions
sigma ~ dunif(0,10000)
tau <- 1/sigma
sigma.party ~ dunif(0,10000)
tau.party <- 1/sigma.party
sigma.religious ~ dunif(0,10000)
tau.religious <- 1/sigma.religious
}
mod4.params <- c("b.party", "b.age", "b.religious", "b.income2", "b.income3", "b.female")
results <- jags(data=jags.dat4, parameters.to.save=mod4.params, model.file=mod4.jags, n.chains=2, n.iter=100, n.burnin=50)
results <- jags(data=jags.dat4, parameters.to.save=mod4.params, model.file=mod4.jags, n.chains=2, n.iter=500, n.burnin=200)
output <- as.data.frame(as.matrix(as.mcmc(results)))
b.religious <- output[, grep("b.religious", colnames(output), fixed=T)] # Varying slopes on religious
pdf("bayes_varyingintandsl.pdf")
# Plot each party's religious slope with a 90% credible interval.
# Fix: the axis label said beta[female], but the plotted coefficients are the
# religious slopes.
ggplot(data = summarise(group_by(melt(b.religious), variable), mean = mean(value),
lo = quantile(value, probs = c(0.05)), hi = quantile(value, probs = c(0.95))),
aes(x = reorder(variable, mean), y = mean)) + geom_hline(yintercept = 0, col = "blue") + geom_pointrange(aes(ymin = lo, ymax = hi)) + xlab("") + ylab(expression(beta[religious])) + theme_bw() + coord_flip()
dev.off()
# Two-level model with modelled varying intercepts and slopes: data prep.
# Bug fix: drop incomplete rows (JAGS cannot handle missing covariates) and
# count N from the complete cases, consistent with the other preparations.
cc <- na.omit(dat[,c("age", "female", "income", "religious", "party", "ld", "lr")])
jags.dat <- as.list(cc)
jags.dat$party <- as.numeric(as.ordered(jags.dat$party))
jags.dat$income2 <- jags.dat$income=="medium"
jags.dat$income3 <- jags.dat$income=="high"
jags.dat$income <- NULL
jags.dat$female <- ifelse(jags.dat$female=="Female",1,0)
jags.dat$religious <- ifelse(jags.dat$religious=="Yes",1,0)
jags.dat$age <- log(jags.dat$age)
jags.dat$N <- nrow(cc)
jags.dat$N.party <- length(unique(jags.dat$party))
jags.dat5 <- jags.dat
# JAGS model: two-level regression where both the party intercepts and the
# party-specific religious slopes are modelled with party ideology (lr) —
# the Bayesian analogue of the cross-level interaction in mod5.
mod5.jags <- function(){
for(i in 1:N){ # Likelihood
ld[i] ~ dnorm(mu[i], tau)
mu[i] <- b.party[party[i]] + b.female*female[i] + b.age*age[i] + b.income2*income2[i] + b.income3*income3[i] + b.religious[party[i]]*religious[i]
}
for(j in 1:N.party){ # Level 2: varying coefficients modelled with lr
# NOTE(review): lr[j] indexes the respondent-level lr vector by the party
# index — assumes lr is constant within party and rows align; confirm.
b.party[j] ~ dnorm(party.mu[j], tau.party)
party.mu[j] <- b1 + b.lr1*lr[j]
b.religious[j] ~ dnorm(b.religious.mu[j], tau.religious)
b.religious.mu[j] <- b2 + b.lr2*lr[j]
}
# Vague priors for the non-varying coefficients
b.age ~ dnorm(0,0.001)
b.income2 ~ dnorm(0,0.001)
b.income3 ~ dnorm(0,0.001)
b.female ~ dnorm(0,0.001)
# Priors for the second-level coefficients
b1 ~ dnorm(0,0.001)
b2 ~ dnorm(0,0.001)
b.lr1 ~ dnorm(0,0.001)
b.lr2 ~ dnorm(0,0.001)
# Priors for the precisions (variance parameterisation, as elsewhere)
sigma ~ dunif(0,10000)
tau <- 1/sigma
sigma.party ~ dunif(0,10000)
tau.party <- 1/sigma.party
sigma.religious ~ dunif(0,10000)
tau.religious <- 1/sigma.religious
}
mod5.params <- c("b.party", "b.age", "b.female", "b.income2", "b.income3", "b.religious", "b1", "b2", "b.lr1", "b.lr2")
# Pilot run, then a longer run
results <- jags(data=jags.dat5, parameters.to.save=mod5.params, model.file=mod5.jags, n.chains=2, n.iter=100, n.burnin=50)
results <- jags(data=jags.dat5, parameters.to.save=mod5.params, model.file=mod5.jags, n.chains=2, n.iter=1000, n.burnin=500)
plot(results) # Check convergence (chain overlap, Rhat)
# Run more iterations if the model has not converged
results2 <- update(results, 1000)
# Print results to screen, dropping the per-party rows.
# NOTE(review): this summarises `results`, not the updated `results2` — if
# the extra iterations were needed, use results2$BUGSoutput here instead.
output <- results$BUGSoutput$summary[,c(1,2,3,7,8)]
round(output[-c(grep("b.female", rownames(output)), grep("b.party", rownames(output))),],2)
xtab <- xtable(round(output[-c(grep("b.female", rownames(output)), grep("b.party", rownames(output))),],2))
# Now to LaTeX
sink("mod5bayes.tex")
print(xtab, caption.placement = getOption("xtable.caption.placement", "top"))
sink(NULL)
# Three-level model with modelled varying intercepts and slopes, and
# unmodelled varying second-level coefficients. We first need, for each party,
# the index of the election (country-year) it ran in.
cid <- unique(na.omit(dat[,c("age", "female", "income", "religious", "party", "ld", "lr", "countryyear")])$countryyear)
# The first 8 characters of each party code identify its country-year; map
# each unique party to the position of that country-year in cid.
# Fix: replaces a grow-in-a-loop construction with a vectorised match();
# match() also returns NA (instead of silently dropping the party) if a
# country-year is missing from cid.
cid_byparty <- match(substring(unique(dat$party), 1, 8), cid)
## Data preparation for the three-level model
cc <- na.omit(dat[,c("age", "female", "income", "religious", "party", "lr", "ld")])
jags.dat <- as.list(cc)
jags.dat$party <- as.numeric(as.ordered(jags.dat$party))
jags.dat$income2 <- jags.dat$income=="medium"
jags.dat$income3 <- jags.dat$income=="high"
jags.dat$income <- NULL
jags.dat$age <- log(jags.dat$age)
# Bug fix: female and religious were left as factors; convert to numeric 0/1
# as in the other preparations.
jags.dat$female <- as.numeric(jags.dat$female=="Female")
jags.dat$religious <- ifelse(jags.dat$religious=="Yes",1,0)
jags.dat$election <- cid_byparty # Election index for each party (level 3)
jags.dat$lr <- ifelse(jags.dat$lr>0,1,0) # Dummy: party to the right of the election mean
jags.dat$N <- nrow(cc)
jags.dat$N.party <- length(unique(jags.dat$party))
jags.dat$N.election <- length(unique(jags.dat$election))
jags.dat6 <- jags.dat
# JAGS model: three-level regression. Party intercepts and religious slopes
# are modelled with party ideology (lr), and the second-level coefficients
# (b1, b2, b.lr1, b.lr2) in turn vary by election, unmodelled at level 3.
mod6.jags <- function(){
for(i in 1:N){ # Likelihood
ld[i] ~ dnorm(mu[i], tau)
mu[i] <- b.party[party[i]] + b.female*female[i] + b.age*age[i] + b.income2*income2[i] + b.income3*income3[i] + b.religious[party[i]]*religious[i]
}
for(j in 1:N.party){ # Level 2: party coefficients modelled with lr
# NOTE(review): lr[j] and election[j] index by party j; election has one
# entry per party, but lr in the data list is respondent-level — confirm
# the lr rows align with parties 1..N.party.
b.party[j] ~ dnorm(party.mu[j], tau.party)
party.mu[j] <- b1[election[j]] + b.lr1[election[j]]*lr[j]
b.religious[j] ~ dnorm(b.religious.mu[j], tau.religious)
b.religious.mu[j] <- b2[election[j]] + b.lr2[election[j]]*lr[j]
}
for(k in 1:N.election){ # Level 3: priors for the second-level coefficients
b1[k] ~ dnorm(b1.mean, tau.b1)
b2[k] ~ dnorm(b2.mean, tau.b2)
b.lr1[k] ~ dnorm(lr1.mean, tau.lr1)
b.lr2[k] ~ dnorm(lr2.mean, tau.lr2)
}
# Vague priors for the non-varying coefficients
b.age ~ dnorm(0,0.001)
b.income2 ~ dnorm(0,0.001)
b.income3 ~ dnorm(0,0.001)
b.female ~ dnorm(0,0.001)
# Priors for the third-level means
b1.mean ~ dnorm(0,0.001)
b2.mean ~ dnorm(0,0.001)
lr1.mean ~ dnorm(0,0.001)
lr2.mean ~ dnorm(0,0.001)
# Priors for the precisions (variance parameterisation, as elsewhere)
sigma ~ dunif(0,10000)
tau <- 1/sigma
sigma.party ~ dunif(0,10000)
tau.party <- 1/sigma.party
sigma.religious ~ dunif(0,10000)
tau.religious <- 1/sigma.religious
sigma.lr1 ~ dunif(0,10000)
tau.lr1 <- 1/sigma.lr1
sigma.lr2 ~ dunif(0,10000)
tau.lr2 <- 1/sigma.lr2
sigma.b1 ~ dunif(0,10000)
tau.b1 <- 1/sigma.b1
sigma.b2 ~ dunif(0,10000)
tau.b2 <- 1/sigma.b2
}
mod6.params <- c("b.age", "b.female", "b.religious", "b1", "b2", "b.lr1", "b.lr2")
results <- jags(data=jags.dat6, parameters.to.save=mod6.params, model.file=mod6.jags, n.chains=2, n.iter=100, n.burnin=50)
results <- jags(data=jags.dat6, parameters.to.save=mod6.params, model.file=mod6.jags, n.chains=2, n.iter=10000, n.burnin=5000)
output <- as.data.frame(as.matrix(as.mcmc(results)))
# Plot a random sample of 50 party-specific religious slopes.
# Bug fixes: the draws of interest are the b.religious columns (the object was
# misleadingly named b.income3 and the subsetting indexed the unrelated,
# stale b.party object from an earlier model), and the ggplot call was
# missing a closing parenthesis in ylab(), a syntax error.
b.religious.draws <- output[, grep("b.religious", colnames(output), fixed=T)]
b.religious.draws <- b.religious.draws[, sample(ncol(b.religious.draws), 50)]
ggplot(data = summarise(group_by(melt(b.religious.draws), variable), mean = mean(value),
lo = quantile(value, probs = c(0.05)), hi = quantile(value, probs = c(0.95))),
aes(x = reorder(variable, mean), y = mean)) + geom_hline(yintercept = 0, col = "blue") + geom_pointrange(aes(ymin = lo, ymax = hi)) + xlab("") + ylab("High vs. low Like-dislike scores") + theme_bw() + coord_flip()
# You could run a frequentist version of this model. Here's the code.
# Bug fix: parties are nested WITHIN country-year elections, so the grouping
# is countryyear/party, not party/countryyear.
mod6 <- lmer(ld ~ lr + age + lr:female + income + urban + (1 + female | countryyear/party), data=dat)
## Model with a binary dependent variable
# Frequentist version: dichotomise like-dislike at scores above 5
dat$ldbin <- ifelse(dat$ld>5,1,0)
mod1 <- glmer(ldbin ~ lr + religious:lr + log(age) + female + religious + (1 + religious | party), family=binomial(link="logit"), data=dat)
# Bayesian version: data preparation.
# Bug fixes relative to the original: N was computed from a subset containing
# "ld" rather than "ldbin" (so its row count could differ from the data
# actually passed to JAGS), female was left as a factor, and age was not
# logged even though every other preparation (and the frequentist model
# above) uses log(age).
cc <- na.omit(dat[,c("age", "female", "income", "urban", "party", "ldbin", "lr")])
jags.dat <- as.list(cc)
jags.dat$party <- as.numeric(as.ordered(jags.dat$party))
jags.dat$income2 <- jags.dat$income=="medium"
jags.dat$income3 <- jags.dat$income=="high"
jags.dat$income <- NULL
jags.dat$female <- as.numeric(jags.dat$female=="Female")
jags.dat$age <- log(jags.dat$age)
jags.dat$N <- nrow(cc)
jags.dat$N.party <- length(unique(jags.dat$party))
# JAGS model: two-level logistic regression with modelled varying intercepts.
mod1.jags <- function(){
for(i in 1:N){ # Likelihood: Bernoulli outcome with logit link
ldbin[i] ~ dbern(p[i])
logit(p[i]) <- b.party[party[i]] + b.age*age[i] + b.female*female[i] + b.income2*income2[i] + b.income3*income3[i] + b.urban*urban[i]
}
# Level 2: party intercepts modelled with party ideology
for(j in 1:N.party){
b.party[j] ~ dnorm(party.mu[j], tau.party)
party.mu[j] <- b0 + b.lr*lr[j]
}
# Bug fix: the prior was placed on party.mu, which is already defined
# deterministically in the loop above (JAGS rejects the double definition),
# while b0 — which actually needed a prior — had none.
b0 ~ dnorm(0,0.001)
tau.party <- 1/sigma.party
sigma.party ~ dunif(0,10000)
# (The unused tau/sigma residual-precision nodes were removed: a Bernoulli
# likelihood has no residual variance.)
# Vague priors for the coefficients
b.age ~ dnorm(0,0.001)
b.female ~ dnorm(0,0.001)
b.income2 ~ dnorm(0,0.001) # Bug fix: was 0.0001; every other coefficient uses precision 0.001
b.income3 ~ dnorm(0,0.001)
b.urban ~ dnorm(0,0.001)
b.lr ~ dnorm(0,0.001)
}
mod1.params <- c("b.party", "b.age", "b.female", "b.income2", "b.income3", "b.urban")
# Initial values for each chain.
# Fix: the number of party intercepts was hard-coded as 583; derive it from
# the data so the inits stay valid if the data change.
mod1.inits <- function(){
list("b.party"=rnorm(jags.dat$N.party), "b.age"=rnorm(1), "b.female"=rnorm(1), "b.income2"=rnorm(1), "b.income3"=rnorm(1))
}
results <- jags(data=jags.dat, inits=mod1.inits, parameters.to.save=mod1.params, model.file=mod1.jags, n.chains=2, n.iter=1000, n.burnin=500)
## Now some time series
library(foreign)
dat <- read.dta("macropolity.dta") # Data from the Macro Polity
# Plot each of the series
plot(dat$t, dat$mood, type="l", xlab="Year", ylab="Mood")
plot(dat$t, dat$dunemployment, type="l", xlab="Year", ylab="Annual change in unemployment")
plot(dat$t, dat$inflation, type="l", xlab="Year", ylab="Inflation rate")
# Autocorrelation function: correlation between the current value and values
# at various lags
acf(dat$mood)
acf(dat$dunemployment)
acf(dat$inflation)
# Static model and lagged-dependent-variable model
mod1 <- lm(mood ~ inflation + dunemployment, data=dat)
mod2 <- lm(mood ~ lmood + inflation + dunemployment, data=dat)
summary(mod1)
# Examine the residuals of the static model for serial correlation.
# Fix: the original index `rt1[1:length(rt1)-1]` only worked by accident —
# `:` binds tighter than `-`, giving indices 0:(n-1), and the 0 is silently
# dropped. head(x, -1) / x[-1] state the intent directly.
rt <- mod1$resid[-1] # Residuals at time t (drop the first)
rt1 <- head(mod1$resid, -1) # Residuals at time t-1 (drop the last)
plot(rt1, rt)
cov(rt1, rt)
cor(rt1, rt)
mod4 <- lm(rt ~ rt1) # Regress residuals on lagged residuals
acf(rt)
Box.test(rt) # Box-Pierce test for residual autocorrelation
# Same diagnostics for the lagged-dependent-variable model
rt <- mod2$resid[-1]
rt1 <- head(mod2$resid, -1)
plot(rt1, rt)
cov(rt1, rt)
cor(rt1, rt)
mod3 <- lm(rt ~ rt1)
summary(mod3)
acf(rt)
Box.test(rt)
# Panel data --------------------------------------------------------------
library(foreign)
dat <- read.dta("cat2012.dta")
# A purely cross-sectional model is not very convincing: vote choice and
# national identity are measured at the same point in time.
mod1 <- lm(natid1 ~ vote1, data = dat)
summary(mod1)
# A panel model — lagged identity plus earlier vote intention predicting later
# identity — is more convincing.
mod2 <- lm(natid2 ~ natid1 + voteintention1, data = dat)
summary(mod2)