#Bayesian Analysis Module III examples: #need to download JAGS: Just Another Gibbs Sampler outside R: https://sourceforge.net/projects/mcmc-jags/files/JAGS/ library("rjags") library("tidyverse") library("openintro") #plotting distributions: https://stefanocoretta.github.io/post/priors-ggplot2/ #Normal ggplot(data = tibble(x = -10:10), aes(x)) + stat_function(fun = dnorm, n = 1000, args = list(mean=0,sd=1), geom='area', colour='black', fill="blue") + scale_y_continuous(limits=c(0,1),breaks=c(0, 0.25, 0.50, 0.75, 1.0)) + labs(title = "Normal (Gaussian) distribution for 'standard' prior") ggplot(data = tibble(x = -10:10), aes(x)) + stat_function(fun = dnorm, n = 1000, args = list(mean=0,sd=0.5), geom='area', colour='black', fill="purple") + scale_y_continuous(limits=c(0,1),breaks=c(0, 0.25, 0.50, 0.75, 1.0)) + labs(title = "Normal (Gaussian) distribution for 'strong' prior") ggplot(data = tibble(x = -10:10), aes(x)) + stat_function(fun = dnorm, n = 1000, args = list(mean=0,sd=2), geom='area', colour='black', fill="cyan") + scale_y_continuous(limits=c(0,1),breaks=c(0, 0.25, 0.50, 0.75, 1.0)) + labs(title = "Normal (Gaussian) distribution for 'weak' prior") #Beta ggplot(data = tibble(x = 0:1), aes(x)) + stat_function(fun = dbeta, n = 1000, args = list(shape1=2, shape2=5), geom='area', colour='black', fill='red') + labs(title = "Beta distribution") ggplot(data = tibble(x = 0:1), aes(x)) + stat_function(fun = dbeta, n = 1000, args = list(shape1=0.5, shape2=0.5), geom='area', colour='black', fill='pink') + labs(title = "Beta distribution 2: Electric Boogaloo") ggplot(data = tibble(x = 0:1), aes(x)) + stat_function(fun = dbeta, n = 1000, args = list(shape1=5, shape2=1), geom='area', colour='black', fill='lavender') + labs(title = "Beta distribution 3: Distribution harder") ggplot(data = tibble(x = 0:1), aes(x)) + stat_function(fun = dbeta, n = 1000, args = list(shape1=2, shape2=2), geom='area', colour='black', fill='violet') + labs(title = "Beta distribution 4: This Time, It's Personal") #Uniform ggplot(data = tibble(x = 0:1), aes(x)) + stat_function(fun = dunif, n = 1000, args = list(min=0, max=1), geom='area', colour='black', fill='yellow') + labs(title = "Uniform distribution (across all values)") ggplot(data = tibble(x = 0:1), aes(x)) + stat_function(fun = dunif, n = 1000, args = list(min=0.25, max=0.75), geom='area', colour='black', fill='orange') + labs(title = "Uniform distribution (across range of values)") #Example 1: What are chances of winning a vote? #VIEW prior: think you fall under 50 percent of vote ggplot(data = tibble(x = 0:1), aes(x)) + stat_function(fun = dbeta, n = 1000, args = list(shape1=45, shape2=55), geom='area', colour='black', fill='red') #VIEW data: small poll (data) found 6/10 would vote for you #N/A #DEFINE model vote_model <-"model{ #Likelihood model for X X ~dbin(p, n) #Prior model for p p ~dbeta(a, b) }" #COMPILE model vote_jags <- jags.model(textConnection(vote_model), data =list(a=45, b=55, X=6, n=10), inits =list(.RNG.name="base::Wichmann-Hill", .RNG.seed=100)) #SIMULATE posterior vote_sim <- coda.samples(model=vote_jags, variable.names=c("p"), n.iter=10000) #PLOT results summary(vote_sim) plot(vote_sim) #RETREAD USING UPDATED DATA #new prior ggplot(data = tibble(x = 0:1), aes(x)) + stat_function(fun = dnorm, n = 1000, args = list(mean=0.463, sd=0.048), geom='area', colour='black', fill='red') vote_model_2 <-"model{ #Likelihood model for X X ~ dbin(p, n) #Prior models for p p ~ dnorm(0.463, 0.048) }" #updated data -> now 688 out of 1000 people said they'd vote for you vote_jags_2 <- jags.model(textConnection(vote_model_2), data =list(X=688, n=1000), inits =list(.RNG.name="base::Wichmann-Hill", .RNG.seed=100)) vote_sim_2 <- coda.samples(model=vote_jags_2, variable.names=c("p"), n.iter=10000) #PLOT results summary(vote_sim_2) plot(vote_sim_2) #https://apps.dtic.mil/dtic/tr/fulltext/u2/a624076.pdf #------------------------------------------------------------------------------------------------------------------------------; #Example 2: Simple Regression, can we model weight as a function of height? #VIEW priors: a (intercept), b (slope), & s (standard deviation) a <- rnorm(n=10000, mean=0, sd=200) b <- rnorm(n=10000, mean=1, sd=0.5) s <- runif(n=10000, min=0, max=20) samples <- data.frame('a'=a, 'b'=b, 's'=s) head(samples) ggplot(samples, aes(x=a)) + geom_density(fill="lightblue") ggplot(samples, aes(x=b)) + geom_density(fill="lightgreen") ggplot(samples, aes(x=s)) + geom_density(fill="lightyellow") #VIEW data data(bdims) par(mfrow=c(1,1)) plot(x=bdims$hgt, y=bdims$wgt) #DEFINE model weight_model <-"model{ # Likelihood model for Y[i] for(i in 1:length(Y)) { Y[i] ~ dnorm(m[i], s^(-2)) m[i] <- a + b * X[i] } # Prior models for a, b, s a ~ dnorm(0, 200^(-2)) b ~ dnorm(1, 0.5^(-2)) s ~ dunif(0, 20) }" #COMPILE model weight_jags <- jags.model(textConnection(weight_model), data =list(X=bdims$hgt, Y=bdims$wgt), inits =list(.RNG.name="base::Wichmann-Hill", .RNG.seed=100)) #SIMULATE posterior weight_sim <- coda.samples(model=weight_jags, variable.names=c("a", "b", "s"), n.iter=10000) summary(weight_sim) plot(weight_sim) #more iterations weight_sim2 <- coda.samples(model=weight_jags, variable.names=c("a", "b", "s"), n.iter=50000) summary(weight_sim2) plot(weight_sim2) #more chains weight_jags2 <- jags.model(textConnection(weight_model), data =list(X=bdims$hgt, Y=bdims$wgt), inits =list(.RNG.name="base::Wichmann-Hill", .RNG.seed=100), n.chains=4) weight_sim3 <- coda.samples(model=weight_jags2, variable.names=c("a", "b", "s"), n.iter=10000) summary(weight_sim3) traceplot(weight_sim3, mfrow = c(2, 2), ask = F) #PLOT results ggplot(data=bdims, aes(x=hgt, y=wgt))+ geom_point() + geom_abline(intercept=-102.836, slope=1.005, color='red',) + geom_abline(intercept=-91.583, slope=1.072, color='red', lty=2) + geom_abline(intercept=-114.3834,slope=0.9396, color='red', lty=2) #plotting: https://biometry.github.io/APES/LectureNotes/StatsCafe/Linear_models_jags.html #https://biometry.github.io/APES/LectureNotes/StatsCafe/Linear_models_jags.html #more refs: http://kevbase.com/old/A%20Brief%20Introduction%20to%20Bayesian%20Regression.pdf #------------------------------------------------------------------------------------------------------------------------------; #Example 3: Multiple Regression -> can we model volume by temp and weekday? #https://rstudio-pubs-static.s3.amazonaws.com/454698_321332f02c72463693bf7a974774017d.html #VIEW priors: a (weekend intercept), b (contrast b/t weekday/weekend intercepts), c (common slope) & s (residual standard deviation) a2 <- rnorm(n=10000, mean=0, sd=200) b2 <- rnorm(n=10000, mean=0, sd=200) c2 <- rnorm(n=10000, mean=0, sd=20) s2 <- runif(n=10000, min=0, max=200)Ou samples2 <- data.frame('a'=a2, 'b'=b2, 'c'=c2, 's'=s2) head(samples) ggplot(samples2, aes(x=a)) + geom_density(fill="lightblue") ggplot(samples2, aes(x=b)) + geom_density(fill="lightgreen") ggplot(samples2, aes(x=c)) + geom_density(fill="lightgrey") ggplot(samples2, aes(x=s)) + geom_density(fill="lightyellow") #VIEW data library("mosaic") data(RailTrail) head(RailTrail) RailTrail <- RailTrail %>% mutate(weekday=as.factor(weekday)) class(RailTrail$weekday) ggplot(data=RailTrail, aes(x=hightemp, y=volume, color=weekday))+ geom_point() + geom_smooth(method=lm, se=FALSE, fullrange=FALSE) #DEFINE model rail_model <- "model{ #Likelihood model for Y[i] for(i in 1:length(Y)) { Y[i] ~ dnorm(m[i], s^(-2)) m[i] <- a + b[X[i]] + c * Z[i] } # Prior models for a, b, c, s a ~ dnorm(0, 200^(-2)) b[1] <- 0 b[2] ~ dnorm(0, 200^(-2)) c ~ dnorm(0, 20^(-2)) s ~ dunif(0, 200) }" #COMPILE model rail_jags <- jags.model(textConnection(rail_model), data =list(Y=RailTrail$volume, X=RailTrail$weekday, Z=RailTrail$hightemp), inits =list(.RNG.name="base::Wichmann-Hill", .RNG.seed=100)) #SIMULATE posterior rail_sim <- coda.samples(model=rail_jags, variable.names=c("a", "b", "c", "s"), n.iter=10000) summary(rail_sim) plot(rail_sim) rail_chains <-data.frame(rail_sim[[1]]) #PLOT results ggplot(RailTrail, aes(y = volume, x = hightemp, color = weekday)) + geom_point() + geom_abline(intercept = mean(rail_chains$a), slope = mean(rail_chains$c), color = "red") + geom_abline(intercept = mean(rail_chains$a) + mean(rail_chains$b.2.), slope = mean(rail_chains$c), color = "turquoise3") #------------------------------------------------------------------------------------------------------------------------------; #Example 4: Poisson regression -> can we predict volume by temp and weekday where volume is a count? #VIEW priors: a (weekend intercept), b (contrast b/t weekday/weekend intercepts), c (common slope) & s (residual standard deviation) a3 <- rnorm(n=10000, mean=0, sd=200) b2 <- rnorm(n=10000, mean=0, sd=200) c2 <- rnorm(n=10000, mean=0, sd=20) s2 <- runif(n=10000, min=0, max=200) samples2 <- data.frame('a'=a2, 'b'=b2, 'c'=c2, 's'=s2) head(samples) ggplot(samples2, aes(x=a)) + geom_density(fill="lightblue") ggplot(samples2, aes(x=b)) + geom_density(fill="lightgreen") ggplot(samples2, aes(x=c)) + geom_density(fill="lightgrey") ggplot(samples2, aes(x=s)) + geom_density(fill="lightyellow") #VIEW data ggplot(data=RailTrail, aes(x=hightemp, y=volume))+ geom_point() ggplot(data=RailTrail, aes(x=hightemp, y=volume, color=weekday))+ geom_point() #DEFINE model poisson_model <- "model{ # Likelihood model for Y[i] for(i in 1:length(Y)) { Y[i] ~ dpois(l[i]) log(l[i]) <- a + b * X[i] } # Prior models for a, b a ~ dnorm(0, 200^(-2)) b ~ dnorm(0, 2^(-2)) }" #COMPILE model poisson_jags <- jags.model(textConnection(poisson_model), data = list(Y = RailTrail$volume, X = RailTrail$hightemp), inits = list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 100)) #SIMULATE posterior poisson_sim <- coda.samples(model = poisson_jags, variable.names = c("a", "b"), n.iter = 10000) summary(poisson_sim) plot(poisson_sim) poisson_chains <- data.frame(poisson_sim[[1]]) #PLOT results ggplot(RailTrail, aes(y = volume, x = hightemp)) + geom_point() + stat_function(fun = function(x){exp(mean(poisson_chains$a) + mean(poisson_chains$b) * x)}, color = "red") #add Weekday #DEFINE model poisson_model2 <- "model{ # Likelihood model for Y[i] for(i in 1:length(Y)) { Y[i] ~ dpois(l[i]) log(l[i]) <- a +b[X[i]] + c*Z[i] } # Prior models for a, b, c, s a ~ dnorm(0, 200^(-2)) b[1] <- 0 b[2] ~ dnorm(0, 2^(-2)) c ~ dnorm(0, 2^(-2)) }" #COMPILE model poisson_jags2 <- jags.model(textConnection(poisson_model2), data = list(Y = RailTrail$volume, X = RailTrail$weekday, Z = RailTrail$hightemp), inits = list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 100)) #SIMULATE posterior poisson_sim2 <- coda.samples(model = poisson_jags2, variable.names = c("a", "b", "c"), n.iter = 10000) summary(poisson_sim2) poisson_chains <- data.frame(poisson_sim2[[1]]) #PLOT results ggplot(RailTrail, aes(y = volume, x = hightemp, color = weekday)) + geom_point() + stat_function(fun = function(x){exp(mean(poisson_chains$a) + mean(poisson_chains$c) * x)}, color = "red") + stat_function(fun = function(x){exp(mean(poisson_chains$a) + mean(poisson_chains$b.2.) + mean(poisson_chains$c) * x)}, color = "turquoise3")