library(lme4)
library(MASS)
library(ggplot2)

#Example 1: Can the number of yarn warpbreaks be predicted by loom tension,
# while controlling for wool type?

#Visualize Data
head(warpbreaks)
hist(warpbreaks$breaks, col='purple') #count data (Poisson distribution)
plot(breaks~tension, data=warpbreaks) #fixed effect
plot(breaks~wool, data=warpbreaks) #random effect

#Test Assumptions
#Independence: depends on sample design and taken to be valid here
#Distribution Fit: wait until model building

#Model Building
glmm1.0 <- lm(breaks~tension + 0, data=warpbreaks)
summary(glmm1.0) #linear model
glmm1.0$coefficients
confint(glmm1.0)

glmm1.1 <- lmer(breaks~tension + 0 + (1|wool), data=warpbreaks)
summary(glmm1.1) #linear mixed model
fixef(glmm1.1)
confint(glmm1.1)

glmm1.2 <- glm(breaks~tension + 0, data=warpbreaks, family=poisson())
summary(glmm1.2) #generalized linear model
exp(glmm1.2$coefficients)
exp(confint(glmm1.2))

glmm1.3 <- glmer(breaks~tension + 0 + (1|wool), data=warpbreaks, family=poisson())
summary(glmm1.3) #generalized linear mixed model
exp(fixef(glmm1.3))
exp(confint(glmm1.3))

#Model Fit
#Pearson Chi-Square/DF (should be close to 1.0)
sum(residuals(glmm1.0, type="pearson")^2)/glmm1.0$df.residual #141.1481
sum(residuals(glmm1.1, type="pearson")^2)/51 #133.104
sum(residuals(glmm1.2, type="pearson")^2)/51 #4.60 #close
sum(residuals(glmm1.3, type="pearson")^2)/51 #4.19 #closer, keep top two

#Residuals
par(mfrow=c(2,2))
plot(glmm1.2)
plot(glmm1.3) #merMod diagnostics are lattice-based and ignore the par() grid
par(mfrow=c(1,1))

#Model Comparison
AIC(glmm1.2, glmm1.3) #1.3 glmm was best fit

#Plot
cat_1 <- c("L","M","H")
mean_1 <- exp(fixef(glmm1.3))
ci_1 <- exp(confint(glmm1.3))
ll_1 <- ci_1[2:4,1]; ul_1 <- ci_1[2:4,2]
warpbreaks_sum <- data.frame(Tension=cat_1, mean=mean_1, ll=ll_1, ul=ul_1)
warpbreaks_sum$Tension <- factor(warpbreaks_sum$Tension, levels=c("L","M","H"))
g1 <- ggplot(data=warpbreaks_sum, aes(x=Tension, y=mean, fill=Tension)) +
  geom_bar(stat="identity", color="black") +
  geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1) +
  labs(y="Mean breaks") +
  scale_y_continuous(limits=c(0,55), breaks=c(0,10,20,30,40,50)) +
  geom_segment(aes(x=1, xend=3, y=50, yend=50)) +
  geom_text(aes(label="*"), x=2, y=52, size=7)
g1
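#Added sketch (not from the original workflow): the Pearson chi-square/df checks
#above hand-count the residual df (51 here). A small helper, dispersion_ratio()
#(a hypothetical name), computes the same ratio from the fitted model; note that
#df.residual() on a merMod also counts the variance parameters, so it can differ
#slightly from the hand-counted value.
dispersion_ratio <- function(model, rdf = df.residual(model)) {
  sum(residuals(model, type="pearson")^2) / rdf #Pearson chi-square / residual df
}
dispersion_ratio(glmm1.2) #should roughly match the 4.60 computed above
dispersion_ratio(glmm1.3)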
#Visualize Data data("Indometh") head(Indometh) hist(Indometh$conc, col='blue') #Gamma distribution plot(conc~time, data=Indometh) #fixed effect plot(conc~Subject, data=Indometh) #random effect #Test Assumptions #Independence #depends on sample design and taken to be valid here #Distribution Fit #wait until model building #Model Building glmm2.0 <-lm(conc~time, data=Indometh) summary(glmm2.0) #linear model glmm2.1 <-lmer(conc~time + (1|Subject), data=Indometh) summary(glmm2.1) #linear mixed model glmm2.2 <-glm(conc~time, data=Indometh, family=Gamma()) summary(glmm2.2) #generalized linear model glmm2.3 <-glmer(conc~time + (1|Subject), data=Indometh, family=Gamma()) summary(glmm2.3) #generalized linear mixed model #Model Fit #Pearson Chi-Square/DF (should be close to 1.0) sum(residuals(glmm2.0, type="pearson")^2)/glmm2.0$df.residual #0.201: seems better sum(residuals(glmm2.1, type="pearson")^2)/64 #0.201 sum(residuals(glmm2.2, type="pearson")^2)/64 #0.089 sum(residuals(glmm2.3, type="pearson")^2)/64 #0.077 #Residuals par(mfrow=c(2,2)) plot(glmm2.0) #terrible fit plot(glmm2.1) #terrible fit plot(glmm2.2) #okay fit plot(glmm2.3) #okay fit par(mfrow=c(1,1)) #Model Comparison AIC(glmm2.0); AIC(glmm2.1) #2.0 better AIC(glmm2.2); AIC(glmm2.3) #2.3 better, but not significantly fitted2 <- predict(glmm2.2, type="response") g2<- ggplot(glmm2.2$model) + geom_point(aes(conc, time)) + geom_line(aes(time, fitted2, color="red")) + labs(x="time", y="Indometh concentration", color="Legend") g2 ################################################################################ #Example 3: Can the number of grouse ticks be predicted by year, while controlling for location? #Visualize Data data('grouseticks') head(grouseticks) summary(grouseticks) hist(grouseticks$TICKS, col='brown') #Count data (Negative binomial) plot(TICKS~YEAR, data=grouseticks) #fixed effect plot(TICKS~LOCATION, data=grouseticks) #random effect #Test Assumptions #Independence #depends on sample design and taken to be valid here #Distribution Fit #wait until model building #Model Building glmm3.0 <-lm(TICKS~YEAR + 0, data=grouseticks) summary(glmm3.0) #linear model glmm3.0$coefficients confint(glmm3.0) glmm3.1 <-lmer(TICKS~YEAR + 0 + (1|LOCATION), data=grouseticks) summary(glmm3.1) #linear mixed model fixef(glmm3.1) confint(glmm3.1) glmm3.2 <-glm.nb(TICKS~YEAR + 0, data=grouseticks) summary(glmm3.2) #generalized linear model exp(glmm3.2$coefficients) exp(confint(glmm3.2)) glmm3.3 <-glmer.nb(TICKS~YEAR + 0 + (1|LOCATION), data=grouseticks) summary(glmm3.3) exp(fixef(glmm3.3)) exp(confint(glmm3.3)) #Model Fit #Pearson Chi-Square/DF (should be close to 1.0) sum(residuals(glmm3.0, type="pearson")^2)/glmm3.0$df.residual #155.9 sum(residuals(glmm3.1, type="pearson")^2)/400 #54.3 sum(residuals(glmm3.2, type="pearson")^2)/400 #1.4 <- a little over-dispersed sum(residuals(glmm3.3, type="pearson")^2)/400 #0.78 <-a little under-dispersed #Residuals par(mfrow=c(2,2)) plot(glmm3.2) #terrible fit plot(glmm3.3) #okay fit par(mfrow=c(1,1)) #Model Comparison AIC(glmm3.2); AIC(glmm3.3) #3.3 best fit #Plot fitted3 <- predict(glmm3.3, type="response") Year <-c("95","96","97") mean_3 <-exp(fixef(glmm3.3)) ci_3 <-exp(confint(glmm3.3)) ll_3 <-ci_3[2:4,1]; ul_3 <-ci_3[2:4,2] ticks_sum<-data.frame(Year=Year, mean=mean_3, ll=ll_3, ul=ul_3) g3 <-ggplot(data=ticks_sum, aes(x=Year, y=mean, fill=Year))+ geom_bar(stat="identity", color="black") + geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1)+ labs(y="Mean ticks") + 
################################################################################
#Example 3: Can the number of grouse ticks be predicted by year,
# while controlling for location?

#Visualize Data
data('grouseticks')
head(grouseticks)
summary(grouseticks)
hist(grouseticks$TICKS, col='brown') #count data (negative binomial distribution)
plot(TICKS~YEAR, data=grouseticks) #fixed effect
plot(TICKS~LOCATION, data=grouseticks) #random effect

#Test Assumptions
#Independence: depends on sample design and taken to be valid here
#Distribution Fit: wait until model building

#Model Building
glmm3.0 <- lm(TICKS~YEAR + 0, data=grouseticks)
summary(glmm3.0) #linear model
glmm3.0$coefficients
confint(glmm3.0)

glmm3.1 <- lmer(TICKS~YEAR + 0 + (1|LOCATION), data=grouseticks)
summary(glmm3.1) #linear mixed model
fixef(glmm3.1)
confint(glmm3.1)

glmm3.2 <- glm.nb(TICKS~YEAR + 0, data=grouseticks)
summary(glmm3.2) #generalized linear model
exp(glmm3.2$coefficients)
exp(confint(glmm3.2))

glmm3.3 <- glmer.nb(TICKS~YEAR + 0 + (1|LOCATION), data=grouseticks)
summary(glmm3.3) #generalized linear mixed model
exp(fixef(glmm3.3))
exp(confint(glmm3.3))

#Model Fit
#Pearson Chi-Square/DF (should be close to 1.0)
sum(residuals(glmm3.0, type="pearson")^2)/glmm3.0$df.residual #155.9
sum(residuals(glmm3.1, type="pearson")^2)/400 #54.3
sum(residuals(glmm3.2, type="pearson")^2)/400 #1.4 <- a little over-dispersed
sum(residuals(glmm3.3, type="pearson")^2)/400 #0.78 <- a little under-dispersed

#Residuals
par(mfrow=c(2,2))
plot(glmm3.2) #terrible fit
plot(glmm3.3) #okay fit
par(mfrow=c(1,1))

#Model Comparison
AIC(glmm3.2); AIC(glmm3.3) #3.3 best fit

#Plot
fitted3 <- predict(glmm3.3, type="response")
Year <- c("95","96","97")
mean_3 <- exp(fixef(glmm3.3))
ci_3 <- exp(confint(glmm3.3))
ll_3 <- ci_3[2:4,1]; ul_3 <- ci_3[2:4,2]
ticks_sum <- data.frame(Year=Year, mean=mean_3, ll=ll_3, ul=ul_3)
g3 <- ggplot(data=ticks_sum, aes(x=Year, y=mean, fill=Year)) +
  geom_bar(stat="identity", color="black") +
  geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1) +
  labs(y="Mean ticks") +
  scale_y_continuous(limits=c(0,8.5), breaks=c(0,1,2,3,4,5,6,7,8)) +
  geom_text(aes(label="*"), x=c(1,2,3), y=c(2.6, 8.0, 1.1), size=7)
g3
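#Added sketch (assumes the same glmm3.3 model as above): the per-year means shown
#in g3 can be cross-checked with predict() on new data, using re.form=NA to set
#the LOCATION random effect to zero; newdat3 is a hypothetical name. The values
#should agree with exp(fixef(glmm3.3)).
newdat3 <- data.frame(YEAR=factor(c("95","96","97"), levels=levels(grouseticks$YEAR)))
predict(glmm3.3, newdata=newdat3, re.form=NA, type="response")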