library(lme4)
library(MASS)
library(ggplot2)


#Example 1: Can the number of of yarn warpbreaks be predicted by loom tension, 
#           while controlling for wool type?

#Visualize Data
head(warpbreaks)
hist(warpbreaks$breaks, col='purple') #count data (Poisson distribution)
plot(breaks~tension, data=warpbreaks) #fixed effect
plot(breaks~wool, data=warpbreaks) #random effect
  
#Test Assumptions

#Independence
#depends on sample design and taken to be valid here

#Distribution Fit
#wait until model building

#Model Building
glmm1.0 <-lm(breaks~tension + 0, data=warpbreaks) 
summary(glmm1.0) #linear model
glmm1.0$coefficients
confint(glmm1.0)

glmm1.1 <-glmer(breaks~factor(tension) + 0 + (1|wool), data=warpbreaks, family=gaussian())
summary(glmm1.1) #linear mixed model
fixef(glmm1.1)
confint(glmm1.1)

glmm1.2 <-glm(breaks~tension + 0, data=warpbreaks, family=poisson())
summary(glmm1.2) #generalized linear model
exp(glmm1.2$coefficients)
exp(confint(glmm1.2))

glmm1.3 <-glmer(breaks~tension + 0 + (1|wool), data=warpbreaks, family=poisson())
summary(glmm1.3) #generalized linear mixed model
exp(fixef(glmm1.3))
exp(confint(glmm1.3))

#Model Fit
  #Pearson Chi-Square/DF (should be close to 1.0)
sum(residuals(glmm1.0, type="pearson")^2)/glmm1.0$df.residual #141.1481
sum(residuals(glmm1.1, type="pearson")^2)/51 #133.104
sum(residuals(glmm1.2, type="pearson")^2)/51 #4.60 #close
sum(residuals(glmm1.3, type="pearson")^2)/51 #4.19 #closer, keep top two

#Residuals
par(mfrow=c(2,2))
plot(glmm1.2)
plot(glmm1.3)
par(mfrow=c(1,1))

#Model Comparison
AIC(glmm1.2, glmm1.3) #1.3 glmm was best fit

#Plot
cat_1 <-c("L","M","H")
mean_1 <-exp(fixef(glmm1.3))
ci_1 <-exp(confint(glmm1.3))
ll_1 <-ci_1[2:4,1]; ul_1 <-ci_1[2:4,2]
warpbreaks_sum<-data.frame(Tension=cat_1, mean=mean_1, ll=ll_1, ul=ul_1)
warpbreaks_sum$Tension <-factor(warpbreaks_sum$Tension, levels=c("L","M","H"))

g1 <-ggplot(data=warpbreaks_sum, aes(x=Tension, y=mean, fill=Tension))+
  geom_bar(stat="identity", color="black") +
  geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1)+
  labs(y="Mean breaks") +
  scale_y_continuous(limits=c(0,55),breaks=c(0,10,20,30,40,50))+
  geom_segment(aes(x=1, xend=3, y=50, yend=50)) +
  geom_text(aes(label="*"), x=2, y=52, size=7)
g1
################################################################################


#Example 2: Can the concentration of Indomethacin be predicted by time, 
#           while controlling for subject?


#Visualize Data
data("Indometh") 
head(Indometh)
hist(Indometh$conc, col='blue') #Gamma distribution
plot(conc~time, data=Indometh) #fixed effect
plot(conc~Subject, data=Indometh) #random effect

#Test Assumptions

#Independence
#depends on sample design and taken to be valid here

#Distribution Fit
#wait until model building

#Model Building
glmm2.0 <-lm(conc~time, data=Indometh) 
summary(glmm2.0) #linear model

glmm2.1 <-lmer(conc~time + (1|Subject), data=Indometh)
summary(glmm2.1) #linear mixed model

glmm2.2 <-glm(conc~time, data=Indometh, family=Gamma())
summary(glmm2.2) #generalized linear model

glmm2.3 <-glmer(conc~time + (1|Subject), data=Indometh, family=Gamma())
summary(glmm2.3) #generalized linear mixed model

#Model Fit
#Pearson Chi-Square/DF (should be close to 1.0)
sum(residuals(glmm2.0, type="pearson")^2)/glmm2.0$df.residual #0.201: seems better
sum(residuals(glmm2.1, type="pearson")^2)/64 #0.201
sum(residuals(glmm2.2, type="pearson")^2)/64 #0.089
sum(residuals(glmm2.3, type="pearson")^2)/64 #0.077

#Residuals
par(mfrow=c(2,2))
plot(glmm2.0) #terrible fit
plot(glmm2.1) #terrible fit
plot(glmm2.2) #okay fit
plot(glmm2.3) #okay fit
par(mfrow=c(1,1))

#Model Comparison
AIC(glmm2.0); AIC(glmm2.1) #2.0 better
AIC(glmm2.2); AIC(glmm2.3) #2.3 better, but not significantly

fitted2 <- predict(glmm2.2, type="response")

g2<- ggplot(glmm2.2$model) + geom_point(aes(conc, time)) + 
  geom_line(aes(time, fitted2, color="red")) +
  labs(x="time", y="Indometh concentration", color="Legend")
g2
################################################################################


#Example 3: Can the number of grouse ticks be predicted by year, while controlling for location?

#Visualize Data
data('grouseticks')
head(grouseticks)
summary(grouseticks)
hist(grouseticks$TICKS, col='brown') #Count data (Negative binomial)
plot(TICKS~YEAR, data=grouseticks) #fixed effect
plot(TICKS~LOCATION, data=grouseticks) #random effect

#Test Assumptions

#Independence
#depends on sample design and taken to be valid here

#Distribution Fit
#wait until model building

#Model Building
glmm3.0 <-lm(TICKS~YEAR + 0, data=grouseticks)
summary(glmm3.0) #linear model
glmm3.0$coefficients
confint(glmm3.0)

glmm3.1 <-lmer(TICKS~YEAR + 0 + (1|LOCATION), data=grouseticks)
summary(glmm3.1) #linear mixed model
fixef(glmm3.1)
confint(glmm3.1)

glmm3.2 <-glm.nb(TICKS~YEAR + 0, data=grouseticks)
summary(glmm3.2) #generalized linear model
exp(glmm3.2$coefficients)
exp(confint(glmm3.2))

glmm3.3 <-glmer.nb(TICKS~YEAR + 0 + (1|LOCATION), data=grouseticks)
summary(glmm3.3)
exp(fixef(glmm3.3))
exp(confint(glmm3.3))

#Model Fit
#Pearson Chi-Square/DF (should be close to 1.0)
sum(residuals(glmm3.0, type="pearson")^2)/glmm3.0$df.residual #155.9
sum(residuals(glmm3.1, type="pearson")^2)/400 #54.3
sum(residuals(glmm3.2, type="pearson")^2)/400 #1.4 <- a little over-dispersed
sum(residuals(glmm3.3, type="pearson")^2)/400 #0.78 <-a little under-dispersed

#Residuals
par(mfrow=c(2,2))
plot(glmm3.2) #terrible fit
plot(glmm3.3) #okay fit
par(mfrow=c(1,1))

#Model Comparison
AIC(glmm3.2); AIC(glmm3.3) #3.3 best fit

#Plot
fitted3 <- predict(glmm3.3, type="response")

Year <-c("95","96","97")
mean_3 <-exp(fixef(glmm3.3))
ci_3 <-exp(confint(glmm3.3))
ll_3 <-ci_3[2:4,1]; ul_3 <-ci_3[2:4,2]
ticks_sum<-data.frame(Year=Year, mean=mean_3, ll=ll_3, ul=ul_3)

g3 <-ggplot(data=ticks_sum, aes(x=Year, y=mean, fill=Year))+
  geom_bar(stat="identity", color="black") +
  geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1)+
  labs(y="Mean ticks") +
  scale_y_continuous(limits=c(0,8.5),breaks=c(0,1,2,3,4,5,6,7,8))+
  geom_text(aes(label="*"), x=c(1,2,3), y=c(2.6, 8.0, 1.1), size=7)
g3