library(Stat2Data) library(ggplot2) library(lme4) library(betareg) library(dplyr) #------------------------------------------------------------------------ #Example 1: Can log body size in moths predict the number of eggs laid? #Poisson #Get data data('MothEggs') #Visualize Relationship plot(Eggs~BodyMass, data=MothEggs) #Test Assumptions #Independence #depends on sample design and taken to be valid here #Distribution Fit #wait until model building #Model Building glm1_norm <- glm(Eggs~BodyMass, data=MothEggs, family="gaussian") #normal distribution glm1_norm summary(glm1_norm) glm1_pois <- glm(Eggs~BodyMass, data=MothEggs, family="poisson") #poisson distribution glm1_pois summary(glm1_pois) #Model Fit #Pearson Chi-Square/DF (should be close to 1.0) sum(residuals(glm1_norm, type="pearson")^2)/glm1_norm$df.residual #2002.9 sum(residuals(glm1_pois, type="pearson")^2)/glm1_pois$df.residual #11.9 #Residuals par(mfrow=c(2,2)) plot(glm1_norm) plot(glm1_pois) par(mfrow=c(1,1)) #Model Comparison AIC(glm1_norm) AIC(glm1_pois) #lower AIC, better fit statistic #Plots #Ref: https://stackoverflow.com/questions/23725555/add-simulated-poisson-distributions-to-a-ggplot glm1_pois$model$fitted <- predict(glm1_pois, type="response") glm1_pois$model$fitted2 <- predict(glm1_norm, type="response") colors <-c("Poisson"="blue", "Gaussian"="red") ggplot(glm1_pois$model) + geom_point(aes(BodyMass, Eggs)) + geom_line(aes(BodyMass, fitted, color="Poisson")) + geom_line(aes(BodyMass, fitted2, color="Gaussian")) + labs(x="Log Body Mass", y="Number of Eggs", color="Legend")+ scale_color_manual(values=colors) #------------------------------------------------------------------------ #Example 2: Can fertility rate in historical Swiss provinces predict infant mortality? #Get Data data('swiss') head(swiss) swiss$infant.Mortality2 <-swiss$Infant.Mortality/100 #convert to decimal (0.0-1.0) head(swiss) #Visualize Relationship plot(infant.Mortality2~Fertility, data=swiss) #Test Assumptions #Independence #depends on sample design and taken to be valid here #Distribution Fit #wait until model building #Model Building glm2_norm <- glm(infant.Mortality2~Fertility, data=swiss, family="gaussian") #normal distribution glm2_norm summary(glm2_norm) glm2_beta <- betareg(infant.Mortality2~Fertility, data=swiss) #beta distribution glm2_beta summary(glm2_beta) #Model Fit #Pearson Chi-Square/DF (should be close to 1.0) sum(residuals(glm2_norm, type="pearson")^2)/glm2_norm$df.residual #0.0007 sum(residuals(glm2_beta, type="pearson")^2)/glm2_beta$df.residual #1.0058 #Residuals par(mfrow=c(2,2)) plot(glm2_norm) plot(glm2_beta) par(mfrow=c(1,1)) #Model Comparison AIC(glm2_norm) AIC(glm2_beta) #lower AIC (though close), better fit statistic #Plots glm2_beta$model$fitted <- predict(glm2_beta, type="response") glm2_beta$model$fitted2 <- predict(glm2_norm, type="response") colors <-c("Beta"="blue", "Gaussian"="red") ggplot(glm2_beta$model) + geom_point(aes(Fertility, infant.Mortality2)) + geom_line(aes(Fertility, fitted, color="Beta")) + geom_line(aes(Fertility, fitted2, color="Gaussian")) + labs(x="Fertility", y="Infant Mortality (%)", color="Legend")+ scale_color_manual(values=colors) #------------------------------------------------------------------------ #Example 3: Can gall diameter in goldenrod galls predict the presence of fly larva? #Get Data data('Goldenrod') head(Goldenrod) Goldenrod$Fly04.2 <-ifelse(Goldenrod$Fly04=="y",1,0) #change to binary 0/1 head(Goldenrod) #Visualize Relationship plot(Fly04.2~Gdiam04, data=Goldenrod) #Test Assumptions #Independence #depends on sample design and taken to be valid here #Distribution Fit #wait until model building #Model Building glm3_norm <- glm(Fly04.2~Gdiam04, data=Goldenrod, family="gaussian") #normal distribution glm3_norm summary(glm3_norm) glm3_logi <- glm(Fly04.2~Gdiam04, data=Goldenrod, family="binomial") #logistic distribution glm3_logi summary(glm3_logi) #Model Fit #Pearson Chi-Square/DF (should be close to 1.0) sum(residuals(glm3_norm, type="pearson")^2)/glm3_norm$df.residual #0.19 sum(residuals(glm3_logi, type="pearson")^2)/glm3_logi$df.residual #1.02 #Residuals par(mfrow=c(2,2)) plot(glm3_norm) plot(glm3_logi) par(mfrow=c(1,1)) #Model Comparison AIC(glm3_norm) AIC(glm3_logi) #lower AIC (though close), better fit statistic #Plots glm3_logi$model$fitted <- predict(glm3_logi, type="response") glm3_logi$model$fitted2 <- predict(glm3_norm, type="response") colors <-c("Logistic"="blue", "Gaussian"="red") ggplot(glm3_logi$model) + geom_point(aes(Gdiam04, Fly04.2)) + geom_line(aes(Gdiam04, fitted, color="Logistic"), size=1.5) + geom_line(aes(Gdiam04, fitted2, color="Gaussian"), size=1.5) + labs(x="gall diameter", y="fly presence", color="Legend") scale_color_manual(values=colors)