# Beautiful Demos Five Example
#
# Walk-through of a multiple linear regression on the built-in `mtcars`
# data: choose variables, check assumptions, select a model, interpret the
# fit, and plot the result.
#
# NOTE: each statement must sit on its own line. A `#` comments out the rest
# of the physical line, so a script collapsed onto one line starting with `#`
# executes nothing.

# Load libraries ----
library(MASS)
library(gmodels)
library(dplyr)
library(ggplot2)

# mtcars dataset ----
head(mtcars)

# 1) Choose right variables ----
head(mtcars$mpg)   # Y-variable
# X-variables: columns 3-7 (disp, hp, drat, wt, qsec). R indexing is 1-based;
# a 0 index is silently dropped, so 1:6 states the intended first six rows.
mtcars[1:6, 3:7]
# other variables not used

# 2) Assumptions for Y ----
mtcars$mpg         # continuous numerical variable
hist(mtcars$mpg)   # close enough to normal distribution
# independent sample assumed (based on design)

# 3) Assumptions for Xs ----
# disp highly correlated with hp, drat, and wt
# (ensure disp not in final model with other three)
cor(mtcars[, 3:7])

# Full model with all five candidate predictors.
lm_f <- lm(mpg ~ disp + hp + drat + wt + qsec, data = mtcars)
# Diagnostic plots: residuals look good enough
# (no heteroscedasticity or non-linearity).
plot(lm_f)

# 4) Model selection ----
# Stepwise selection starting from the full model. Per the author's notes the
# selection retained drat, wt, and qsec (disp not included, so the
# multicollinearity issue is avoided).
msel <- step(lm_f, direction = "both")

# Reduced model, refit by hand from the terms reported above.
# NOTE(review): the formula is hard-coded rather than taken from `msel`;
# compare against formula(msel) if the data or candidate predictors change.
lm_r <- lm(mpg ~ drat + wt + qsec, data = mtcars)
plot(lm_r)         # residuals still look good enough

# 5) Interpreting results ----
AIC(lm_f, lm_r)    # reduced model has marginally better AIC score
summary(lm_r)      # reduced model significant
# wt and qsec significantly predicted mpg.
# As weight decreased, mpg significantly increased
#   (B = -4.4, t = -6.5, p < 0.0001).
# As quarter mile time increased, mpg significantly increased
#   (B = 0.95, t = 3.6, p = .0012).

# 6) Presenting results ----
# mpg against quarter-mile time, with point size mapped to weight.
plot1 <- ggplot(data = mtcars, aes(x = qsec, y = mpg, size = wt)) +
  geom_point(alpha = 0.75, shape = 19) +
  theme_classic()
# Explicit print() so the figure is also drawn when the script is run via
# source(), where top-level values are not auto-printed.
print(plot1)