#Beautiful Demos Five Example
#load libraries
library(MASS)
library(gmodels)
library(dplyr)
library(ggplot2)
# Explore the mtcars dataset, check assumptions, and fit the full model ----
head(mtcars)

# 1) Choose the right variables
head(mtcars$mpg) # Y-variable (response)
# X-variables (candidate predictors): columns 3:7 = disp, hp, drat, wt, qsec.
# R indexing is 1-based and a 0 index is silently dropped, so use head()
# for the first six rows instead of a `0:6` row index.
head(mtcars[, 3:7])
# other variables not used

# 2) Assumptions for Y
mtcars$mpg # continuous numerical variable
# Close enough to a normal distribution. Strictly, the normality assumption
# concerns the model residuals, which are inspected via plot(lm_f) below.
hist(mtcars$mpg)
# independent sample assumed (based on design)

# 3) Assumptions for Xs
# disp is highly correlated with hp, drat, and wt
# (ensure disp is not in the final model together with the other three).
cor(mtcars[, 3:7])
lm_f <- lm(mpg ~ disp + hp + drat + wt + qsec, data = mtcars) # full model
# Diagnostic plots: residuals look good enough
# (no heteroscedasticity or non-linearity).
plot(lm_f)
# 4) Model selection ----
# Stepwise selection in both directions, starting from the full model.
msel <- step(lm_f, direction = "both")
# The selection kept drat, wt, and qsec; disp was dropped, which avoids the
# multicollinearity issue noted earlier.
lm_r <- lm(mpg ~ drat + wt + qsec, data = mtcars) # reduced model
# Diagnostic plots for the reduced model: residuals still look good enough.
plot(lm_r)

# 5) Interpreting results ----
# Compare full vs. reduced model: the reduced model has a marginally
# better AIC score.
AIC(lm_f, lm_r)
# The reduced model is significant overall.
summary(lm_r)
# wt and qsec significantly predicted mpg:
# - As weight decreased, mpg significantly increased
#   (B=-4.4, t=-6.5, p<0.0001).
# - As quarter mile time increased, mpg significantly increased
#   (B=0.95, t=3.6, p=.0012).
# 6) Presenting results ----
# Scatter plot of mpg against quarter-mile time, with point size mapped to
# weight; points are semi-transparent filled circles on a classic theme.
plot1 <- ggplot(
  data = mtcars,
  mapping = aes(x = qsec, y = mpg, size = wt)
) +
  geom_point(shape = 19, alpha = 0.75) +
  theme_classic()
plot1