# Setup: packages and example datasets ----
# Install any of these packages first if they are not already available.
library("Stat2Data")
library(gmodels)
library(dplyr)
library(ggplot2)

data("Backpack")
data("AppleStock")
data("Day1Survey")

#------------------------------------------------------------------------
# Example 1: Is the mean resting pulse rate for stats students different
# across sex (F vs. M)?

# Assumption checks ----
# Independence and random sampling: depends on the sample design and is
# taken to be valid here.

# Normality: overall histogram and normal Q-Q plot, then one histogram
# per sex.
hist(Day1Survey$Pulse, col = "grey")
qqnorm(Day1Survey$Pulse)
qqline(Day1Survey$Pulse)
hist(Day1Survey$Pulse[Day1Survey$Sex == "M"], col = "blue")
hist(Day1Survey$Pulse[Day1Survey$Sex == "F"], col = "red")
# Close enough for our purposes here.

# Equal variance: ratio of the female to the male sample variance.
with(Day1Survey, var(Pulse[Sex == "F"]) / var(Pulse[Sex == "M"]))
# 1.02, so good to go.

# Two-sample t-test ----
# NOTE(review): t.test() defaults to var.equal = FALSE (Welch), so the
# variance check above does not change which test is run here.
t.test(Pulse ~ Sex, data = Day1Survey, alternative = "two.sided")
# Recorded result: Female_mean=67.82, Male_mean=66.65, t=0.33, p=0.7428

# Graph: box plot of pulse by sex, with whisker end caps drawn first so
# the boxes sit on top of them.
ggplot(Day1Survey, aes(x = Sex, y = Pulse, fill = Sex)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot() +
  scale_fill_manual(values = c("Red", "Blue"))

#------------------------------------------------------------------------
# Example 2: Is the mean height for stats students different across sex
# (F vs. M)?
# Assumption checks ----
# Independence and random sampling: depends on the sample design and is
# taken to be valid here.

# Normality: overall histogram and normal Q-Q plot, then one histogram
# per sex.
hist(Day1Survey$Height, col = "grey")
qqnorm(Day1Survey$Height)
qqline(Day1Survey$Height)
hist(Day1Survey$Height[Day1Survey$Sex == "M"], col = "blue")
hist(Day1Survey$Height[Day1Survey$Sex == "F"], col = "red")
# A little shaky, so try a log transformation.

# Store the natural log of height as a new column and re-check normality.
Day1Survey[["log_Height"]] <- log(Day1Survey[["Height"]])
hist(Day1Survey$log_Height, col = "grey")
qqnorm(Day1Survey$log_Height)
qqline(Day1Survey$log_Height)
# Looks better, go with that.

# Equal variance: ratio of the female to the male sample variance of
# log height.
with(Day1Survey, var(log_Height[Sex == "F"]) / var(log_Height[Sex == "M"]))
# 2.41, so still good to go.

# Two-sample t-test on the log scale ----
# NOTE(review): t.test() defaults to var.equal = FALSE (Welch), so the
# variance check above does not change which test is run here.
t.test(log_Height ~ Sex, data = Day1Survey, alternative = "two.sided")
# Recorded result: Female_log_mean=4.17, Male_log_mean=4.27, t=-7.23,
# p<0.0001

# Graph ----
# Per-sex summary of log height: mean, standard deviation, and the
# half-width of a 95% t-based confidence interval (error), with the
# resulting upper (ul) and lower (ll) limits.
Survey_sum <- Day1Survey %>%
  group_by(Sex) %>%
  summarise(
    mean = mean(log_Height),
    sd = sd(log_Height),
    error = qt(0.975, df = n() - 1) * sd / sqrt(n()),
    ul = mean + error,
    ll = mean - error
  )

# Bar chart of the group means with the confidence limits as error bars.
ggplot(Survey_sum, aes(x = Sex, y = mean, fill = Sex)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = ll, ymax = ul), width = 0.1) +
  scale_fill_manual(values = c("red", "blue"))

#------------------------------------------------------------------------
# Example 3: Is the mean backpack weight for college students different
# across back problem status (No vs. Yes)?
# Assumption checks ----
# Independence and random sampling: depends on the sample design and is
# taken to be valid here.

# Normality: overall histogram and normal Q-Q plot, then one histogram
# per back-problem group (0 and 1).
hist(Backpack$BackpackWeight, col = "grey")
qqnorm(Backpack$BackpackWeight)
qqline(Backpack$BackpackWeight)
hist(Backpack$BackpackWeight[Backpack$BackProblems == 0], col = "green")
hist(Backpack$BackpackWeight[Backpack$BackProblems == 1], col = "red")
# Good to go.

# Equal variance: ratio of the sample variance in group 0 to that in
# group 1.
with(
  Backpack,
  var(BackpackWeight[BackProblems == 0]) /
    var(BackpackWeight[BackProblems == 1])
)
# 1.15, so good to go.

# Two-sample t-test ----
# NOTE(review): t.test() defaults to var.equal = FALSE (Welch), so the
# variance check above does not change which test is run here.
t.test(BackpackWeight ~ BackProblems, data = Backpack,
       alternative = "two.sided")
# Recorded result: No_problems_mean=11.19, Back_problems_mean=12.66,
# t=-1.22, p=0.2273

# Graph ----
# Convert the 0/1 indicator to a factor so it can be used as a discrete
# x axis and with the manual fill scale.
Backpack[["BackProblems"]] <- as.factor(Backpack[["BackProblems"]])

# Box plot of backpack weight by back-problem status, with whisker end
# caps drawn first so the boxes sit on top of them.
ggplot(
  Backpack,
  aes(x = BackProblems, y = BackpackWeight, fill = BackProblems)
) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot() +
  scale_fill_manual(values = c("Green", "Red"))