library(Stat2Data) library(ggplot2) library(MASS) library(agricolae) library(dplyr) #------------------------------------------------------------------------ #Example 1: Are car noise levels different across filter type and vehicle side? #Get Data data("AutoPollution") head(AutoPollution) AutoPollution$Type <- as.factor(AutoPollution$Type) AutoPollution$Side <- as.factor(AutoPollution$Side) #Create combined variable (for use in assumption testing) AutoPollution$Type_Side <-paste(AutoPollution$Type, AutoPollution$Side,sep=":") #Test Assumptions #Independence #depends on sample design and taken to be valid here #Normality ggplot(data=AutoPollution, aes(x=Noise, fill=Type_Side))+ geom_histogram(color=1, alpha=0.75, position="identity", binwidth = 10) #not normally distributed #Equal Variance boxplot(Noise~Type+Side, data=AutoPollution, col="grey") round(tapply(AutoPollution$Noise, AutoPollution$Type_Side, var),2) 1623.61/527.78 #3.08, okay #ANOVA #did not meet normality assumption, but Kruskal Wallis only works on 1-way #use the combined variable as if it is AOV1 <-kruskal.test(Noise~Type_Side, data=AutoPollution) AOV1 #Not significant #Graph AutoPollution_sum<-AutoPollution %>% group_by(Type,Side) %>% summarise(mean=mean(Noise), sd = sd(Noise), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) p1 <-ggplot(data=AutoPollution_sum, aes(x=Type, y=mean)) + geom_bar(aes(fill=Side), stat="identity", position=position_dodge()) + geom_errorbar(aes(ymin=ll, ymax=ul, group=Side), width=0.1, position=position_dodge(0.9))+ scale_fill_manual(values=c("grey","blue")) p1 #------------------------------------------------------------------------ #Examples 2: Is blood pressure different across smoking and weight status? boxplot(SystolicBP~Smoke+Overwt, data=Blood1) #Get Data data("Blood1") head(Blood1) Blood1$Smoke <- as.factor(Blood1$Smoke) Blood1$Overwt <- as.factor(Blood1$Overwt) #Create combined variable (for use in assumption testing) Blood1$Smoke_Overwt <-paste(Blood1$Smoke, Blood1$Overwt, sep=":") #Test Assumptions #Independence #depends on sample design and taken to be valid here #Normality ggplot(data=Blood1, aes(x=SystolicBP))+ geom_histogram(fill="red", color="black",position="identity", binwidth = 10) ggplot(data=Blood1) + geom_histogram(aes(x=SystolicBP), bins=5, colour="black", fill="red") + facet_wrap(~Smoke+Overwt) #good enough #Equal Variance boxplot(SystolicBP~Smoke+Overwt, data=Blood1, col="red") round(tapply(Blood1$SystolicBP, Blood1$Smoke_Overwt, var),2) 839.33/533.61 #1.57, fine #ANOVA AOV2 <-aov(SystolicBP~Smoke*Overwt, data=Blood1) summary(AOV2) #both variable significant, but not interaction posthoc2.1 <-TukeyHSD(AOV2, "Smoke") posthoc2.1 plot(posthoc2.1) #smoking (1) had greater BP than non-smoking (0) posthoc2.2 <-TukeyHSD(AOV2, "Overwt") posthoc2.2 plot(posthoc2.2) #Weight 2 and 1 were different from weight 0, but not e/o Blood1_sum1<-Blood1 %>% group_by(Smoke) %>% summarise(mean=mean(SystolicBP), sd = sd(SystolicBP), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) p2.1 <-ggplot(data=Blood1_sum1, aes(x=Smoke, y=mean, fill=Smoke)) + geom_bar(stat="identity", colour="black") + geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1)+ labs(y="Mean Systolic Blood Pressure", x="Smoking Status")+ scale_fill_manual(values=c("white","grey")) p2.1 Blood1_sum2<-Blood1 %>% group_by(Overwt) %>% summarise(mean=mean(SystolicBP), sd = sd(SystolicBP), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) p2.2 <-ggplot(data=Blood1_sum2, aes(x=Overwt, y=mean, fill=Overwt)) + geom_bar(stat="identity", colour="black") + geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1)+ labs(y="Mean Systolic Blood Pressure", x="Weight Status")+ scale_fill_manual(values=c("white","yellow","red"))+ geom_text(aes(label=c("A","B","B"), group=Overwt), vjust=-0.85, size=6, position=position_dodge(0.9)) p2.2 #------------------------------------------------------------------------ #Example 3: Is tooth growth in guinea pigs different across Vitamin C supplement and dose? #Get data head(ToothGrowth) ToothGrowth$dose <-as.factor(ToothGrowth$dose) #Test Assumptions #Independence #depends on sample design and taken to be valid here #Normality hist(ToothGrowth$len, col="orange") #good enough ggplot(data=ToothGrowth) + geom_histogram(aes(x=len), bins=5, colour="black", fill="orange") + facet_wrap(~dose+supp) #good enough #Equal Variance boxplot(len~dose+supp,data=ToothGrowth, col='orange') ToothGrowth_var<-ToothGrowth %>% group_by(dose,supp) %>% summarise(var=var(len)) ToothGrowth_var 23.0/6.33 #3.63, good enough #ANOVA AOV3 <-aov(len~dose*supp, data=ToothGrowth) summary(AOV3) #both variables significant, so is interaction posthoc3 <-TukeyHSD(AOV3, "dose:supp") posthoc3 plot(posthoc3) dose_supp <-with(ToothGrowth, interaction(dose, supp)) AOV3.alt <-aov(len~dose_supp, data=ToothGrowth) HSD.test(AOV3.alt, "dose_supp", console=T) #need to switch over letters to organize #A <- OJ_0.5, VC_1.0 #B <- OJ_1.0, OJ_2.0, VC_2.0 #C <- VC_05 #Graph ToothGrowth_sum<-ToothGrowth %>% group_by(dose,supp) %>% summarise(mean=mean(len), sd = sd(len), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) p3 <-ggplot(data=ToothGrowth_sum, aes(x=dose, y=mean)) + geom_bar(aes(fill=supp), stat="identity", color="black", position=position_dodge()) + geom_errorbar(aes(ymin=ll, ymax=ul, group=supp), width=0.1, position=position_dodge(0.9)) + scale_fill_manual(values=c("khaki1", "khaki3", "khaki4","khaki1", "khaki3", "khaki4")) + #theme(legend.position="none")+ labs(y="Mean tooth length", x="Dose")+ scale_y_continuous(limits=c(0,35)) + geom_text(aes(label=c("A","B","C","A","C","C"), group=supp), vjust=-3, size=6, position=position_dodge(0.9)) p3