#R-Code Example for Statistical Rules to Tape to Your Forehead #Setup #Dataset from https://datadryad.org/stash/dataset/doi:10.5061/dryad.d1f0d: setwd("C:/Users/.../Example Datasets") #NEED TO SET YOUR OWN WORKING DIRECTORY shark<-read.csv("Bird_etal_shark_trophic_geography.csv") library(corrplot) library(car) library(ggplot2) library(MASS) library(gmodels) library(dplyr) library(imager) library(naniar) #------------------------------------------------------------------------------------------------------------------; names(shark) head(shark) #1) Annunciation #Are d13C levels different across shark species, or shark habitat variables? #The purpose of this study was to test the hypothesis that d13C levels were different across shark species. #The null hypothesis is that there is no difference. #------------------------------------------------------------------------------------------------------------------; #2) Exploration #overall summary(shark) #continuous variables hist(shark$d13C, col="orange") #measure of ratio of 13C: 12C, in parts per thousand qqnorm(shark$d13C, col="orange");qqline(shark$d13C) hist(shark$TL.cm., col="grey") qqnorm(shark$TL.cm., col="grey");qqline(shark$TL.cm.) hist(shark$C.N, col="yellow") qqnorm(shark$C.N, col="yellow");qqline(shark$C.N) hist(shark$Depth, col="blue") qqnorm(shark$Depth, col="blue");qqline(shark$Depth) #categorical variables summary(shark$Species, maxsum=1000) summary(shark$ProvCode) summary(shark$Habitat) #plots against d13C plot(x=shark$TL.cm., y=shark$d13C, col="grey") plot(x=shark$C.N, y=shark$d13C, col="yellow") plot(x=shark$Depth, y=shark$d13C, col="blue") shark_species<-c("Prionace glauca", "Carcharhinus plumbeus", "Scoliodon laticaudus ", "Squalus acanthias") shark1 <- shark[which(shark$Species==shark_species[1]),] shark2 <- shark[which(shark$Species==shark_species[2]),] shark3 <- shark[which(shark$Species==shark_species[3]),] shark4 <- shark[which(shark$Species==shark_species[4]),] shark_top <-rbind(shark1, shark2, shark3, shark4) shark_top <-droplevels(shark_top) plot(shark_top$d13C~shark_top$Species) shark_Prov <-c("EAFR", "NECS", "CARB", "AUSE", "NWCS", "NASE") sharkA <- shark[which(shark$ProvCode==shark_Prov[1]),] sharkB <- shark[which(shark$ProvCode==shark_Prov[2]),] sharkC <- shark[which(shark$ProvCode==shark_Prov[3]),] sharkD <- shark[which(shark$ProvCode==shark_Prov[4]),] sharkE <- shark[which(shark$ProvCode==shark_Prov[5]),] sharkF <- shark[which(shark$ProvCode==shark_Prov[6]),] shark_top2 <-rbind(sharkA, sharkB, sharkC, sharkD, sharkE, sharkF) shark_top2 <-droplevels(shark_top2) plot(shark_top2$d13C~shark_top2$ProvCode) plot(shark$d13C~shark$Habitat) #Correlations and other interactions shark_vars <- c("d13C", "TL.cm.", "C.N", "Depth") shark_cont <-na.omit(shark[shark_vars]) shark_cor <- cor(shark_cont) corrplot(shark_cor, method="number") #Design and dependencies summary(shark_top) ggplot(data=shark_top, aes(y=d13C, x=Species, fill=Habitat)) + geom_boxplot() ggplot(data=shark_top, aes(y=d13C, x=Species, fill=ProvCode)) + geom_boxplot() #------------------------------------------------------------------------------------------------------------------; #3) Assumptions #Y-variable #normality shapiro.test(shark_top$d13C) hist(shark_top$d13C, col="orange") qqnorm(shark_top$d13C, col="orange");qqline(shark_top$d13C) #X-variables #linearity + homoscedascity plot(x=shark_top$C.N, y=shark_top$d13C, col="yellow") shark_top$log.C.N <-log(shark_top$C.N) plot(x=shark_top$log.C.N, y=shark_top$d13C, col="goldenrod") plot(x=shark_top$TL.cm., y=shark_top$d13C, col="grey") plot(x=shark_top$Depth, y=shark_top$d13C, col="blue") #homogeneity of variance plot(shark_top$d13C~shark_top$Species) bartlett.test(d13C~Species, data = shark_top) #------------------------------------------------------------------------------------------------------------------; #4) Building shark_top_na <-na.omit(shark_top) #Model Type: try a linear model #Model equation; # d13C = B0 + Species + B1(Depth) + B2(TL.cm.) + B3(C.N) #Model Selection lmMod <- lm(d13C ~ Species + Depth + TL.cm. + C.N , data = shark_top_na) step(lmMod, direction = "both") #took out Depth #------------------------------------------------------------------------------------------------------------------; #5) Testing #------------------------------------------------------------------------------------------------------------------; #simplest model levels(shark_top$Species) lm1 <- lm(d13C ~ Species, data = shark_top) lm1 summary(lm1) #fuller model lm2 <- lm(d13C ~ Species + TL.cm. + C.N , data = shark_top) lm2 summary(lm2) View(shark_top[,1:7]) #issues with missing data vis_miss(shark_top[,1:7]) gg_miss_case(shark_top[,1:7], facet=Species) #final model with just Species summary(lm1) av1 <-aov(lm1) summary(av1) tukey.test<-TukeyHSD(av1) tukey.test plot(tukey.test) #6) Justifying plot(lm1) #fit statistic or AIC compared to others #------------------------------------------------------------------------------------------------------------------; #7) Presentation #Function for creating summary stats barchart.anova <- function(df, cat_var, num_var) { cat_var <- enquo(cat_var) num_var <- enquo(num_var) df_sum<<-df %>% group_by(!!cat_var) %>% summarise(mean=mean(!!num_var), sd = sd(!!num_var), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) } #Function call barchart.anova(shark_top, Species, d13C) print(df_sum) summary(lm1) # Mean d13C values were significantly different across all four species of sharks (F(3,976)=138.2, p<0.0001). # S. acanthias had the lowest mean value (-18.9), followed by C. plumbeus (-18.2), # P. glauca (-17.8), and S. laticaudus (-16.5). #table -not needed #plot df_plot <-ggplot(df_sum, aes(Species, mean))+ geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1) + geom_point(size=3) + theme_bw() + labs(y="mean d13C values", x="shark species") df_plot box() #images sharkpic1 <-load.image("C:/Users/Mark.Williamson.2/Desktop/Williamson Images/Modules/Statistical Rules/640px-Carcharhinus_plumbeus_georgia sandbar shark.jpg") sharkpic2 <-load.image("C:/Users/Mark.Williamson.2/Desktop/Williamson Images/Modules/Statistical Rules/640px-Scoliodon_laticaudus12 spadenose shark.jpg") sharkpic3 <-load.image("C:/Users/Mark.Williamson.2/Desktop/Williamson Images/Modules/Statistical Rules/640px-Squalus_acanthias_stellwagen spiny dogfish.jpg") sharkpic4 <-load.image("C:/Users/Mark.Williamson.2/Desktop/Williamson Images/Modules/Statistical Rules/Prionace glaucha blue shark Wiki Commons.jpg") par(mfrow=c(2,2)) plot(sharkpic1, axes=F, main="Carcharhinus plumbeus (sandbar shark)"); box() plot(sharkpic2, axes=F, main="Scoliodon laticaudus (spadenose shark)"); box() plot(sharkpic3, axes=F, main="Squalus acanthias (spiny dogfish)"); box() plot(sharkpic4, axes=F, main="Prionace glaucha (blue shark)"); box() par(mfrow=c(1,1)) #------------------------------------------------------------------------------------------------------------------; #8) Interpretation #------------------------------------------------------------------------------------------------------------------; #Our hypothesis was correct, d13C levels were significantly different across 4 of the most common species of sharks # from the Bird et al. shark dataset. Given the missing data and contingent nature of some of the variables, we were # not able to use other variables such as length, depth, habitat, or geographical area in the final analysis.