#Get Data and Functions
library(ggplot2) 
library(tidyverse)
library(datarium)
library(PairedData)

#------------------------------------------------------------------------
#Example 1: Is the mean difference in extra sleep for students different between two drugs?
#Test Assumptions
#Random Sampling
#depends on sample design and taken to be valid here

#Normality
#The paired t-test assumes the paired differences (not the raw values) are
#approximately normal, so the per-student differences are checked here,
#in the same way as Examples 2 and 3.
#Pairing by position relies on the rows of sleep being ordered by ID within each group.
sleep.diff <- with(sleep, extra[group == "1"] - extra[group == "2"])
hist(sleep.diff, col="grey")
#NOTE(review): the earlier "close enough" judgement was made on the raw values;
#re-inspect the histogram of differences before relying on it

#No extreme values
boxplot(sleep.diff)
#NOTE(review): with the built-in sleep data one difference (-4.6) appears to lie
#beyond the lower whisker; confirm and decide whether to keep it for the example

#Paired T-test
#The formula interface of t.test() rejects paired=TRUE from R 4.4.0 onwards,
#so the two groups are passed as separate vectors instead.
#Rows of sleep are ordered by ID within each group, so the i-th value of each
#vector belongs to the same student.
with(sleep, t.test(extra[group == "1"], extra[group == "2"],
                   alternative="two.sided", paired=TRUE))
#mean_diff=-1.58 (drug 1 minus drug 2), t=-4.0621, p=0.002833

#Graphs
#One line per student joining their extra sleep under each drug
ggplot(data=sleep, aes(x=group, y=extra, group=ID, colour=ID)) +
  geom_line(linewidth=1) +
  geom_point() +
  #reference line at zero extra sleep; the original call gave no intercept value,
  #so y=0 is assumed here -- TODO confirm the intended position
  geom_hline(yintercept=0) +
  #group is a factor with levels "1" and "2", so the limits are given as strings
  scale_x_discrete(limits=c("1","2"), labels=c("drug 1","drug 2"), expand=c(0.1,0.1)) +
  scale_y_continuous(limits=c(-2,6), breaks=-2:6) +
  labs(y="Extra sleep (hours)", x="Group") +
  theme(legend.position="none")
#------------------------------------------------------------------------

#Example 2: Is the mean difference in weight for mice different before and after a treatment?
  #(https://www.datanovia.com/en/lessons/how-to-do-a-t-test-in-r-calculation-and-reporting/how-to-do-paired-t-test-in-r/) 

#Test Assumptions
#Random Sampling
#depends on sample design and taken to be valid here

#Normality
#weight change for each mouse (after minus before)
mice2[["diff"]] <- mice2[["after"]] - mice2[["before"]]
hist(x=mice2$diff, col="grey")
#not normally distributed, but the sample is small, so we proceed with it as an example

#No extreme values
boxplot(x=mice2$diff)
#no extreme values apparent

#Paired T-test
#each mouse's weight before treatment is paired with its own weight after
t.test(
  x=mice2$before,
  y=mice2$after,
  paired=TRUE,
  alternative="two.sided"
)
#mean_diff=-199.48 (before minus after), t=-25.546, p<0.0001

#Graph
#Reshape to long format: one row per mouse per time point
mice2.long <- mice2 %>%
  pivot_longer(cols=c(before, after), names_to="group", values_to="weight") %>%
  #numeric time code (0 = before, 1 = after) derived from the label itself,
  #not from row position or a fixed number of mice
  mutate(group2=ifelse(group == "before", 0, 1)) %>%
  #keep all "before" rows first, then "after"
  arrange(group2, id)
head(mice2.long, 3)

#One line per mouse joining its weight before and after treatment
ggplot(data=mice2.long, aes(x=group, y=weight, group=id, colour=id)) +
  geom_line(linewidth=1) +
  #limits fix the left-to-right order; without them "after" would sort first
  scale_x_discrete(limits=c("before","after"), labels=c("Before","After"), expand=c(0.1,0.1)) +
  labs(y="Weight", x="Treatment") +
  theme(legend.position="none")
#------------------------------------------------------------------------


#Example 3: Is the mean blood level of lead different between pairs of control and exposed children?
data("BloodLead")

#Test Assumptions
#Random Sampling
#depends on sample design and taken to be valid here

#Normality
#difference in blood lead within each pair (exposed minus control)
BloodLead[["diff"]] <- BloodLead[["Exposed"]] - BloodLead[["Control"]]
hist(x=BloodLead$diff, col="grey")
#normally distributed

#No extreme values
boxplot(x=BloodLead$diff)
#one extreme value at upper end, but will go with it for example

#Paired T-test
#each exposed child is paired with its matched control
t.test(
  x=BloodLead$Exposed,
  y=BloodLead$Control,
  paired=TRUE,
  alternative="two.sided"
)
#mean_diff=15.9697 (exposed minus control), t=5.783, p<0.0001

#Graph
#Reshape to long format: one row per pair per exposure group
BloodLead.long <- BloodLead %>%
  pivot_longer(cols=c(Exposed, Control), names_to="group", values_to="lead")

#One line per pair joining the control and exposed blood lead levels
ggplot(data=BloodLead.long, aes(x=group, y=lead, group=Pair, colour=Pair)) +
  geom_line(linewidth=1) +
  theme(legend.position="none")