#Making Magnificently Good Graphs in R #Setup: library(MASS) library(gmodels) library(dplyr) library(ggplot2) #------------------Histograms---------------------------------------------------------------------------# #references: #https://www.datacamp.com/community/tutorials/make-histogram-ggplot2 #https://www.r-graph-gallery.com/histogram_several_group.html #https://homepage.divms.uiowa.edu/~luke/classes/STAT4580/histdens.html #Dataset starwars2<-data.frame(starwars$name,starwars$height,starwars$mass,starwars$gender) starwars2<-starwars2[which(starwars2$starwars.gender=='male'|starwars2$starwars.gender=='female'),] starwars2<-na.exclude(starwars2) #Basic histogram h1 <-hist(Nile) h1 #Simple histogram in ggplot h2 <- ggplot(data=starwars2, aes(starwars.height))+ geom_histogram(bins=6, fill="grey", col="black") h2 #Two-sample histogram in ggplot h3 <- ggplot(data=starwars2, aes(x=starwars.height, fill=starwars.gender))+ geom_histogram(bins=6, col="black", alpha=0.6, position='identity')+ scale_fill_manual(values=c("pink", "blue")) h3 #************************** #Basic histogram -> element modification h4 <- hist(Nile, col="blue", border="black", xlab="Nile River flow rate", xlim=c(0,1500), breaks=6, main='') h4 rug(Nile) box() #Simple histogram in ggplot -> element modification x <- seq(50, 300, length.out=250) df <- with(starwars2, data.frame(x = x, y = dnorm(x, mean(starwars.height), sd(starwars.height)))) h5 <- ggplot(data=starwars2, aes(x=starwars.height))+ geom_histogram(aes(y=stat(density)), bins=6, fill="grey", col="black")+ labs(x="Character Height") + scale_x_continuous(breaks=c(50, 100, 150, 200, 250, 300))+ geom_line(data=df, aes(x=x, y=y), color="red") h5 #Two-sample histogram in ggplot -> element modification h6 <- ggplot(data=starwars2, aes(x=starwars.height, fill=starwars.gender))+ geom_histogram(bins=6, col="black", alpha=0.6, position='identity')+ scale_fill_manual(values=c("pink", "blue"), name="Gender", labels=c("Female","Male"))+ labs(x="Character Height", y="Count") + scale_x_continuous(breaks=c(50, 100, 150, 200, 250, 300))+ theme(legend.background=element_rect(colour="black"),legend.position ="top") h6 #### Exploration #### #try creating a two-sample histogram by gender for the variable 'starwars.mass' #1) give the two histograms different colors #2) make sure the x-axis label is accurate #3) play around with more or fewer bins #------------------Box Plots---------------------------------------------------------------------------# #references: #http://www.sthda.com/english/wiki/ggplot2-box-plot-quick-start-guide-r-software-and-data-visualization #http://www.sthda.com/english/wiki/ggplot2-error-bars-quick-start-guide-r-software-and-data-visualization #http://sape.inf.usi.ch/quick-reference/ggplot2/shape #http://www.sthda.com/english/wiki/ggplot2-dot-plot-quick-start-guide-r-software-and-data-visualization #Basic boxplot bx1 <-boxplot(data=cabbages, HeadWt~Cult) bx1 #Simple boxplot in ggplot bx2 <-ggplot(data=chickwts, aes(x=feed, y=weight)) + geom_boxplot() bx2 #Two-way boxplot in ggplot bx3 <-ggplot(data=cabbages, aes(y=VitC, x=Date, fill=Cult)) + geom_boxplot() bx3 #************************** #Basic boxplot -> element modification bx4 <-boxplot(data=cabbages, HeadWt~Cult, xlab="Cultivar of Cabbage", col=c("green","purple"), ylab="Cabbage Head Weight", ylim=c(0,5)) bx4 #Simple boxplot in ggplot -> element modification bx5 <-ggplot(data=chickwts, aes(x=feed, y=weight, fill=feed)) + stat_boxplot(geom ='errorbar') + geom_boxplot(outlier.colour="red", outlier.shape=18, outlier.size=3) + scale_fill_manual(values=c("orange", "brown", "green", "grey", "white", "yellow")) + theme(legend.position="none") bx5 #Two-way boxplot in ggplot -> element modification bx6 <-ggplot(data=cabbages, aes(y=VitC, x=Date, fill=Cult)) + geom_boxplot() + stat_boxplot(geom ='errorbar') + scale_fill_manual(values=c("green", "purple"), name="Cultivar", labels=c("c39","c52"))+ labs(x="Date", y="Ascorbic acid content") + scale_x_discrete(labels=c("Day 16", "Day 20", "Day 21")) + geom_dotplot(binaxis="y", stackdir="center", dotsize=0.60,position=position_dodge(0.75)) bx6 #------------------Bar Plots---------------------------------------------------------------------------# #references: #https://www.r-graph-gallery.com/4-barplot-with-error-bar.html #https://www.statmethods.net/advgraphs/axes.html #Basic bar plot V <-mtcars[mtcars$vs==0,]; S <-mtcars[mtcars$vs==1,] V.mean <-mean(V$qsec); S.mean <-mean(S$qsec) ll<-c(ci(V$qsec)[2], ci(S$qsec)[2]) ul<-c(ci(V$qsec)[3], ci(S$qsec)[3]) x<-c(1,2); y<-c(V.mean,S.mean) x<-barplot(y) br1<-barplot(y, xlab='vs', ylab='qsec', ylim=c(0,22)) br1 + arrows(x,ul,x,ll, length=0.05, angle=90, code=3) +box() #Simple bar plot in ggplot iris_sum<-iris %>% group_by(Species) %>% summarise(mean=mean(Sepal.Length), sd = sd(Sepal.Length), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) br2 <-ggplot(data=iris_sum, aes(x=Species, y=mean, fill=Species)) + geom_bar(stat="identity") + geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1) br2 #Two-way bar plot in ggplot warpbreaks_sum<-warpbreaks %>% group_by(wool,tension) %>% summarise(mean=mean(breaks), sd = sd(breaks), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) br3 <-ggplot(data=warpbreaks_sum, aes(x=wool, y=mean)) + geom_bar(aes(fill=tension), stat="identity", position=position_dodge()) + geom_errorbar(aes(ymin=ll, ymax=ul, group=tension), width=0.1, position=position_dodge(0.9)); br3 #************************** #Basic bar plot -> element modification br4<-barplot(y, col=c("red","orange"), ylim=c(0,22), axes=F) axis(side=1, at=x, labels=c("V-shaped","Straight"), cex.lab=1.5, cex.axis=1.2) axis(2, at=c(0,2.5,5,7.5,10,12.5,15,17.5,20,22.5), las=2, cex.lab=1.5, cex.axis=0.80) mtext(side=1, line=2.5, "Engine type", cex=1.3) mtext(side=2, line=2.5, "1/4 mile time (s)", cex=1.3) br4 + arrows(x,ul,x,ll, length=0.05, angle=90, code=3) +box() #Simple bar plot in ggplot -> element modification br5 <-ggplot(data=iris_sum, aes(x=Species, y=mean, fill=Species)) + geom_bar(stat="identity", color="black") + geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1) + labs(y="Mean sepal length", x="Species") + scale_y_continuous(limits=c(0,9),breaks=c(0,1,2,3,4,5,6,7,8,9)) + scale_fill_manual(values=c("slateblue3", "purple", "plum1")) + geom_segment(aes(x=1,xend=2,y=6.5,yend=6.5)) + geom_segment(aes(x=2,xend=3,y=7.5,yend=7.5)) + geom_segment(aes(x=1,xend=3,y=8.5,yend=8.5)) + geom_text(aes(label="*"), x=c(1.5,2,2.5), y=c(6.75,8.75,7.75), size=7) br5 #Two-way bar plot in ggplot -> element modification br6 <-ggplot(data=warpbreaks_sum, aes(x=wool, y=mean)) + geom_bar(aes(fill=tension), stat="identity", color="black", position=position_dodge()) + geom_errorbar(aes(ymin=ll, ymax=ul, group=tension), width=0.1, position=position_dodge(0.9)) + scale_y_continuous(limits=c(0,70),breaks=c(0,10,20,30,40,50,60,70)) + geom_text(aes(label=c("A","B","B","B","B","B"), group=tension), vjust=-5, size=6, position=position_dodge(0.9))+ theme_classic() + geom_hline(aes(yintercept=0)) br6 #------------------Scatter Plots---------------------------------------------------------------------# #references: #http://www.sthda.com/english/wiki/ggplot2-scatter-plots-quick-start-guide-r-software-and-data-visualization #http://www.sthda.com/english/wiki/r-plot-pch-symbols-the-different-point-shapes-available-in-r #https://stackoverflow.com/questions/6862742/draw-a-circle-with-ggplot2 #Basic scatter plot s1 <-plot(cars$speed~cars$dist) s1 #Simple scatter plot in ggplot Animals$log_brain <-log(Animals$brain) Animals$log_body <-log(Animals$body) s2 <-ggplot(data=Animals, aes(x=log_brain, y=log_body)) + geom_point() s2 #Two-way scatter plot in ggplot s3 <-ggplot(data=crabs, aes(x=CL, y=CW, fill=sex, color=sex)) + geom_point() s3 #************************** #Basic scatter plot -> element modification s4 <-plot(cars$speed~cars$dist, ylim=c(0,25), axes=F, xlab='', ylab='', pch=19, col="black", cex=1.2) axis(side=1, at=c(0,10,20,30,40,50,60,70,80,90,100,110,120)) axis(side=2, at=c(0,5,10,15,20,25)) mtext(side=1, line=2.5, "Speed (mph)", cex=1.3) mtext(side=2, line=2.5, "Stopping distance (ft)", cex=1.3) s4; box(); abline(lm(cars$speed~cars$dist)) #Simple scatter plot in ggplot -> element modification circ = data.frame(x=4.5, y=10.25) s5 <-ggplot(data=Animals, aes(x=log_brain, y=log_body)) + geom_point() + geom_hline(aes(yintercept=0)) + geom_smooth(method=lm, color="red") + theme_classic() + labs(y="Log of body size", x="Log of brain size") + scale_y_continuous(limits=c(-5,15),breaks=c(-5,-2.5,0,2.5,5,7.5,10,12.5,15)) + scale_x_continuous(limits=c(-1,9),breaks=c(-1,0,1,2,3,4,5,6,7,8,9)) + geom_point(aes(x=x, y=y), data=circ, size=40, shape=1, color="red") + geom_text(aes(label="potential outliers"), x=c(4.5), y=c(14), size=4, color="red") s5 #Two-way scatter plot in ggplot -> element modification summary(lm(CW~CL, data=crabs)) s6 <-ggplot(data=crabs, aes(x=CL, y=CW, color=sp, shape=sex)) + geom_point(size=2) + labs(x="Carapace length", y="Carapace width", color="Species", shape="Sex") + scale_color_manual(labels=c("Blue","Orange"), values=c("blue","orange")) + scale_shape_manual(labels=c("Female","Male"), values=c(4,16)) + geom_abline(aes(intercept=1.09, slope=1.1)) + theme_classic() + guides(fill="none") s6 #### Exploration #### #try building your own mock data and creating a graph from it ### template ### x <-c(1,2,3,4,5,6,7,8,9,10,11,12) y <-c(32,45,54,33,35,67,45,78,99,100,77,103) df <-data.frame("month"=x, "rate"=y) s_temp <-ggplot(data=df, aes(x=month, y=rate)) + geom_point() + geom_smooth(method=lm) s_temp ################ #------------------Other Plots---------------------------------------------------------------------# #references: #https://stats.idre.ucla.edu/r/faq/how-can-i-visualize-longitudinal-data-in-ggplot2/ #https://mgimond.github.io/Stats-in-R/Logistic.html #http://www.cookbook-r.com/Statistical_analysis/Logistic_regression/ #Spaghetti plot in ggplot o1 <-ggplot(data=sleep, aes(x=group, y=extra, group=ID)) + geom_line() o1 #Logistic regression plot in ggplot midwest$log_popdensity <-log(midwest$popdensity) o2 <-ggplot(data=midwest, aes(x=log_popdensity, y=inmetro)) + geom_point() + stat_smooth(method="glm", se=FALSE, method.args=list(family=binomial)) o2 #Bubble plot in ggplot o3 <-ggplot(data=Cars93, aes(x=EngineSize, y=RPM, size=Horsepower))+ geom_point() o3 #************************** #Spaghetti plot in ggplot -> element modification o4 <-ggplot(data=sleep, aes(x=group, y=extra, group=ID, colour=ID)) + geom_line(size=1) + theme(legend.position="none") + labs(y="Extra sleep (hours)", x="Drug") + scale_y_continuous(limits=c(-2,6),breaks=c(-2,-1,0,1,2,3,4,5,6)) + scale_x_discrete(limits=c(1,2),labels=c("before","after"), expand=c(0.1,0.1)) o4 #Logistic regression plot in ggplot -> element modification o5 <-ggplot(data=midwest, aes(x=log_popdensity, y=inmetro)) + geom_point(alpha=0.5, position=position_jitter(width=.02,height=.02)) + stat_smooth(method="glm", se=FALSE, method.args=list(family=binomial), color="red", size=1)+ labs(x="Log of population density", y="Probability of being in a metro area")+ scale_x_continuous(limits=c(4,12), breaks=c(4,5,6,7,8,9,10,11,12)) o5 #Bubble plot in ggplot -> element modification o6 <-ggplot(data=Cars93, aes(x=EngineSize, y=RPM, size=Horsepower, fill=Type))+ geom_point(alpha=0.75, shape=21) + scale_fill_manual(values=c("red","orange","yellow","green","blue","purple"))+ theme_classic() o6 ### ### ### ######################## Special Treat: Functions for Graph Creation ################################### #------------Two-sample t-test barchart---------------------- #function that creates a barplot for a two-sample t-test #numset & catset must be in dataset$variable format; cat1, cat2 must be format they come in in the dataset; #numlab, & catlab must be strings; barcols must be list of cols barchart.2sample <- function(numset, catset, cat1, cat2, numlab, catlab, barcols) { c1.cat <-numset[catset==cat1] c2.cat <-numset[catset==cat2] c1.mean <-mean(c1.cat) c2.mean <-mean(c2.cat) ll<-c(ci(c1.cat)[2], ci(c2.cat)[2]) ul<-c(ci(c1.cat)[3], ci(c2.cat)[3]) x<-c(1,2) y<-c(c1.mean,c2.mean) x<-barplot(y) ymax<-ul[2] if (ul[1]>ul[2]){ ymax<-ul[1] } ymin<-ll[2] if (ll[1]0){ ymin<-0 } barplot(y, xlab=catlab, ylab=numlab, ylim=c(ymin*1.10,ymax*1.10), col=barcols, axes=F) arrows(x,ul,x,ll, length=0.05, angle=90, code=3) axis(1, at=x, labels=c(cat1,cat2)) axis(2) box() clip(0.1,2.49,-100, 100) abline(h=0) } #Example barchart.2sample(mtcars$mpg, mtcars$vs, 0, 1, "MPG", "engine type", c("red","blue")) #------------1-way ANOVA barchart---------------------- #Function for creating 1-way ANOVA barcharts #df is the dataset, cat_var is the categorical variable, num_var is the numerical variable #creates a plot stored in df_plot -> can add additional customization to it barchart.anova <- function(df, cat_var, num_var) { cat_var <- enquo(cat_var) num_var <- enquo(num_var) df_sum<-df %>% group_by(!!cat_var) %>% summarise(mean=mean(!!num_var), sd = sd(!!num_var), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) #print(df_sum) df_plot <<-ggplot(df_sum, aes(!!cat_var, mean, fill=!!cat_var))+ geom_bar(stat="identity", color="black")+ geom_hline(yintercept = 0) + geom_errorbar(aes(ymin=ll, ymax=ul), width=0.1) + theme_classic() + theme(legend.position="none") } #Example barchart.anova(iris, Species, Sepal.Length) #basic df_plot #customized df_plot + labs(y="Sepal length (cm", x="Species") + scale_y_continuous(limits=c(0,7),breaks=c(0:7))+ geom_text(aes(label=c('*','*','*')), vjust=-0.8, size=6) #------------2-way ANOVA barchart---------------------- #Function for creating 2-way ANOVA barcharts #df is the dataset, cat1 and cat2 are the categorical variables, num_var is the numerical variable #let_list is a list of letters/symbols to indicate different groups; needs to be in c("A","B",...) format #dodg_n is a number to set how much the barcharts are dodged by (try 0.9 as your default and adjust from there) #v_just is the vertical adjustment of the text from the let_list (try -4 as your default and adjust from there) #creates a plot stored in df_plot -> can add additional customization to it barchart.2anova <- function(df, cat1, cat2, num_var,let_list,dodg_n,v_just) { cat1 <- enquo(cat1) cat2 <- enquo(cat2) num_var <- enquo(num_var) df_sum<-df %>% group_by(!!cat1,!!cat2) %>% summarise(mean=mean(!!num_var), sd = sd(!!num_var), error = qt(0.975,df=n()-1)*sd/sqrt(n()), ul = mean + error, ll = mean - error) #print(df_sum) df_plot <<-ggplot(df_sum, aes(x=as.factor(!!cat1), y=mean))+ geom_bar(aes(fill=as.factor(!!cat2)),stat="identity", color="black", position=position_dodge()) + geom_hline(yintercept = 0) + geom_text(aes(label=let_list,group=as.factor(!!cat2)), vjust=v_just, size=4, position=position_dodge(dodg_n))+ geom_errorbar(aes(ymin=ll, ymax=ul, group=as.factor(!!cat2)), width=0.1, position=position_dodge(dodg_n)) + theme_classic() } #Example barchart.2anova(warpbreaks, tension, wool, breaks, c("&","#","#","#","#","#"), 0.9,-8.5) #basic df_plot #customized df_plot + labs(y="mean break number", x="tension", fill="wool type") + scale_fill_manual(values=c("grey","brown")) + scale_y_continuous(limits=c(0,70),breaks=c(0,10,20,30,40,50,60,70))