# Create and view distributions ----
# Draw simulated samples from several common distributions and plot a
# histogram of each one.

set.seed(2024) # fixed seed so the simulated samples are identical on every run
n_draws <- 10000 # number of random draws taken from each distribution

mynormal <- rnorm(n = n_draws, mean = 85, sd = 40)
mylognormal <- rlnorm(n = n_draws, meanlog = 1, sdlog = 0.1)
mybeta <- rbeta(n = n_draws, shape1 = 1, shape2 = 99)
mybinary <- rbinom(n = n_draws, size = 1, prob = 0.95)
mybinomial <- rbinom(n = n_draws, size = 2000, prob = 0.95)
myexponential <- rexp(n = n_draws, rate = 4.2)
mygamma <- rgamma(n = n_draws, shape = 2, rate = 2)
# rgamma(n, shape, rate = 1, scale = 1/rate)
mygeometric <- rgeom(n = n_draws, prob = 0.10)
mynegbinomial <- rnbinom(n = n_draws, size = 1000, prob = 0.97)
mypoisson <- rpois(n = n_draws, lambda = 10)

# One call per vector (rather than a loop) so each histogram keeps its
# automatic "Histogram of <name>" title.
hist(mynormal)
hist(mylognormal)
hist(mybeta)
hist(mybinary)
hist(mybinomial)
hist(myexponential)
hist(mygamma)
hist(mygeometric)
hist(mynegbinomial)
hist(mypoisson)
# can change around the parameters

#------------------------------------------------------------------------------
# Examples of distributions in action ----
# Each example loads a data set, plots a histogram of the response, fits a
# glm() with the matching family, and overlays a fitted curve with ggplot2.
# NOTE: `size` sets the line width in the ggplot2 layers below; ggplot2 >= 3.4.0
# prefers `linewidth`, but `size` is kept so older versions still work.
library(Stat2Data)
library(ggplot2)

# Normal
data("AppleStock")
head(AppleStock)
hist(AppleStock$Change) # normal

glm1 <- glm(Change ~ Volume, data = AppleStock, family = gaussian())
summary(glm1)

plot1 <- ggplot(data = AppleStock, aes(x = Volume, y = Change)) +
  # yintercept is a constant, so it is set as a parameter rather than in aes()
  geom_hline(yintercept = 0, size = 2) +
  geom_point(col = "blue") +
  geom_smooth(method = "lm", col = "red")
plot1

#----------------------------------------------------------
# Binary
data("WalkTheDogs")
head(WalkTheDogs)
hist(WalkTheDogs$Walk)

glm2 <- glm(Walk ~ Kcal, data = WalkTheDogs, family = binomial())
summary(glm2)

plot2 <- ggplot(data = WalkTheDogs, aes(x = Kcal, y = Walk)) +
  geom_point(col = "blue") +
  stat_smooth(
    method = "glm",
    method.args = list(family = binomial()),
    color = "red",
    size = 1
  )
plot2

#----------------------------------------------------------
# Binomial
data("FGByDistance")
head(FGByDistance)
hist(FGByDistance$Makes)

# Response given as (successes, failures) counts.
glm3 <- glm(
  cbind(Makes, N - Makes) ~ Dist,
  data = FGByDistance,
  family = binomial()
)
summary(glm3)

plot3 <- ggplot(data = FGByDistance, aes(x = Dist, y = PropMakes)) +
  geom_point(col = "blue") +
  # Fit the smoother on the proportion Makes / N weighted by the number of
  # attempts N. This is the same model as glm3; an unweighted binomial fit on
  # the proportions would differ from glm3 and raise a "non-integer #successes"
  # warning.
  stat_smooth(
    aes(y = Makes / N, weight = N),
    method = "glm",
    method.args = list(family = binomial()),
    color = "red",
    size = 1
  )
plot3

#----------------------------------------------------------
# Gamma
data("HorsePrices")
hist(HorsePrices$Price)

# Gamma() is called without arguments, so its default (inverse) link is used.
glm4 <- glm(Price ~ Height, data = HorsePrices, family = Gamma())
summary(glm4)

plot4 <- ggplot(data = HorsePrices, aes(x = Height, y = Price)) +
  geom_point(col = "blue") +
  stat_smooth(
    method = "glm",
    method.args = list(family = Gamma()),
    color = "red",
    size = 1
  )
plot4

#----------------------------------------------------------
# Poisson
data("GlowWorms")
hist(GlowWorms$Eggs)

glm5 <- glm(Eggs ~ Lantern, data = GlowWorms, family = poisson())
summary(glm5)

plot5 <- ggplot(data = GlowWorms, aes(x = Lantern, y = Eggs)) +
  geom_point(col = "blue") +
  stat_smooth(
    method = "glm",
    method.args = list(family = poisson()),
    color = "red",
    size = 1
  )
plot5

#----------------------------------------------------------
# References:
# Distributions in the stats package
# https://stackoverflow.com/questions/14048401/simulate-data-from-lognormal-in-r
# https://dnett.github.io/S510/27GLMbinomialAnnotated.PDF
# https://tysonbarrett.com/Rstats/chapter-5-generalized-linear-models.html