/* ########### Setup: import combined dataset and create separate datasets for each subset ############# */
/*
   Purpose of this section:
     1. Import the combined CSV into WORK.Combined.
     2. Carve one data set per example out of Combined, dropping the padding rows
        that belong to other examples.
     3. Draw the histogram, box plot, bar plot and basic scatter plot examples.

   Conventions used below:
     - Block comments are used instead of "*...;" comments because statement-style
       comments may not contain unmatched quotation marks, and their contents
       (e.g. URLs containing "&name") are visible to the macro processor.
     - Numeric variables are tested with missing() rather than compared with the
       character literal ".", which would force an implicit character-to-numeric
       conversion. Character key variables are tested for blank (the character
       missing value) as well as for a literal ".".
     - Every step ends with an explicit RUN so that no step depends on the next
       one to supply its boundary.
*/
PROC IMPORT
    datafile='/home/markwilliamson20/my_courses/markwilliamson0/MW_Datasets_2021/Presentation and Module Datasets 2021/Combined_R_Datasets.csv'
    dbms=csv
    out=Combined
    replace;
    getnames=yes;
    guessingrows=1000;
run;

PROC PRINT data=Combined;
run;

/* Nile */
DATA Nile;
    set Combined;
    keep Nile;
    if missing(Nile) then delete;
run;

PROC PRINT data=Nile;
run;

/* starwars */
DATA starwars;
    set Combined;
    keep name height mass gender;
    if missing(height) then delete;
    if missing(mass) then delete;
    if gender not in ("male", "female") then delete;
run;

PROC PRINT data=starwars;
run;

/* cabbages */
DATA cabbages;
    set Combined;
    keep Cult Date HeadWt VitC;
    if missing(Cult) or Cult = "." then delete;
    if missing(VitC) then delete;
run;

PROC PRINT data=cabbages;
run;

/* chickwts */
/* weight2 is a copy of weight; the box plots below use weight2 as the analysis variable. */
DATA chickwts;
    set Combined;
    keep weight feed weight2;
    if missing(weight) then delete;
    if missing(feed) or feed = "." then delete;
    weight2 = weight;
run;

PROC PRINT data=chickwts;
run;

/* iris */
DATA iris;
    set Combined;
    keep SepalLength Species;
    if missing(SepalLength) then delete;
run;

PROC PRINT data=iris;
run;

/* warpbreaks */
DATA warpbreaks;
    set Combined;
    keep breaks wool tension;
    if missing(breaks) then delete;
run;

PROC PRINT data=warpbreaks;
run;

/* cars */
DATA cars;
    set Combined;
    keep speed dist;
    if missing(speed) then delete;
run;

PROC PRINT data=cars;
run;

/* crabs */
DATA crabs;
    set Combined;
    keep sp sex CL CW;
    if missing(sp) or sp = "." then delete;
    if missing(CW) then delete;
run;

PROC PRINT data=crabs;
run;

/* sleep */
DATA sleep;
    set Combined;
    keep extra group ID;
    if missing(extra) then delete;
run;

PROC PRINT data=sleep;
run;

/* midwest */
DATA midwest;
    set Combined;
    keep county popdensity inmetro;
    if missing(county) or county = "." then delete;
run;

PROC PRINT data=midwest;
run;

/* Cars93 */
DATA Cars93;
    set Combined;
    keep Manufacturer Type EngineSize Horsepower RPM;
    if missing(Manufacturer) or Manufacturer = "." then delete;
    if missing(EngineSize) then delete;
run;

PROC PRINT data=Cars93;
run;

/* ######################### Histograms ######################### */
/* references:
   https://www.lexjansen.com/wuss/2015/141_Final_Paper_PDF.pdf
   https://blogs.sas.com/content/iml/2018/12/03/tips-customize-legends-proc-sgplot.html
*/

/* Basic Histogram */
PROC SGPLOT data=Nile;
    histogram Nile;
run;

/* Two-Sample Histogram */
PROC SGPLOT data=starwars;
    histogram height / group=gender transparency=0.5;
run;

/* ######################### */
/* Basic Histogram -> element modification */
PROC SGPLOT data=Nile;
    histogram Nile / fillattrs=(color="blue") nbins=6;
    xaxis values=(0,250,500,750,1000,1250,1500) label="Nile River flow rate";
run;

/* Two-Sample Histogram -> element modification */
PROC SGPLOT data=starwars;
    histogram height / group=gender transparency=0.5 nbins=10 scale=count;
    density height / group=gender type=normal;
    xaxis valueattrs=(size=12pt color="red") labelattrs=(size=12pt color="red") tickstyle=across label="Height (cm)";
    yaxis valueattrs=(size=12pt color="red") labelattrs=(size=12pt color="red") tickstyle=across;
run;

/* ######################### Box Plots ########################## */
/* references:
   https://blogs.sas.com/content/iml/2012/10/17/specify-the-colors-of-groups-in-sas-statistical-graphics.html
   https://communities.sas.com/t5/Graphics-Programming/SGPLOT-VBOX-Change-Category-Color/td-p/425262
   https://communities.sas.com/t5/Graphics-Programming/SGPLOT-VBOX-Change-Category-Color/td-p/608403
   https://documentation.sas.com/?cdcId=pgmsascdc&cdcVersion=9.4_3.5&docsetId=grstatproc&docsetTarget=n18szqcwir8q2nn10od9hhdh2ksj.htm&locale=en
   https://support.sas.com/content/dam/SAS/support/en/books/pro-template-made-easy-a-guide-for-sas-users/62007_Appendix.pdf
*/

/* Basic Boxplot */
PROC SGPLOT data=chickwts;
    vbox weight2 / category=feed;
run;

/* Two-Way Boxplot */
PROC SGPLOT data=cabbages;
    vbox VitC / category=Date group=Cult;
run;

/* ######################### */
/* Basic Boxplot -> element modification */
/* Attribute map: one row per feed value. Every row must carry the same id
   (chkID) that the plot references through attrid=, otherwise that value
   falls back to the default style. */
DATA chickwtsAttrMap;
    length id $5 value $10 FillColor $10;
    input id value FillColor;
    datalines;
chkID casein Orange
chkID horsebean Brown
chkID linseed Green
chkID meatmeal Grey
chkID soybean White
chkID sunflower Yellow
;

PROC SGPLOT data=chickwts dattrmap=chickwtsAttrMap;
    vbox weight2 / category=feed group=feed boxwidth=0.50
        whiskerattrs=(color="black") lineattrs=(color="black") attrid=chkID
        medianattrs=(color="black") meanattrs=(color="black") outlierattrs=(color="black");
    yaxis label="weight";
run;

/* Two-Way Boxplot -> element modification */
DATA cabbagesAttrMap;
    length id $5 value $3 FillColor $10 LineColor $10 MarkerColor $10 MarkerSymbol $15 MarkerSize 3;
    input id value FillColor LineColor MarkerColor MarkerSymbol MarkerSize;
    datalines;
cabID c39 Green Black BILG CircleFilled 6
cabID c52 Purple Black VLIP CircleFilled 6
;

/* Date2 is a display label for Date; any value other than d16/d20 is labelled "Day 21". */
DATA cabbages;
    set cabbages;
    if Date = "d16" then Date2 = "Day 16";
    else if Date = "d20" then Date2 = "Day 20";
    else Date2 = "Day 21";
run;

PROC SGPLOT data=cabbages dattrmap=cabbagesAttrMap;
    vbox VitC / category=Date2 group=Cult attrid=cabID nomean;
    scatter y=VitC x=Date2 / group=Cult groupdisplay=cluster clusterwidth=0.70 attrid=cabID;
    yaxis label="Ascorbic acid content";
    xaxis label="Date";
run;

/* ####################################### */
/* ########## Exploration ################ */
/* Using the cabbages dataset, try creating a two-way box plot of cabbage head weight (HeadWt). */
/* Include Cult and Date as group/category. */
/* If you're ambitious, also try an attribute map and customize the color and style. */
/* ####################################### */

/* ######################### Bar Plots ########################## */
/* references:
   https://www.sas.com/content/dam/SAS/support/en/sas-global-forum-proceedings/2019/3644-2019.pdf
*/

/* Basic Bar plot */
/* GLIMMIX writes the least-squares means and their confidence limits to
   iris_means via ODS OUTPUT; SGPLOT then draws them. */
PROC GLIMMIX data=iris;
    class Species;
    model SepalLength = Species;
    lsmeans Species / cl;
    ods output LSMeans=iris_means;
run;

PROC SGPLOT data=iris_means;
    vbarparm category=Species response=Estimate / limitlower=Lower limitupper=Upper;
run;

/* Two-Way Bar plot */
PROC GLIMMIX data=warpbreaks;
    class wool tension;
    model breaks = wool*tension;
    lsmeans wool*tension / cl;
    ods output LSMeans=wb_means;
run;

PROC SGPLOT data=wb_means;
    vbarparm category=wool response=Estimate / group=tension groupdisplay=cluster limitlower=Lower limitupper=Upper;
run;

/* ######################### */
/* Basic Bar plot -> element modification */
DATA irisAttrMap;
    length id $7 value $12 FillColor $10;
    input id value FillColor;
    datalines;
irisID setosa STPPK
irisID versicolor PAPPK
irisID virginica VIP
;

/* Estimate2 places the asterisk marker one unit above each bar. */
DATA iris_means;
    set iris_means;
    Estimate2 = Estimate + 1;
run;

PROC SGPLOT data=iris_means dattrmap=irisAttrMap noautolegend;
    vbarparm category=Species response=Estimate / group=Species attrid=irisID;
    /* Second, unfilled bar layer supplies the black error bars. */
    vbarparm category=Species response=Estimate / limitlower=Lower limitupper=Upper limitattrs=(color=black) nofill;
    yaxis label="Mean sepal length" values=(0,1,2,3,4,5,6,7,8,9);
    scatter y=Estimate2 x=Species / markerattrs=(color=black size=10 symbol=Asterisk);
run;

/* Two-Way Bar plot -> element modification */
DATA wbAttrMap;
    length id $4 value $1 FillColor $10 LineColor $10;
    input id value FillColor LineColor;
    datalines;
wbID L Green Black
wbID M Orange Black
wbID H Red Black
;

/* tension2 gives the L < M < H sort order; text is the letter drawn above
   each bar and Estimate2 is its vertical position. */
DATA wb_means;
    set wb_means;
    if tension = "L" then tension2 = "1";
    else if tension = "M" then tension2 = "2";
    else tension2 = "3";
    if tension = "L" and wool = "A" then text = "A";
    else text = "B";
    Estimate2 = Estimate + 10;
run;

PROC SORT data=wb_means;
    by wool tension2;
run;

PROC SGPLOT data=wb_means dattrmap=wbAttrMap;
    vbarparm category=wool response=Estimate / group=tension groupdisplay=cluster
        limitlower=Lower limitupper=Upper attrid=wbID limitattrs=(color=black);
    yaxis label="mean number of breaks" values=(0,10,20,30,40,50,60);
    text Y=Estimate2 x=wool text=text / group=tension groupdisplay=cluster textattrs=(size=18 color="black");
run;

/* ######################## Scatter Plots ####################### */
/* references: n/a */

/* Basic Scatter plot */
PROC SGPLOT data=cars;
    scatter y=speed x=dist;
run;

/* Two-Way Scatter plot */
PROC SGPLOT data=crabs;
    scatter y=CW x=CL / group=sex;
run;
/* ######################### */
/* Basic Scatter plot -> element modification */
/* Fit speed on dist, then plot observed points, the fitted line and its
   confidence band. The output data set is sorted by dist so that the series
   and band are drawn left to right. */
PROC GLIMMIX data=cars;
    model speed = dist;
    output out=cars_pred pred lcl ucl;
run;

PROC SORT data=cars_pred;
    by dist;
run;

PROC SGPLOT data=cars_pred noautolegend;
    band x=dist lower=lcl upper=ucl / transparency=0.50 fillattrs=(color='red');
    scatter y=speed x=dist / markerattrs=(symbol=circlefilled color="black");
    series y=Pred x=dist / lineattrs=(color="red");
    /* x is dist and y is speed, so the labels follow the plotted variables. */
    xaxis label="Stopping distance (ft)";
    yaxis label="Speed (mph)" values=(0,5,10,15,20,25,30,35);
run;

/* Two-Way Scatter plot -> element modification */
PROC GLIMMIX data=crabs;
    class sp sex;
    model CW = CL|sp;
    output out=crabs_pred pred lcl ucl;
run;

PROC SORT data=crabs_pred;
    by CL sp;
run;

/* Attribute map for the two sp values. FillColor is the attribute-map column
   that colors the band fill; MarkerColor and LineColor style the scatter and
   series layers. */
DATA crabAttrMap;
    length id $6 value $1 MarkerColor $10 LineColor $10 FillColor $10;
    input id value MarkerColor LineColor FillColor;
    datalines;
crabID B Blue Blue Blue
crabID O Orange Orange Orange
;

PROC SGPLOT data=crabs_pred dattrmap=crabAttrMap;
    band x=CL lower=lcl upper=ucl / group=sp transparency=0.50 attrid=crabID;
    series y=Pred x=CL / group=sp attrid=crabID;
    scatter y=CW x=CL / group=sp markerattrs=(symbol=circlefilled) attrid=crabID;
    xaxis label="Carapace length";
    /* ranges= introduces an axis break between 1 and 14. */
    yaxis label="Carapace width" ranges=(0-1 14-60) values=(0,15,20,25,30,35,40,45,50,55,60);
    styleattrs axisbreak=slantedright;
run;

/* ######################### Other Plots ######################## */
/* references:
   https://support.sas.com/kb/52/964.html
   https://www.sas.com/content/dam/SAS/support/en/sas-global-forum-proceedings/2018/2179-2018.pdf
   https://blogs.sas.com/content/graphicallyspeaking/2017/12/19/getting-started-sgplot-part-9-bubble-plot/
   https://documentation.sas.com/?cdcId=pgmsascdc&cdcVersion=9.4_3.5&docsetId=grstatproc&docsetTarget=p0er4dg9tojp05n1sf7maeqdz1d8.htm&locale=en
   https://blogs.sas.com/content/iml/2018/12/03/tips-customize-legends-proc-sgplot.html
   https://support.sas.com/rnd/datavisualization/yourGraphs/businessQuick/bubble/
*/

/* Spaghetti plot */
PROC SGPLOT data=sleep;
    series x=group y=extra / group=ID;
run;
/* Logistic Regression plot */
/* Model inmetro (event "1") on log population density; the ILINK options
   request predictions and limits on the probability scale (PredMu, LCLMu,
   UCLMu in the output data set). */
DATA midwest;
    set midwest;
    log_popdensity = log(popdensity);
run;

PROC GLIMMIX data=midwest;
    model inmetro(event="1") = log_popdensity / dist=binary;
    output out=midwest_pred pred(ilink) lcl(ilink) ucl(ilink);
run;

PROC SORT data=midwest_pred;
    by log_popdensity;
run;

PROC SGPLOT data=midwest_pred;
    scatter y=inmetro x=log_popdensity;
    series y=PredMu x=log_popdensity;
run;

/* Bubble plot */
PROC SGPLOT data=Cars93;
    bubble x=EngineSize y=RPM size=Horsepower;
run;

/* ######################### */
/* Spaghetti plot -> element modification */
/* group2 is a display label: group 1 is shown as "before", anything else as "after". */
DATA sleep;
    set sleep;
    if group = 1 then group2 = "before";
    else group2 = "after";
run;

PROC SGPLOT data=sleep;
    title "sleep study";
    refline 0 / lineattrs=(thickness=2 color="black" pattern=ShortDash);
    series x=group2 y=extra / group=ID lineattrs=(thickness=2);
    yaxis label="Extra sleep (hours)";
    xaxis label="Drug";
    keylegend / location=outside position=right across=1;
run;

/* TITLE is a global statement: clear it so it does not appear on the plots below. */
title;

/* Logistic Regression plot -> element modification */
PROC SGPLOT data=midwest_pred noautolegend;
    band x=log_popdensity lower=LCLMu upper=UCLMu / transparency=0.5 fillattrs=(color="grey");
    scatter y=inmetro x=log_popdensity / markerattrs=(size=6 color="black");
    series y=PredMu x=log_popdensity / lineattrs=(color="red");
    yaxis label="Probability of being in a metro area";
    /* ranges= introduces an axis break between 0.1 and 3.9. */
    xaxis label="Log of population density" ranges=(0-0.1 3.9-12) values=(0,4,5,6,7,8,9,10,11,12);
    styleattrs axisbreak=slantedright;
run;

/* Bubble plot -> element modification */
PROC SGPLOT data=Cars93;
    bubble x=EngineSize y=RPM size=Horsepower / group=Type transparency=0.4;
    inset "Bubble size represents Horsepower" / position=bottomright textattrs=(size=11);
    yaxis grid values=(3500,4000,4500,5000,5500,6000,6500) labelattrs=(size=12);
    xaxis grid labelattrs=(size=12);
run;

/* ################################################################ */
/* ########## Exploration ################ */
/* Using the Cars93 dataset, try creating 2-3 attribute maps and trying them out on the sample bubble plot. */
/* Change the fill color, line color, and line thickness. */
/* Try adding more features if you're feeling bold. */

/* template */
DATA bubbleAttrMap1;
    length id $5 value $12 FillColor $10 LineColor $10 LineThickness 3;
    input id value FillColor LineColor LineThickness;
    datalines;
carID Small Blue Black 2
carID Midsize Green Black 2
carID Compact Yellow Black 2
carID Large Purple Black 2
carID Sporty Red Black 2
carID Van Grey Black 2
;

/* example graph */
PROC SGPLOT data=Cars93 dattrmap=bubbleAttrMap1;
    bubble x=EngineSize y=RPM size=Horsepower / group=Type transparency=0.4 attrid=carID;
    inset "Bubble size represents Horsepower" / position=bottomright textattrs=(size=11);
run;

/* ####################################### */
/* #### */
/* #### */
/* #### */
/* ######################### Special Treat: Macros in SAS ########################## */
/* References:
   https://www.listendata.com/2015/12/sas-macros-made-easy.html
*/

/* Mean of single variable.
   Parameters:
     input  = data set to summarise
     ivar   = analysis variable passed to the VAR statement
     output = data set that receives the mean (also printed)
*/
%MACRO mean_test (input =, ivar=, output=);
    PROC MEANS data = &input noprint;
        var &ivar;
        output out = &output mean= ;
    run;

    PROC PRINT data=&output;
    run;
%MEND;

%mean_test(input=sashelp.heart, ivar= height, output=test);
%mean_test(input=Cars93, ivar=Horsepower, output=test2);

/* ######################### */
/* Quick barplot of a categorical variable and numerical variable.
   Parameters:
     input = data set to analyse
     cvar  = classification variable (bar categories)
     nvar  = numeric response variable
   Side effect: overwrites the data set WORK.lsm with the least-squares means.
*/
%MACRO plot_tests (input=, cvar=, nvar= );
    PROC GLIMMIX data = &input;
        class &cvar;
        model &nvar = &cvar;
        lsmeans &cvar / cl;
        ods output LSMeans=lsm;
    run;

    PROC SGPLOT data=lsm;
        vbarparm category=&cvar response=Estimate / limitupper=Upper limitlower=Lower;
    run;
%MEND;

%plot_tests(input=crabs, cvar=sex, nvar=CW);
%plot_tests(input=chickwts, cvar=feed, nvar=weight);