*Model Gauntlet in SAS Code;
/*
   T-test sections of the gauntlet.

   Work copies of sashelp.BWeight and sashelp.Baseball are created here and
   each numbered question is answered with one PROC TTEST call. The
   two-sample questions additionally summarise the response per group with
   PROC MEANS (mean and confidence limits) and draw the result with
   PROC SGPLOT.

   Every step is closed with an explicit RUN so that no step is left pending
   when the code is submitted piecewise.
*/
*---------------------------------------------------------------------------------------------;
*1-Sample T-tests;
*Intro;
DATA BWeight;
    set sashelp.BWeight;
run;
PROC PRINT data=BWeight(obs=25);
run;

DATA Baseball;
    set sashelp.Baseball;
run;
PROC PRINT data=Baseball(obs=25);
run;

*Analysis;
*1) Is the average birthweight of White infants greater than 3200?;
PROC TTEST data=BWeight h0=3200 sides=U plots=none;
    where Black in (0);
    var Weight;
run;

*2) Is the average birth weight of Black infant less than 3200?;
PROC TTEST data=BWeight h0=3200 sides=L plots=none;
    where Black in (1);
    var Weight;
run;

*3) Is the average birth weight of Black infants less than the mean weight of White infants (3411.2)?;
/* The question is directional (less than), so a lower one-sided test is
   used here, matching questions 1 and 2. */
PROC TTEST data=BWeight h0=3411.2 sides=L plots=none;
    where Black in (1);
    var Weight;
run;

*4) Is the average number of at bats for baseball players different than 400?;
PROC TTEST data=Baseball h0=400 sides=2 plots=none;
    var nAtBat;
run;

*5) Is the log salary for baseball players less than 6?;
PROC TTEST data=Baseball h0=6 sides=L plots=none;
    var logSalary;
run;

*6) Is the average number of home runs for baseball players greater than Barry Bonds (16)?;
PROC TTEST data=Baseball h0=16 sides=U plots=none;
    var nHome;
run;

*---------------------------------------------------------------------------------------------;
*2-Sample T-tests;
/*
   Pattern used for every question below:
     1. PROC TTEST with a CLASS variable performs the group comparison.
     2. PROC MEANS writes the group means and confidence limits to a data set.
        NWAY keeps only the per-group rows, so the overall (all groups
        combined) summary row is not passed on to the plot data.
     3. PROC SGPLOT draws the group means as bars with the limits.

   For the one-sided tests the difference is taken as first class level minus
   second class level in sorted order (0 minus 1 for the 0/1 indicators used
   here), which is why "boys greater" maps to sides=L and "smokers lower"
   maps to sides=U.
*/
*Intro;
PROC PRINT data=BWeight(obs=25);
run;
PROC PRINT data=Baseball(obs=25);
run;

*Analysis;
*1) Is the average birth weight of infants greater for boys compared to girls?;
PROC TTEST data=BWeight sides=L plots=none;
    class Boy;
    var Weight;
run;
PROC MEANS data=BWeight noprint nway;
    class Boy;
    var Weight;
    output out=TSttest1 mean=mean lclm=lclm uclm=uclm;
run;
PROC SGPLOT data=TSttest1;
    vbarparm category=Boy response=mean /
        limitlower=lclm limitupper=uclm fillattrs=(color="lightblue");
run;

*2) Is the average birth weight of infants lower for smoking vs. non-smoking mothers?;
PROC TTEST data=BWeight sides=U plots=none;
    class MomSmoke;
    var Weight;
run;
PROC MEANS data=BWeight noprint nway;
    class MomSmoke;
    var Weight;
    output out=TSttest2 mean=mean lclm=lclm uclm=uclm;
run;
PROC SGPLOT data=TSttest2;
    vbarparm category=MomSmoke response=mean /
        limitlower=lclm limitupper=uclm fillattrs=(color="grey");
run;

*3) Is the average birth weight of infants different between married and non-married mothers?;
PROC TTEST data=BWeight sides=2 plots=none;
    class Married;
    var Weight;
run;
PROC MEANS data=BWeight noprint nway;
    class Married;
    var Weight;
    output out=TSttest3 mean=mean lclm=lclm uclm=uclm;
run;
PROC SGPLOT data=TSttest3;
    vbarparm category=Married response=mean /
        limitlower=lclm limitupper=uclm fillattrs=(color="gold");
run;

*4) Is the average number of hits for baseball players different across league?;
PROC TTEST data=Baseball sides=2 plots=none;
    class League;
    var nHits;
run;
PROC MEANS data=Baseball noprint nway;
    class League;
    var nHits;
    output out=TSttest4 mean=mean lclm=lclm uclm=uclm;
run;
PROC SGPLOT data=TSttest4;
    vbarparm category=League response=mean /
        limitlower=lclm limitupper=uclm fillattrs=(color="orange");
run;

*5) Is the average number of runs for baseball players different across league?;
PROC TTEST data=Baseball sides=2 plots=none;
    class League;
    var nRuns;
run;
PROC MEANS data=Baseball noprint nway;
    class League;
    var nRuns;
    output out=TSttest5 mean=mean lclm=lclm uclm=uclm;
run;
PROC SGPLOT data=TSttest5;
    vbarparm category=League response=mean /
        limitlower=lclm limitupper=uclm fillattrs=(color="orange");
run;

*6) Is the average number of outs for baseball players different across division?;
PROC TTEST data=Baseball sides=2 plots=none;
    class Division;
    var nOuts;
run;
PROC MEANS data=Baseball noprint nway;
    class Division;
    var nOuts;
    output out=TSttest6 mean=mean lclm=lclm uclm=uclm;
run;
PROC SGPLOT data=TSttest6;
    vbarparm category=Division response=mean /
        limitlower=lclm limitupper=uclm fillattrs=(color="lightgreen");
run;
*---------------------------------------------------------------------------------------------;
*Paired T-tests;
/*
   Paired t-tests, 1-way ANOVA, 2-way ANOVA and blocked/nested ANOVA.

   This part of the script builds work copies of several sashelp tables
   (PriceData, Fish, Iris, Cars, Class, Revhub2, Comet), reads four small
   inline data sets (pressure, auc, nested1, Turnip) and then answers the
   numbered questions. It also reads the work tables Baseball and BWeight,
   which are not created in this part and must already exist.

   Interactive procedures (ANOVA, GLM) are closed with RUN and QUIT, every
   other step with RUN.
*/
*Intro;
DATA PriceData;
    set sashelp.PriceData;
run;
PROC PRINT data=PriceData(obs=25);
run;
PROC MEANS data=PriceData mean;
run;

DATA Fish;
    set sashelp.Fish;
run;
PROC PRINT data=Fish(obs=25);
run;
PROC FREQ data=Fish;
    tables Species;
run;

/* Trailing @@ holds the input line so several (before, after) pairs can be
   read from each data line. */
DATA pressure;
    input SBPbefore SBPafter @@;
    datalines;
120 128 124 131 130 131 118 127
140 132 128 125 140 141 135 137
126 118 130 132 126 129 127 135
;
run;
*(https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_ttest_sect011.htm);

DATA auc;
    input TestAUC RefAUC @@;
    datalines;
103.4 90.11 59.92 77.71 68.17 77.71 94.54 97.51
69.48 58.21 72.17 101.3 74.37 79.84 84.44 96.06
96.74 89.30 94.26 97.22 48.52 61.62 95.68 85.80
;
run;
*(https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_ttest_sect013.htm);

*Analysis;
*1) Is the unit price different across time for Product 1 and 10?;
PROC TTEST data=PriceData;
    paired price1*price10;
run;

*2) Is the unit price different across time for Product 1 and 14?;
PROC TTEST data=PriceData;
    paired price1*price14;
run;

*3) Is the unit price different across time for Product 16 and 17?;
PROC TTEST data=PriceData;
    paired price16*price17;
run;

*4) Is the length of perch different between measurement 1 and 2?;
PROC TTEST data=Fish;
    where Species in ("Perch");
    paired Length1*Length2;
run;

*5) Is blood pressure different before versus after a stimulus?;
PROC TTEST data=pressure;
    paired SBPbefore*SBPafter;
run;

*6) Is the AUC (area under serum-concentration curve) different between a test and reference drug?;
PROC TTEST data=auc;
    paired TestAUC*RefAUC;
run;

*==============================================================================================;
*==============================================================================================;
*1-Way ANOVA;
*Intro;
DATA Iris;
    set sashelp.Iris;
run;
PROC PRINT data=Iris(obs=25);
run;
PROC SGPLOT data=Iris;
    vbox SepalLength / group=Species;
run;
PROC SGPLOT data=Iris;
    vbox SepalWidth / group=Species;
run;
PROC SGPLOT data=Iris;
    vbox PetalLength / group=Species;
run;
PROC SGPLOT data=Iris;
    vbox PetalWidth / group=Species;
run;

DATA Cars;
    set sashelp.Cars;
run;
PROC PRINT data=Cars(obs=25);
run;
PROC FREQ data=Cars;
    tables Origin;
    tables Type;
run;

PROC PRINT data=Fish(obs=25);
run;
PROC FREQ data=Fish;
    tables Species;
run;
PROC SGPLOT data=Fish;
    vbox Weight / group=Species;
run;
PROC SGPLOT data=Fish;
    vbox Width / group=Species;
run;

*Analysis;
*1) Is the average sepal length different across iris species?;
PROC ANOVA data=Iris;
    class Species;
    model SepalLength=Species;
    means Species / tukey;
run;
quit;
/* Same model refitted with GLIMMIX to print the parameter estimates (S option). */
PROC GLIMMIX data=Iris;
    class Species;
    model SepalLength=Species / s;
run;

*2) Is the average petal width different across iris species?;
PROC ANOVA data=Iris;
    class Species;
    model PetalWidth=Species;
    means Species / tukey;
run;
quit;
PROC GLIMMIX data=Iris;
    class Species;
    model PetalWidth=Species / s;
run;

*3) Is the average highway MPG different across car origin?;
PROC ANOVA data=Cars;
    class Origin;
    model MPG_Highway=Origin;
    means Origin / tukey;
run;
quit;

*4) Is the average suggested retail price different across car type?;
PROC ANOVA data=Cars;
    where Type not in ("Hybrid");
    class Type;
    model MSRP=Type;
    means Type / tukey;
run;
quit;

*5) Is the average width different across 3 fish species?;
PROC ANOVA data=Fish;
    where Species in ("Bream" "Whitefish" "Pike");
    class Species;
    model Width=Species;
    means Species / tukey;
run;
quit;

*6) Is the average length different across 6 fish species?;
PROC ANOVA data=Fish;
    where Species not in ("Smelt");
    class Species;
    model Length1=Species;
    means Species / tukey;
run;
quit;

*---------------------------------------------------------------------------------------------;
*2-Way ANOVA;
*Intro;
/* Age2 splits the children into two groups at age 13. The explicit LENGTH
   keeps the 8-character width that the first assignment previously implied. */
DATA Class;
    set sashelp.Class;
    length Age2 $8;
    if Age < 13 then Age2="pre-teen";
    else Age2="teen";
run;
PROC PRINT data=Class(obs=25);
run;

PROC PRINT data=Baseball(obs=25);
run;
PROC FREQ data=Baseball;
    tables Team;
    tables Position;
run;

PROC PRINT data=Cars(obs=25);
run;
PROC FREQ data=Cars;
    tables Origin;
    tables Type;
    tables DriveTrain;
run;

PROC PRINT data=BWeight(obs=25);
run;
PROC FREQ data=BWeight;
    tables MomEdLevel;
    tables Married;
run;

*Analysis;
*1) Is average height different across age and sex for children?;
PROC GLM data=Class;
    class Sex Age2;
    model Height=Sex|Age2;
    means Sex|Age2 / tukey;
run;
quit;

*2) Is average weight different across age and sex for children?;
PROC GLM data=Class;
    class Sex Age2;
    model Weight=Sex|Age2;
    means Sex|Age2 / tukey;
run;
quit;

*3) Is the average number of hits for baseball players different across league and division?;
PROC GLM data=Baseball;
    class League Division;
    model nHits=League|Division;
    means League|Division / tukey;
run;
quit;

*4) Is the average log salary for baseball players different across league and division?;
PROC GLM data=Baseball;
    class League Division;
    model logSalary=League|Division;
    means League|Division / tukey;
run;
quit;

*5) Is the average horsepower for cars different across origin and drive train?;
PROC GLM data=Cars;
    class Origin DriveTrain;
    model Horsepower=Origin|DriveTrain;
    means Origin|DriveTrain / tukey;
run;
quit;

*6) Is infant birth weight different across maternal education level or smoking status?;
/* The cell least-squares means and their confidence limits are captured
   through ODS OUTPUT and then plotted as clustered bars. */
PROC GLIMMIX data=BWeight;
    class MomEdLevel MomSmoke;
    model Weight=MomEdLevel|MomSmoke;
    lsmeans MomEdLevel*MomSmoke / cl;
    ods output LSMeans=lsm;
run;
PROC PRINT data=lsm;
run;
PROC SGPLOT data=lsm;
    vbarparm category=MomEdLevel response=Estimate /
        group=MomSmoke groupdisplay=cluster limitlower=Lower limitupper=Upper;
run;

*---------------------------------------------------------------------------------------------;
*Blocked/Nested ANOVA;
*Intro;
/* No trailing @@ on this INPUT, so each observation sits on its own data line. */
DATA nested1;
    input School $ Instructor response;
    datalines;
Atlanta 1 25
Atlanta 1 29
Atlanta 2 14
Atlanta 2 11
Chicago 1 11
Chicago 1 6
Chicago 2 22
Chicago 2 18
SanFran 1 17
SanFran 1 20
SanFran 2 5
SanFran 2 2
;
run;
*https://online.stat.psu.edu/stat502/lesson/4/4.2/4.2.1;

DATA RevHub2;
    set sashelp.Revhub2;
    ln_Revenue=log(Revenue);
run;
PROC PRINT data=RevHub2(obs=25);
run;

/* The nested DO loops generate the Plant, Leaf and Sample identifiers while
   the 24 calcium values are read in order: 4 plants x 3 leaves x 2 samples. */
DATA Turnip;
    do Plant=1 to 4;
        do Leaf=1 to 3;
            do Sample=1 to 2;
                input Calcium @@;
                output;
            end;
        end;
    end;
    datalines;
3.28 3.09 3.52 3.48 2.88 2.80
2.46 2.44 1.87 1.92 2.19 2.19
2.77 2.66 3.74 3.44 2.55 2.55
3.78 3.87 4.07 4.12 3.31 3.31
;
run;
*https://support.sas.com/documentation/onlinedoc/stat/132/nested.pdf;
PROC PRINT data=Turnip(obs=25);
run;

DATA Comet;
    set sashelp.Comet;
run;
PROC PRINT data=Comet(obs=25);
run;
PROC FREQ data=Comet;
    tables Dose;
    tables Rat;
    tables Sample;
run;

PROC PRINT data=Class(obs=25);
run;

*Analysis;
*1) Are average responses different across school and instructor, where instructor is nested in school?;
PROC GLIMMIX data=nested1;
    class School Instructor;
    model response = School Instructor(School);
    lsmeans School / adjust=tukey plot=meanplot cl lines;
    lsmeans Instructor(School) / adjust=tukey plot=meanplot cl lines;
run;

*2) Is average log revenue for airlines different across flight type, where type is nested in flight source?;
PROC GLIMMIX data=RevHub2;
    class Source Type;
    model ln_Revenue=Source;
    random Type(Source);
    lsmeans Source / adjust=tukey plot=meanplot cl lines;
run;

*3) Are average calcium levels different across turnip plants, where samples are nested in leaves and plants?;
/* Turnip holds exactly one observation per Plant x Leaf x Sample cell, so a
   Sample(Leaf Plant) random effect would be confounded with the residual.
   The sample-within-leaf variation is therefore left to the residual
   variance and only Leaf(Plant) is declared as a random effect. */
PROC GLIMMIX data=Turnip;
    class Plant Leaf;
    model Calcium=Plant;
    random Leaf(Plant);
    lsmeans Plant / adjust=tukey plot=meanplot cl lines;
run;

*4) Is average cell DNA damage different across drug dose, when controlling for rat?;
PROC GLIMMIX data=Comet;
    class Dose Rat;
    model Length=Dose;
    random Rat;
    lsmeans Dose / adjust=tukey plot=meanplot cl lines;
run;

*5) Is average height different across sex in children when controlling for age?;
PROC GLIMMIX data=Class;
    class Sex Age;
    model Height=Sex;
    random Age;
    lsmeans Sex / adjust=tukey plot=meanplot cl lines;
run;

*6) Is average weight different across sex in children when controlling for age?;
PROC GLIMMIX data=Class;
    class Sex Age;
    model Weight=Sex;
    random Age;
    lsmeans Sex / adjust=tukey plot=meanplot cl lines;
run;

*==============================================================================================;
*==============================================================================================;
*Simple Linear Regression;
/*
   Simple linear, multiple linear and logistic regression.

   This part of the script creates work copies of sashelp.vote1980,
   sashelp.Gas, sashelp.BMT and sashelp.Junkmail. It also reads and extends
   the work tables Baseball, Fish and Cars, which are not created in this
   part and must already exist.

   Interactive procedures (REG, GLM) are closed with RUN and QUIT, every
   other step with RUN.
*/
*Intro;
DATA vote1980;
    set sashelp.vote1980;
run;
PROC PRINT data=vote1980(obs=25);
run;

PROC PRINT data=Baseball(obs=25);
run;
PROC CORR data=Baseball;
    var nAtBat nHits nHome nRuns YrMajor nOuts nAssts nError logSalary;
run;

PROC PRINT data=Fish(obs=25);
run;
/* LOG is undefined for non-positive arguments. The guard leaves ln_Weight
   missing in that case, which is the value the unguarded call would also
   produce, but without raising an invalid-argument condition in the log. */
DATA Fish;
    set Fish;
    if Weight > 0 then ln_Weight=log(Weight);
    ln_Length1=log(Length1);
    ln_Width=log(Width);
run;

*Analysis;
*1) Can the log vote rate be predicted by population in US counties?;
PROC REG data=vote1980;
    model LogVoteRate=Pop;
run;
quit;

*2) Can log weight be predicted by log length for fish?;
PROC REG data=Fish;
    model ln_Weight=ln_Length1;
run;
quit;

*3) Can log weight be predicted by log width for fish?;
PROC REG data=Fish;
    model ln_Weight=ln_Width;
run;
quit;

*4) Can the number of home runs be predicted by the number of hits for baseball players?;
PROC REG data=Baseball;
    model nHome=nHits;
run;
quit;

*5) Can the number of runs be predicted by the number of years in the major leagues for baseball players?;
PROC REG data=Baseball;
    model nRuns=YrMajor;
run;
quit;

*6) Can log salary be predicted by the number of runs for baseball players?;
PROC REG data=Baseball;
    model logSalary=nRuns;
run;
quit;

*---------------------------------------------------------------------------------------------;
*Multiple Linear Regression;
*Intro;
PROC PRINT data=vote1980(obs=25);
run;

DATA Fish;
    set Fish;
    ln_Length2 = log(Length2);
    ln_Length3 = log(Length3);
    ln_Height = log(Height);
run;
PROC PRINT data=Fish(obs=25);
run;

PROC PRINT data=Baseball(obs=25);
run;

*Analysis;
*1) Can the log vote rate be predicted by population, education, and housing in US counties?;
PROC GLMSELECT data=vote1980 plots=all;
    model LogVoteRate=Pop Edu Houses /
        selection=stepwise(select=AICc) stats=all;
run;
PROC GLM data=vote1980;
    model LogVoteRate=Pop Edu Houses;
run;
quit;

*2) Can the log vote rate be predicted by population, education, housing, and all interactions in US counties?;
PROC GLMSELECT data=vote1980 plots=all;
    model LogVoteRate=Pop|Edu|Houses /
        selection=stepwise(select=AICc) stats=all;
run;
PROC GLM data=vote1980;
    model LogVoteRate=Pop Edu Pop*Edu Houses Pop*Houses Edu*Houses Pop*Edu*Houses;
run;
quit;

*3) Can the log weight be predicted by log length1, log length2, log length3, log height, and log width for fish?;
PROC GLMSELECT data=Fish;
    model ln_Weight=ln_Length1 ln_Length2 ln_Length3 ln_Height ln_Width /
        selection=stepwise(select=AICc) stats=all;
run;
PROC CORR data=Fish;
    var ln_Weight ln_Width ln_Height ln_Length2;
run;
*Width most;
PROC GLM data=Fish;
    model ln_Weight=ln_Width;
run;
quit;

*4) Can log salary be predicted by the number of hits, home runs, and runs for baseball players?;
PROC CORR data=Baseball;
    var logSalary nHits nHome nRuns;
run;
/* NOTE(review): nRuns is named in the question but is not in the fitted
   model. Presumably it was dropped after inspecting the correlations above;
   confirm with the author. */
PROC GLM data=Baseball;
    model logSalary=nHits nHome;
run;
quit;

*5) Can log salary be predicted by the number of hits, home runs, outs, assists, and years in the major league?;
PROC CORR data=Baseball;
    var logSalary nHits nHome nOuts nAssts YrMajor;
run;
PROC GLM data=Baseball;
    model logSalary=nHits nHome nOuts nAssts YrMajor;
run;
quit;

*6) Can log salary be predicted by the number of at bats, hits, runs, home runs, walks, outs, assists, and years in the major league?;
PROC GLMSELECT data=Baseball;
    model logSalary=nAtBat nHits nRuns nHome nBB nOuts nAssts YrMajor /
        selection=stepwise(select=AICc) stats=all;
run;
PROC CORR data=Baseball;
    var logSalary nAtBat nHits nBB nOuts YrMajor;
run;
PROC GLM data=Baseball;
    model logSalary=nHits nBB nOuts YrMajor;
run;
quit;

*---------------------------------------------------------------------------------------------;
*Logistic Regression;
/*
   Pattern used for the single-predictor questions below:
     1. PROC GLIMMIX fits a binary model for the event level "1" and writes
        the predicted mean with its lower and upper limits on the data scale
        (ILINK) to an output data set as PredMu, LCLMu and UCLMu.
     2. The output is sorted by the predictor so the fitted curve is drawn
        from left to right.
     3. PROC SGPLOT overlays the limit band, the observed 0/1 values and the
        fitted curve.
*/
*Intro;
/* Fuel2 is a 0/1 indicator: the comparison evaluates to 1 when true, 0 otherwise. */
DATA Gas;
    set sashelp.Gas;
    Fuel2 = (Fuel="Ethanol");
run;
PROC PRINT data=Gas(obs=25);
run;
PROC FREQ data=Gas;
    tables Fuel2;
    tables CpRatio;
    tables EqRatio;
    tables Fuel2*CpRatio;
    tables Fuel2*EqRatio;
run;

DATA BMT;
    set sashelp.BMT;
run;
PROC PRINT data=BMT(obs=25);
run;
*Status= 1-death, 0-survival;
PROC FREQ data=BMT;
    tables Status*Group;
run;

DATA JunkMail;
    set sashelp.Junkmail;
run;
PROC PRINT data=JunkMail(obs=25);
run;
*Class= 0-not junk, 1-junk;

/* Type2 is a 0/1 indicator for sedans. */
DATA Cars;
    set Cars;
    Type2 = (Type="Sedan");
run;
PROC PRINT data=Cars(obs=25);
run;
PROC FREQ data=Cars;
    tables DriveTrain;
    tables Origin;
    tables Cylinders;
    tables Type;
    tables Type2;
run;

*Analysis;
*1) Can fuel status (1=ethanol, 0=non-ethanol) be predicted by Nitrogen Oxide emission?;
PROC GLIMMIX data=Gas;
    model Fuel2(event="1")=NOx / s dist=Binary;
    output out=pred1 pred(ilink) lcl(ilink) ucl(ilink);
run;
PROC SORT data=pred1;
    by NOx;
run;
PROC SGPLOT data=pred1 noautolegend;
    band x=NOx lower=LCLMu upper=UCLMu / transparency=0.30;
    scatter y=Fuel2 x=NOx;
    series y=PredMu x=NOx;
run;

*2) Can fuel status (1=ethanol, 0=non-ethanol) be predicted by Equivalence Ratio?;
PROC GLIMMIX data=Gas;
    model Fuel2(event="1")=EqRatio / s dist=Binary;
    output out=pred2 pred(ilink) lcl(ilink) ucl(ilink);
run;
PROC SORT data=pred2;
    by EqRatio;
run;
PROC SGPLOT data=pred2 noautolegend;
    band x=EqRatio lower=LCLMu upper=UCLMu / transparency=0.30;
    scatter y=Fuel2 x=EqRatio;
    series y=PredMu x=EqRatio;
run;

*3) Can Junk mail status (1=junk, 0=non-junk) be predicted by the frequency of exclamation marks?;
PROC GLIMMIX data=JunkMail;
    model Class(event="1")=Exclamation / s dist=Binary;
    output out=pred3 pred(ilink) lcl(ilink) ucl(ilink);
run;
PROC SORT data=pred3;
    by Exclamation;
run;
PROC SGPLOT data=pred3 noautolegend;
    band x=Exclamation lower=LCLMu upper=UCLMu / transparency=0.30;
    scatter y=Class x=Exclamation;
    series y=PredMu x=Exclamation;
run;

*4) Can Junk mail status (1=junk, 0=non-junk) be predicted by the frequency of several words and symbols?;
PROC GLIMMIX data=JunkMail;
    model Class(event="1")=Address Receive Report Free Credit Money Exclamation Dollar /
        s dist=Binary;
run;

*5) Can death status (1=dead, 0=censored) be predicted by risk category for post- bone marrow transplant leukemia patients?;
PROC GLIMMIX data=BMT;
    class Group(ref="ALL");
    model Status(event="1")=Group / s dist=Binary oddsratio;
run;

*6) Can car type (1=sedan, 0=other) be predicted by origin, drive train, or cylinders?;
PROC GLIMMIX data=Cars;
    class Origin(ref="USA") DriveTrain(ref="Front") Cylinders(ref="3");
    model Type2(event="1")=Origin DriveTrain Cylinders / s dist=Binary oddsratio;
run;