You are on page 1 of 9

Bivariate Correlation and Multiple

Regression Analyses for Continuous


Variables Using SAS
(commands=finan_regression.sas)
/**************************************/
/* BIVARIATE CORRELATION ANALYSIS FOR */
/* TWO CONTINUOUS VARIABLES IN SAS */
/**************************************/
/* INDICATE LIBRARY CONTAINING PERMANENT SAS DATA SET "CARS" */
/* Assign the libref SASDATA2 (V9 engine) to the folder holding the permanent data set. */
libname sasdata2 V9 "C:\temp\sasdata2";
First, we consider commands to generate scatter plots.
In INSIGHT: go to the command dialog box and type INSIGHT, without the quotes.
Click on Scatter Plot (Y X), and select MPG as Y and Year or Weight as X.
/* Reset all graphics options, then select the WIN graphics device. */
goptions reset=all;
goptions device=win;

/* Scatter plots of MPG versus YEAR and MPG versus WEIGHT, drawn with dots. */
proc gplot data = sasdata2.cars;
  plot mpg*year;
  plot mpg*weight;
  symbol value=dot;
run;
quit;
/* ARE THE RELATIONSHIPS LINEAR? */
,
/* INVESTIGATE STRANGE OBSERVATION */
/* List the observation(s) in the permanent CARS data whose YEAR equals 0. */
proc print data = sasdata2.cars;
  where year eq 0;
run;
Obs MPG ENGINE HORSE WEIGHT ACCEL YEAR ORIGIN CYLINDER
35 9 4 93 732 9 0 . .
/* REMOVE STRANGE OBSERVATION WITH YEAR = 0, AND INVESTIGATE SCATTERPLOT
AGAIN. */
/* Create CARS2 from the permanent CARS data, keeping only rows where YEAR is not 0. */
data cars2;
  set sasdata2.cars;
  if year ne 0;   /* subsetting IF: drops the YEAR = 0 observation */
run;
/* Redraw the MPG versus YEAR scatter plot from the cleaned CARS2 data. */
proc gplot data = cars2;
  plot mpg*year;
  symbol value=dot;
run;
quit;
-
Calculate Pearson correlation coefficients for the variables of interest.
In INSIGHT: select Multivariate (Y X), and all variables will be Y variables.
/* Correlations among WEIGHT, YEAR and MPG (PROC CORR defaults). */
proc corr data = cars2;
  var weight year mpg;
run;
Pearson Correlation Coefficients
Prob > |r| under H0: Rho=0
Number of Observations
WEIGHT YEAR MPG
WEIGHT 1.00000 -0.30990 -0.33014
Vehicle Weight (lbs.) <.0001 <.0001
405 405 397
YEAR -0.30990 1.00000 0.57903
Model Year (modulo 100) <.0001 <.0001
405 405 397
MPG -0.33014 0.57903 1.00000
Miles per Gallon <.0001 <.0001
397 397 397
Consider the nomiss and spearman options. Nomiss is for listwise deletion of missing
values (as opposed to the default option of pairwise deletion), while Spearman is a non-
parametric test of correlation (Pearson correlation assumes normality).
/* Same correlations, with the NOMISS option on the PROC CORR statement. */
proc corr data = cars2 nomiss;
  var weight year mpg;
run;
Pearson Correlation Coefficients, N = 397
Prob > |r| under H0: Rho=0
WEIGHT YEAR MPG
WEIGHT 1.00000 -0.30097 -0.33014
Vehicle Weight (lbs.) <.0001 <.0001
YEAR -0.30097 1.00000 0.57903
Model Year (modulo 100) <.0001 <.0001
MPG -0.33014 0.57903 1.00000
Miles per Gallon <.0001 <.0001
/* Correlations among the same variables, with the SPEARMAN option requested. */
proc corr data = cars2 spearman;
  var weight year mpg;
run;
Spearman Correlation Coefficients
Prob > |r| under H0: Rho=0
Number of Observations
WEIGHT YEAR MPG
1
WEIGHT 1.00000 -0.23231 -0.37402
Vehicle Weight (lbs.) <.0001 <.0001
405 405 397
YEAR -0.23231 1.00000 0.57053
Model Year (modulo 100) <.0001 <.0001
405 405 397
MPG -0.37402 0.57053 1.00000
Miles per Gallon <.0001 <.0001
397 397 397
/************************************/
/* MULTIPLE REGRESSION ANALYSIS FOR */
/* CONTINUOUS VARIABLES IN SAS */
/************************************/
Fit a multiple regression model to the CARS data, where MPG is the dependent variable,
and WEIGHT and YEAR are the continuous predictor variables. Generate a plot of
studentized residuals (RSTUDENT, based on the model fitted by deleting the current
observation from the data set), versus the predicted values, and output the predicted
values and residuals to a new SAS data set, REGDAT.
/* Regress MPG on WEIGHT and YEAR; CLB adds confidence limits for the estimates. */
proc reg data = cars2;
  model mpg = weight year / clb;
  /* Studentized (deleted) residuals versus predicted values. */
  plot rstudent.*predicted.;
  /* Save predicted values, residuals and studentized residuals in REGDAT. */
  output out=regdat p=predict r=resid rstudent=rstudent;
run;
quit;
The REG Procedure
Model: MODEL1
Dependent Variable: MPG Miles per Gallon
Number of Observations Read 405
Number of Observations Used 397
Number of Observations with Missing Values 8
Analysis of Variance
Sum of Mean
Source DF Squares Square F Value Pr > F
Model 2 19335 9992.39159 320.07 <.0001
Error 394 4959.94332 11.31391
Corrected Total 399 24041
Root MSE 3.43739 R-Square 0.3093
Dependent Mean 23.55113 Adj R-Sq 0.3053
Coeff Var 14.59744
Parameter Estimates
Parameter Standard
Variable Label DF Estimate Error t Value Pr > |t|
Intercept Intercept 1 -14.27939 3.97422 -3.59 0.0004
WEIGHT Vehicle Weight (lbs.) 1 -0.00997 0.00021431 -31.07 <.0001
YEAR Model Year (modulo 100) 1 0.75791 0.04909 15.44 <.0001
8
Parameter Estimates
Variable Label DF 95% Confidence Limits
Intercept Intercept 1 -22.03993 -9.49304
WEIGHT Vehicle Weight (lbs.) 1 -0.00710 -0.00925
YEAR Model Year (modulo 100) 1 0.99140 0.35442
/* WHY IS THERE A DIAGONAL LINE AT THE BOTTOM OF THE FITTED-RESIDUAL
PLOT? */
/* CHECK THE RESIDUAL DISTRIBUTION FOR NORMALITY USING THE SAVED DATA
SET */
/* Normality check of the studentized residuals saved in REGDAT. */
/* NOPRINT suppresses the default tables; only the graphs and the fit are produced. */
proc univariate data=regdat noprint;
  var rstudent;
  histogram / normal;
  /* Reference line uses the estimated mean and standard deviation. */
  probplot / normal (mu=est sigma=est);
run;
9
The UNIVARIATE Procedure
Fitted Distribution for rstudent
Parameters for Normal Distribution
Parameter Symbol Estimate
Mean Mu 0.001521
Std Dev Sigma 1.005359
Goodness-of-Fit Tests for Normal Distribution
Test ---Statistic---- -----p Value-----
Kolmogorov-Smirnov D 0.04323989 Pr > D 0.071
Cramer-von Mises W-Sq 0.19322312 Pr > W-Sq 0.015
Anderson-Darling A-Sq 1.55291333 Pr > A-Sq <0.005
/* CALCULATE MEANS ON PREDICTORS */
/* Descriptive statistics for the two predictors; the means are used for centering later. */
proc means data = cars2;
  var weight year;
run;
The MEANS Procedure
Variable Label N Mean Std Dev Minimum Maximum
------------------------------------------------------------------------------------------
WEIGHT Vehicle Weight (lbs.) 405 2975.09 343.5493931 1913.00 5140.00
YEAR Model Year (modulo 100) 405 75.9353025 3.7417993 70.0000000 32.0000000
------------------------------------------------------------------------------------------
/* CREATE NEW VARIABLES */
/* Build CARS3 from CARS2 with a log-transformed outcome and mean-centered predictors. */
data cars3;
  set cars2;
  logmpg = log(mpg);
  wgtcent = weight - 2975.09;   /* 2975.09 = mean of weight */
  wgtcent2 = wgtcent**2;        /* squared centered weight */
  yearcent = year - 75.94;      /* 75.94 = mean of year */
run;
/* REFIT THE MODEL USING THE NEW VARIABLES. */
/* Regress LOGMPG on centered weight, its square, and centered year, with confidence limits (CLB). */
proc reg data = cars3;
  model logmpg = wgtcent wgtcent2 yearcent / clb;
  /* Studentized (deleted) residuals versus predicted values. */
  plot rstudent.*predicted.;
  /* Save predicted values, residuals and studentized residuals in REGDAT2. */
  output out=regdat2 p=predict r=resid rstudent=rstudent;
run;
quit;
The REG Procedure
Model: MODEL1
Dependent Variable: logmpg
Number of Observations Read 405
Number of Observations Used 397
Number of Observations with Missing Values 8
Analysis of Variance
Sum of Mean
Source DF Squares Square F Value Pr > F
Model 3 39.99793 13.22294 973.03 <.0001
Error 393 5.31299 0.01352
Corrected Total 399 44.93090
Root MSE 0.11927 R-Square 0.3319
Dependent Mean 3.10399 Adj R-Sq 0.3310
Coeff Var 3.74929
Parameter Estimates
Parameter Standard
Variable DF Estimate Error t Value Pr > |t| 95% Confidence Limits
Intercept 1 3.05330 0.00349 390.39 <.0001 3.04211 3.07543
wgtcent 1 -0.00032939 0.00000300 -41.24 <.0001 -0.00034592 -0.00031419
wgtcent2 1 5.507953E-8 3.912099E-9 9.40 <.0001 3.314305E-8 7.201102E-8
yearcent 1 0.03274 0.00193 19.49 <.0001 0.02944 0.03904
;
/* DO THE DIAGNOSTIC PLOTS LOOK BETTER? */
/* Normality check of the studentized residuals saved in REGDAT2. */
proc univariate data=regdat2 noprint;
  var rstudent;
  histogram / normal;
  /* Reference line uses the estimated mean and standard deviation. */
  probplot / normal (mu=est sigma=est);
run;
<
/* USE PROC TRANSREG TO SEE IF THERE IS A BETTER TRANSFORMATION OF Y. */
/* Box-Cox analysis of MPG; the predictors enter untransformed via IDENTITY. */
proc transreg data = cars3;
  model boxcox(mpg) = identity(wgtcent wgtcent2 yearcent);
run;
The TRANSREG Procedure
Transformation Information
for BoxCox(MPG)
Lambda R-Square Log Like
-3.00 0.77 -725.473
-2.75 0.79 -979.705
-2.50 0.31 -930.009
-2.25 0.32 -535.701
-2.00 0.34 -544.173
-1.75 0.35 -505.904
-1.50 0.39 -471.429
-1.25 0.37 -441.392
-1.00 0.33 -419.395
-0.75 0.33 -397.072
-0.50 0.33 -334.025
-0.25 0.88 -377.588 <
0.00 + 0.88 -377.877 *
0.25 0.33 -334.733
0.50 0.37 -397.797
0.75 0.39 -419.333
1.00 0.35 -439.907
1.25 0.34 -497.949
1.50 0.32 -493.953
1.75 0.30 -533.254
2.00 0.73 -570.093
2.25 0.79 -903.999
2.50 0.74 -949.730
2.75 0.72 -992.003
3.00 0.99 -735.935
< - Best Lambda
* - Confidence Interval
+ - Convenient Lambda
/* FOR BOX-COX TRANSFORMATIONS, Z = (Y^L - 1) / L. */
/* L = 0 => log transformation. */
/* INCLUSION OF ADDITIONAL PREDICTORS / EXCLUSION OF OUTLIERS */
/* MAY IMPROVE THE FIT AND THE DIAGNOSTIC PLOTS EVEN MORE. */
/* EXAMINE THE FIT IN MORE DETAIL USING SAS INSIGHT. */
/* FIT THE MODEL USING ANALYZE -> FIT (YX), */
/* AND EXAMINE THE FIT USING GRAPHS. */
/* PREDICTION INTERVALS ARE AVAILABLE FOR SIMPLE LINEAR */
/* REGRESSION MODELS WHEN USING INSIGHT. */
=

You might also like