DM 'LOG;CLEAR;OUT;CLEAR;'; options ls=79 pagesize=60 nodate; filename temp 'pcomp.ps'; goptions device= ps gsfname = temp gsflen=132 vsize = 6 gsfmode=append; title 'Example 5.1 on page 101'; data raw; infile '~/multi.var/480/99/data/applican.dat'; input ident fl app aa la sc lc hon sms exp drv amb gsp pot kj suit; proc princomp data=raw out= v_scores outstat=v_eig covariance; var fl--suit; title 'PC Analysis on Sample Covariance Matrix'; proc princomp data=raw out= c_scores outstat=c_eig; var fl--suit; title 'PC Analysis on Sample Correlation Matrix'; proc print data = v_scores; var ident prin1-prin4; title 'Values of First 4 Principle Component Scores Based on the Covariance Matrix'; proc print data = c_scores; var ident prin1-prin4; title 'Values of First 4 Principle Component Scores Based on the Correlation Matrix'; symbol1 value=dot width=3 L=1; proc capability data = v_scores graphics normal; var prin1; histogram prin1 / Normal (indices); inset mean (5.3) std = 'Std Dev' (5.3) Skewness (5.3) Kurtosis (5.3)/ header = 'Summary Statistics' pos = nw; cdfplot prin1 / Normal; qqplot prin1 / Normal; title 'Plot of First PC Score from Covariance Matrix'; proc capability data = c_scores graphics normal; var prin1; histogram prin1 / Normal (indices); inset mean (5.3) std = 'Std Dev' (5.3) Skewness (5.3) Kurtosis (5.3)/ header = 'Summary Statistics' pos = nw; cdfplot prin1 / Normal; qqplot prin1 / Normal; title 'Plot of First PC Score from Correlation Matrix'; run; proc iml; use v_eig; reset noname; read all var _num_ into X where (_TYPE_ = "EIGENVAL"); evals = X`; p=nrow(evals); index=1:p; index=index`; evals = index || evals; names = {'index' 'evals'}; create v_scree from evals [colname = names]; append from evals; close v_scree; symbol color=black interpol=join value=dot height=1; proc gplot data = v_scree; plot evals*index; title 'Scree Plot: Covariance Matrix'; run; * proc print data=c_eig; proc iml; use c_eig; reset noname; read all var _num_ into X where (_TYPE_ = "EIGENVAL"); evals = X`; p=nrow(evals); index=1:p; index=index`; read all var _num_ into E where (_TYPE_ = "SCORE"); E=E`; D=diag(sqrt(evals)); Load = E*D; read all var _char_ into ch_names where (_TYPE_ = "CORR"); read all var _char_ into pc_names where (_TYPE_ = "SCORE"); pc_names = pc_names[,2]; ch_names = ch_names[,2]; print 'Principal Component Loading Vectors'; Load = Load[,1:5]; pc_names=pc_names[1:5]; print Load[rowname=ch_names colname=pc_names]; evals = index || evals; names = {'index' 'evals'}; create c_scree from evals [colname = names]; append from evals; close c_scree; proc gplot data = c_scree; plot evals*index; title 'Scree Plot: Correlation Matrix'; run; title 'Inference on eigen-values'; * proc print data=v_eig; proc iml; use v_eig; reset noname; read all var _num_ into evals where (_TYPE_ = "EIGENVAL"); evals=evals`; read all var _num_ into S where (_TYPE_ = "COV"); read all var _num_ into n where (_TYPE_ = "N"); n=n[1]; dfs=n-1; read all var _num_ into Evect where (_TYPE_ = "SCORE"); Evect=Evect`; p=nrow(evals); index=1:p; index=index`; */ Test of Independence of all Variables */; */ use probchi(statistic,df) */; w=0; do i=1 to p; w=w+log(evals[i])-log(S[i,i]); end; w=-(dfs-(2*p+5)/6)*w; df=p*(p-1)/2; p_val=1-probchi(w,df); print 'Test of Independence of All Variables'; print w df p_val; */ Confidence Intervals for Eigen-Values */; low=J(p,1,0); high=J(p,1,0); e1=exp(-1.96*sqrt(2/(n-1))); e2=exp(1.96*sqrt(2/(n-1))); do i=1 to p; low[i]=evals[i]*e1; high[i]=evals[i]*e2; end; out= evals||low||high; cnames={'eigen-value' 'Lower Limit' 'Upper Limit'}; print out[colname=cnames]; */ Covariance Matrix of First Eigen-Vector */; V=J(p,p,0); do i=2 to p; V=V+(evals[1]*evals[i]/(dfs*(evals[1]-evals[i])##2))*Evect[,i]*Evect[,i]`; end; print 'Covariance Matrix of First Eigen-Vector'; print V; */ Tests of Equality of Smallest Eigen-Values */; Q=J(p-1,1,0); df1=J(p-1,1,0); do r=2 to p; ebar=sum(evals[p-r+1:p])/r; qq=sum(log(evals[p-r+1:p]/ebar)); Q[r-1]=-(dfs-p+r-(2*r##2+r+2)/(6*r##2))*qq; df1[r-1]=(r-1)*(r+2)/2; end; pval=1-probchi(Q,df1); index1=index[2:p]; out= index1||Q||df1||pval; cnames={'r' 'Q' 'df' 'p-value'}; print out[colname=cnames]; */ Confidence Intervals for Variance Accounted for by Smallest Eigen-Values */; tr=sum(evals); delta=J(p,1,0); U=J(p,1,0); do r=1 to p-1; sumr=sum(evals[p-r+1:p]); delta[r]=sumr/tr; s1=sum(evals[1:p-r]##2); s2=sum(evals[p-r+1:p]##2); std=sqrt((2*delta[r]##2*s1+2*(1-delta[r])##2*s2)/(dfs*tr##2)); U[r]=delta[r]+1.645*std; end; U[p]=1; delta[p]=1; index1=index[1:p]; out=index1||delta||U; cnames={'r' 'delta' 'Upper Limit'}; print out[colname=cnames];