/* normtest: univariate & multivariate normality tests Version 1.2 -- Date: September 16th, 2000 By: HervŽ M. CACI (hcaci@wanadoo.fr OR caci.h@chu-nice.fr) */ program define normtest version 5.0 *! Version 1.2 -- September 16th, 2000 /* Parse standard Stata commands */ local varlist "required existing min(2)" local if "optional" local in "optional" parse "`*'" parse "`varlist'", parse(" ") local nvar : word count `varlist' /* Number of variables */ /* "If" "in" implementation */ tempvar touse mark `touse' `if' `in' markout `touse' `varlist' preserve quietly drop if `touse'== 0 quietly summ `touse' local nobs=_result(1) /* Number of complete observations */ /* Matrix definitions */ matrix xdev=J(`nobs',`nvar',0) /* Deviation from mean */ matrix dev=J(1,`nvar',0) matrix devsq=J(1,`nvar',0) matrix ss=J(1,`nvar',0) matrix sdev=J(1,`nvar',0) matrix m2=J(1,`nvar',0) matrix m3=J(1,`nvar',0) matrix m4=J(1,`nvar',0) matrix sqrtb1=J(1,`nvar',0) matrix b2=J(1,`nvar',0) matrix b2minus3=J(`nvar',1,0) matrix g1=J(`nvar',1,0) matrix g2=J(`nvar',1,0) matrix stm3b2=J(`nvar',1,0) matrix zb2=J(`nvar',1,0) matrix sub1=J(`nvar',1,0) matrix pcs1=J(1,`nvar',0) matrix pcs2=J(1,`nvar',0) matrix pcs3=J(1,`nvar',0) matrix pcs4=J(1,`nvar',0) matrix mpc2=J(1,`nvar',0) matrix mpc3=J(1,`nvar',0) matrix mpc4=J(1,`nvar',0) matrix pcb1=J(1,`nvar',0) matrix pcb2=J(1,`nvar',0) matrix sqrtdinv=J(`nvar',`nvar',0) matrix corr3=J(`nvar',`nvar',0) matrix corr4=J(`nvar',`nvar',0) matrix sqrtq=J(`nvar',`nvar',0) matrix sqrtbb=J(`nvar',`nvar',0) matrix dsq=J(`nobs',1,0) matrix dsqb=J(`nobs',1,0) /* Constant definitions */ local n1=sqrt(`nobs'*(`nobs'-1))/(`nobs'-2) local n2=3*(`nobs'-1)/(`nobs'+1) local n3=(`nobs'^2-1)/((`nobs'-2)*(`nobs'-3)) /* ... for Anscombe & Glynn's tranformation for kurtosis */ local eb2=3*(`nobs'-1)/(`nobs'+1) local vb2=24*`nobs'*(`nobs'-2)*(`nobs'-3)/(((`nobs'+1)^2)*(`nobs'+3)*(`nobs'+5)) local svb2=sqrt(`vb2') local beta1= 6*(`nobs'*`nobs'-5*`nobs'+2)/((`nobs'+7)*(`nobs'+9)) /* */ *sqrt(6*(`nobs'+3)*(`nobs'+5)/(`nobs'*(`nobs'-2)*(`nobs'-3))) local a=6+(8/`beta1')*(2/`beta1'+sqrt(1+4/(`beta1'^2))) /* Beginning of matrix computations */ local i=1 while `i' <= `nvar' { quietly { local nam`i'="`1'" summ `1' gen xdev1 = `1'-_result(3) local j=1 while `j'<=`nobs' { matrix xdev[`j',`i'] = xdev1[`j'] /* Quick -mkmat- */ matrix dev[1,`i']=dev[1,`i']+xdev1[`j'] /* Quick CSUM(xdev) */ matrix ss[1,`i'] =ss[1,`i'] +xdev1[`j']^2 /* Quick CSUM(xdev&*xdev) */ matrix m3[1,`i'] =m3[1,`i'] +xdev1[`j']^3 matrix m4[1,`i'] =m4[1,`i'] +xdev1[`j']^4 local j=`j'+1 } matrix devsq[1,`i']=dev[1,`i']*dev[1,`i']/`nobs' matrix m2[1,`i']=(ss[1,`i']-devsq[1,`i'])/`nobs' matrix sdev[1,`i']=sqrt(m2[1,`i']) matrix m3[1,`i']=m3[1,`i']/`nobs' matrix m4[1,`i']=m4[1,`i']/`nobs' matrix sqrtb1[1,`i']=m3[1,`i']/(m2[1,`i']*sdev[1,`i']) matrix b2[1,`i']=m4[1,`i']/m2[1,`i']^2 /* Will be transposed later */ matrix g1[`i',1]=`n1'*sqrtb1[1,`i'] /* sqrtb1 is quickly transposed */ matrix g2[`i',1]=(b2[1,`i']-`n2')*`n3' /* b2 is quickly transposed */ /* Anscome & Glynn's transformation for kurtosis */ matrix stm3b2[`i',1]=(b2[1,`i']-`eb2')/`svb2' matrix zb2[`i',1]=(1-2/(9*`a')-((1-2/`a')/ /* */ (1+stm3b2[`i',1]*sqrt(2/(`a'-4))))^(1/3))/sqrt(2/(9*`a')) matrix b2minus3[`i',1]=b2[1,`i']-3 /* Next variable */ macro shift local i=`i'+1 drop xdev1 } /* quietly */ } /* while */ matrix sqrtb1=sqrtb1' matrix b2=b2' matrix drop m2 m3 m4 /* Free up some memory before going on */ /* Quantities needed for multivariate statistics */ matrix s=xdev'*xdev /* In SPSS, COMPUTE S=SSCP(xdev) -> nvar X nvar */ matrix sb=s /* Copy the matrix for later use in the loop */ local i=1 local n1=`nobs'-1 while `i'<=`nvar' { local j=1 while `j'<=`nvar' { matrix sb[`i',`j']=s[`i',`j']/`nobs' matrix s[`i',`j']=s[`i',`j']/`n1' local j=`j'+1 } local i=`i'+1 } local i=1 while `i'<=`nvar' { matrix sqrtdinv[`i',`i']=sqrt(s[`i',`i']) local i=`i'+1 } matrix sqrtdinv=inv(sqrtdinv) matrix corr=sqrtdinv*s*sqrtdinv /* Principal components for Srivastava's tests */ matrix svd u q v = s matrix pc=xdev*v matrix svd aa bb cc = sb matrix pcb=xdev*cc matrix drop xdev /* Mahalanobis distances */ local i=1 while `i'<=`nvar' { /* This outer loop computes the element-wise */ matrix sqrtq[`i',`i'] =sqrt(q[1,`i']) /* square-root of the vector matrices and */ matrix sqrtbb[`i',`i']=sqrt(bb[1,`i']) /* simultaneously builds the symmetric matrices */ local j=1 while `j'<=`nvar' { /* This inner loop raises to the third and */ matrix corr3[`i',`j']=corr[`i',`j']^3 /* fourth power the correlation matrix that will */ matrix corr4[`i',`j']=corr[`i',`j']^4 /* be used later in Small's tests */ local j=`j'+1 } local i=`i'+1 } matrix sqrtqinv=inv(sqrtq) matrix stdpc=pc*sqrtqinv matrix sqrtbbi=inv(sqrtbb) matrix stdpcb=pcb*sqrtbbi local i=1 /* Loop to simultaneously execute 2 "RSSQ" commands */ while `i'<=`nobs' { local j=1 local s1=0 local s2=0 while `j'<=`nvar' { local s1=`s1'+stdpc[`i',`j']^2 local s2=`s2'+stdpcb[`i',`j']^2 local j=`j'+1 } matrix dsq[`i',1]=`s1' matrix dsqb[`i',1]=`s2' local i=`i'+1 } matrix drop stdpc matrix drop stdpcb /* Univariate skew and kurtosis */ /* - approximate Johnson's SU transformation for skew */ di in gr _n "Number of observations:" %9.0g in ye `nobs' di in gr "Number of variables:" %9.0g in ye `nvar' matrix y=sqrtb1*sqrt((`nobs'+1)*(`nobs'+3)/(6*(`nobs'-2))) local beta2=3*(`nobs'^2+27*`nobs'-70)*(`nobs'+1)*(`nobs'+3)/ /* */ ((`nobs'-2)*(`nobs'+5)*(`nobs'+7)*(`nobs'+9)) local w=sqrt(sqrt(2*(`beta2'-1))-1) local delta=1/sqrt(ln(`w')) local alpha=sqrt(2/(`w'*`w'-1)) matrix yalpha = y / `alpha' di _n in gr "Measures and tests of skew" di in gr _dup(26) "-" _n di in gr " Var g1 sqrt(b1) z(b1) p-value" di in gr "----------+----------+----------+----------+----------" local i=1 while `i'<= `nvar' { matrix sub1[`i',1]=`delta'*ln(yalpha[`i',1]+sqrt(1+yalpha[`i',1]^2)) di in gr _col(2) "`nam`i''" _col(11) "|" /* */ %9.4f in ye _col(12) g1[`i',1] in gr _col(22) "|" /* */ %9.4f in ye _col(23) sqrtb1[`i',1] in gr _col(33) "|" /* */ %9.4f in ye _col(34) sub1[`i',1] in gr _col(44) "|" /* */ %9.4f in ye _col(45) 2*(1-normprob(abs(sub1[`i',1]))) local i = `i'+1 } /* Anscombe & Glynn's tranformation for kurtosis */ di _n in gr "Measures and tests of kurtosis" di in gr _dup(30) "-" _n di in gr " Var g2 b2 - 3 z(b2) p-value" di in gr "----------+----------+----------+----------+----------" local i = 1 while `i'<=`nvar' { di in gr _col(2) "`nam`i''" _col(11) "|" /* */ %9.4f in ye _col(12) g2[`i',1] in gr _col(22) "|" /* */ %9.4f in ye _col(23) b2minus3[`i',1] in gr _col(33) "|" /* */ %9.4f in ye _col(34) zb2[`i',1] in gr _col(44) "|" /* */ %9.4f in ye _col(45) 2*(1-normprob(abs(zb2[`i',1]))) local i = `i'+1 } /* Omnibus tests of normality */ matrix ksq=J(`nvar',1,0) matrix lm =J(`nvar',1,0) di in gr _n "Omnibus tests of normality (both chisq, 2df)" di in gr _dup(44) "-" _n di in gr " D'Agostino & Pearson | Jarque & Bera" di in gr "----------+----------+----------+---------------------" di in gr " | K sq | p | LM | p" di in gr "----------+----------+----------+----------+----------" local i=1 while `i'<=`nvar' { matrix ksq[`i',1]=sub1[`i',1]^2+zb2[`i',1]^2 matrix lm [`i',1]=`nobs'*((sqrtb1[`i',1]^2/6)+(b2minus3[`i',1]^2/24)) di in gr _col(2) "`nam`i''" _col(11) "|" /* */ %9.4f in ye _col(12) ksq[`i',1] in gr _col(22) "|" /* */ %9.4f in ye _col(23) chiprob(2,ksq[`i',1]) in gr _col(33) "|" /* */ %9.4f in ye _col(34) lm[`i',1] in gr _col(44) "|" /* */ %9.4f in ye _col(45) chiprob(2,lm[`i',1]) local i=`i'+1 } di in gr _n "***** Multivariate statistics *****" _n /* Small's multivariate tests */ matrix uinv=inv(corr3) matrix q1=sub1'*uinv*sub1 matrix uinv2=inv(corr4) matrix q2=zb2'*uinv2*zb2 di in gr "Tests of multivariate skew" di in gr _dup(27) "-" _n di in gr "(1) Small's test (chisq): Q1=" %9.4f in ye q1[1,1] /* */ in gr " p= " %5.4f in ye chiprob(`nvar',q1[1,1]) /* */ in gr " df =" %9.0f in ye `nvar' /* Srivastava's multivariate tests */ local i=1 local sqb1p=0 local b2p=0 while `i'<=`nvar' { local j=1 while `j'<=`nobs' { matrix pcs1[1,`i']=pcs1[1,`i']+pc[`j',`i'] matrix pcs2[1,`i']=pcs2[1,`i']+pc[`j',`i']^2 matrix pcs3[1,`i']=pcs3[1,`i']+pc[`j',`i']^3 matrix pcs4[1,`i']=pcs4[1,`i']+pc[`j',`i']^4 local j=`j'+1 } matrix mpc2[1,`i']= (pcs2[1,`i']-(pcs1[1,`i']^2/`nobs'))/`nobs' matrix mpc3[1,`i']= (pcs3[1,`i']-(3/`nobs'*pcs1[1,`i']*pcs2[1,`i']) /* */ + (2/(`nobs'^2)*(pcs1[1,`i']^3)))/`nobs' matrix mpc4[1,`i']= (pcs4[1,`i']-(4/`nobs'*pcs1[1,`i']*pcs3[1,`i']) /* */ + (6/(`nobs'^2)*(pcs2[1,`i']*(pcs1[1,`i']^2))) /* */ - (3/(`nobs'^3)*(pcs1[1,`i']^4)))/`nobs' matrix pcb1[1,`i']=mpc3[1,`i']/(mpc2[1,`i']^1.5) matrix pcb2[1,`i']=mpc4[1,`i']/(mpc2[1,`i']^2) local sqb1p=`sqb1p'+pcb1[1,`i']^2 local b2p=`b2p'+pcb2[1,`i'] local i=`i'+1 } matrix drop pc local sqb1p=`sqb1p'/`nvar' local b2p=`b2p'/`nvar' local chib1=`sqb1p'*`nobs'*`nvar'/6 local normb2=(`b2p'-3)*sqrt(`nobs'*`nvar'/24) di in gr "(2) Srivastava's test: chi(b1p)=" %9.4f in ye `chib1' /* */ in gr " p= " %5.4f in ye chiprob(`nvar',`chib1') /* */ in gr " df= " %5.0f in ye `nvar' di in gr _n "Tests of multivariate kurtosis" di in gr _dup(30) "-" _n di in gr "(1) A variant of Small's test (chisq): VQ2=" %9.4f in ye q2[1,1] /* */ in gr " p= " %5.4f in ye chiprob(`nvar',q2[1,1]) /* */ in gr " df=" %9.0f in ye `nvar' di in gr "(2) Srivastava's test: b2p=" %9.4f in ye `b2p' /* */ in gr " N(b2p)=" %9.4f in ye `normb2' /* */ in gr " p= " %5.4f in ye 2*(1-normprob(abs(`normb2'))) local i=1 local b2pm=0 while `i'<=`nobs' { local b2pm=`b2pm'+dsqb[`i',1]^2 local i=`i'+1 } local b2pm=`b2pm'/`nobs' local nb2pm=`nvar'*(`nvar'+2) local nb2pm=(`b2pm'-`nb2pm')/sqrt(8*`nb2pm'/`nobs') di in gr "(3) Mardia's test: b2p=" %9.4f in ye `b2pm' /* */ in gr " N(b2p)=" %9.4f in ye `nb2pm' /* */ in gr " p= " %5.4f in ye 2*(1-normprob(abs(`nb2pm'))) local q3=q1[1,1]+q2[1,1] di in gr "(4) Omnibus test of multivariate normality" di in gr " (based on Small's test, chisq): VQ3= " %9.4f in ye `q3' /* */ in gr " p= " %5.4f in ye chiprob(2*`nvar',`q3') /* */ in gr " df= " %5.0f in ye 2*`nvar' /* Putative multivariate outliers */ tempvar dsqvar rnk top chisq local ddf=`nobs'-`nvar'-1 scalar f01=invfprob(`nvar',`ddf',0.01/`nobs') scalar f05=invfprob(`nvar',`ddf',0.05/`nobs') local fc01=(f01*`nvar'*(`nobs'-1)^2)/(`nobs'*(`ddf'+`nvar'*f01)) local fc05=(f05*`nvar'*(`nobs'-1)^2)/(`nobs'*(`ddf'+`nvar'*f05)) qui gen case= _n qui svmat dsq,name(dsqvar) qui egen rnk=rank(dsqvar) qui gen top=(`nobs'+1)-rnk sort top di in gr _n "Critical values (Bonferroni) for a single multivariate outlier:" di in gr _dup(63) "-" _n di in gr " F(0.05/n) = " %5.2f `fc05' " --- df = " %3.0f `nvar' ", " %3.0f `ddf' di in gr " F(0.01/n) = " %5.2f `fc01' " --- df = " %3.0f `nvar' ", " %3.0f `ddf' di in gr _n "5 observations with largest Mahalanobis distances:" _n di in gr " Rank ! Case ! Mahalanobis D-square" di in gr "------+------+---------------------" local i=1 while `i'<=5 { di in ye _col(4) %1.0f `i' _col(7) in gr "!" /* */ in ye _col(9) %4.0f case[`i'] _col (14) in gr "!" /* */ in ye _col(17) %10.2f dsqvar[`i'] local i=`i'+1 } /* Plot */ gen chisq=invchi(`nvar',1-((rnk-0.5)/`nobs')) label var chisq "Chi2 n_var,1-((i-0.5)/n_obs)" label var dsq "Mahalanobis D-squared" graph dsq chisq, title("Plot of ordered squared distances") xlabel ylabel end