*! bidensity: bivariate kernel density estimation *! version 0.9.0 25jul2012 by John Luke Gallup and Kit Baum (jlgallup@pdx.edu) program define bidensity, rclass version 12.1 syntax varlist(min=2 max=2) [if] [in] [fw aw] [, N(integer 50) /// XWidth(real 0.0) YWidth(real 0.0) Saving(string) REPLACE /// MName(string) kernel(string) noGRaph SCatter SCAtter1(string) *] // mname specifies name stub for global mata matrices for // density, xvalues, yvalues local k : word count `kernel' if `k' == 0 local kernel epanechnikov else { if `k' > 1 { di as err "only one kernel may be specified" exit 198 } _kernel_name, kernel(`"`kernel'"') local kernel `s(kernel)' if `"`kernel'"' == "" { di as err "invalid kernel function" exit 198 } } preserve marksample touse qui count if `touse' if r(N)==0 error 2000 quietly keep if `touse' if "`weight'" != "" { quietly { tempvar wvar gen double `wvar' `exp' if "`weight'" == "aweight" { summ `wvar', meanonly replace `wvar' = `wvar'/r(mean) } } } mata: _BiDensity("`kernel'", "`varlist'", `xwidth', /// `ywidth', `n', "`wvar'", /// ("`saving'"!="")|("`graph'"==""), "`mname'", "`replace'") local newvars "_d _`return(yvar)' _`return(xvar)'" if ("`graph'"=="") { if ("`scatter'"!="" | `"`scatter1'"'!="") /// local scatter `"scatter `varlist', `scatter1' || "' if (strpos("`options'", "ylab") == 0) loc options "`options' ylab(,angle(0))" twoway `scatter'contourline `newvars' if _d!=., `options' } if ("`saving'"!="") { keep `newvars' qui keep if !missing(_d) save `saving', `replace' } restore end // parsing facility to retrieve kernel name (from kdensity.ado) program _kernel_name, sclass syntax , KERNEL(string) local kernlist epanechnikov epan2 gaussian triangle rectangle // currently unused kernels: biweight cosine parzen local maxabbrev 2 3 5 3 3 // # of letters for abbreviation tokenize `maxabbrev' local i = 1 foreach kern of local kernlist { if substr("`kern'",1,length(`"`kernel'"')) == `"`kernel'"' /// & length(`"`kernel'"') >= ``i'' { sreturn local kernel `kern' continue, break } else { sreturn local kernel } local ++i } end version 12.1 mata: void _BiDensity(string scalar kernel, /// string scalar varlist, /// real scalar xwid, /// real scalar ywid, /// real scalar n, /// string scalar wvar, /// real scalar sav_graph, /// string scalar mname, /// string scalar replace) { // calculate bivariate kernel density values (x vs. y) // of dimensions n^2 yxvar = tokens(varlist) st_view(x,.,yxvar[2]) st_view(y,.,yxvar[1]) N = st_nobs() // N is number of observations in data if (n>N) { // n is number of bins errprintf("warning: n(%f) > no. of observations; n set to %f",n,N) n = N } if (n<=1) n = max((N,50)) // create weight variable if (wvar=="") w = 1 else { st_view(w,.,wvar) N = sum(w) } // calculate bandwidths if (xwid<=0) xwid = 0.9*min((sqrt(variance(x,w)),mm_iqrange(x,w)/1.349))/(N^0.20) if (ywid<=0) ywid = 0.9*min((sqrt(variance(y,w)),mm_iqrange(y,w)/1.349))/(N^0.20) // make grid of x & y values (n of each) xymin = colmin((x,y)) xyscale = 1/(n-1)*(colmax((x,y))-xymin+2*(xwid,ywid)) mxy = J(n,1,xyscale) mxy[1,] = (0,0) mxy = (runningsum(mxy[,1]),runningsum(mxy[,2])) mxy = mxy :+ (xymin-(xwid,ywid)) mx = mxy[,1] my = mxy[,2] wid = xwid*ywid d = J(n,n,.) if (kernel=="epanechnikov") { for (i=1; i<=n; i++) { azx = abs((x :- mx[i])/xwid) zx2_1 = (1:-azx:^2:/5) :* (azx:<1) for (j=1; j<=n; j++) { azy = abs((y :- my[j])/ywid) k = zx2_1 :* (1:-azy:^2:/5) :* (azy:<1) d[j,i] = mean(k,w) } } d = 0.1125/wid*mean(w) :* d // rescale; 0.1125 = 9/80 } if (kernel=="epan2") { for (i=1; i<=n; i++) { azx = abs((x :- mx[i])/xwid) zx2_1 = 1:-azx:^2 :* (azx:<1) for (j=1; j<=n; j++) { azy = abs((y :- my[j])/ywid) k = zx2_1 :* (1:-azy:^2) :* (azy:<1) d[j,i] = mean(k,w) } } d = 0.5625/wid*mean(w) :* d // rescale; 0.5625 = 9/16 } else if (kernel=="gaussian") { for (i=1; i<=n; i++) { zx2 = ((x :- mx[i])/xwid):^2 for (j=1; j<=n; j++) { zy2 = ((y :- my[j])/ywid):^2 k = exp(-0.5 :* (zx2 :+ zy2)) d[j,i] = mean(k,w) } } d = 1/(2*pi()*wid)*mean(w) :* d // rescale } else if (kernel=="triangle") { for (i=1; i<=n; i++) { azx = abs((x :- mx[i])/xwid) zx1 = 1:-azx :* (azx:<1) for (j=1; j<=n; j++) { azy = abs((y :- my[j])/ywid) k = zx1 :* (1:-azy) :* (azy:<1) d[j,i] = mean(k,w) } } d = mean(w)/wid :* d // rescale } else if (kernel=="rectangle") { for (i=1; i<=n; i++) { azx = abs((x :- mx[i])/xwid) :< 1 for (j=1; j<=n; j++) { k = azx :* (abs((y :- my[j])/ywid) :< 1) d[j,i] = mean(k,w) } } d = 0.25*mean(w)/wid :* d // rescale } _editmissing(d,0) // replace any . with 0 in d if (sav_graph) { ylab = st_varlabel(yxvar[1]) xlab = st_varlabel(yxvar[2]) dvecn = n^2 // length of vec(d) if (dvecn>N) st_addobs(dvecn-N) nvec = J(n,1,1) newvars = ("_d","_":+yxvar) st_store((1,dvecn), st_addvar("double", newvars), (vec(d),nvec#my,mx#nvec)) st_varlabel(newvars[2], ylab) st_varlabel(newvars[3], xlab) } st_rclear() st_global("return(kernel)", kernel) st_global("return(xvar)", yxvar[2]) st_global("return(yvar)", yxvar[1]) st_numscalar("return(xwidth)", xwid) st_numscalar("return(ywidth)", ywid) st_numscalar("return(xscale)", xyscale[,1]) st_numscalar("return(yscale)", xyscale[,2]) st_numscalar("return(N)", n) if (mname!="") { // put results in global mata matrics mnames = mname:+("_x","_y","_d") for (i=1; i<=3; i++) rmexternal(mnames[i]) *crexternal(mnames[1]) = mx *crexternal(mnames[2]) = my *crexternal(mnames[3]) = d } } end // end of mata