*! version 4.2 18nov2010 -- Ph. Van Kerm * -- change epan option to epanechnikov * -- add options kernel() and bwidth() for compatibility with Stata 10+ syntax * version 4.1 26aug2010 -- Ph. Van Kerm * -- add cdf variability bands *! version 4.0 10mar2010/13jul2010 -- Ph. Van Kerm * -- add cdf() option * version 2.0 22sep2005 -- Ph. Van Kerm (SJ3-2: st0037, SJ4-1: st0037_1, SJ?-?: st0037_2) * version 1.3 15dec2003 -- Ph. Van Kerm (SJ3-2: st0037, SJ4-1: st0037_1) * version 1.2 05jan2003 -- Ph. Van Kerm * AKDENSITY0 computes adaptive kernel density estimates. * Essentially official -kdensity- hacked to allow adaptive * stuff and report variability bands. * Can be used alone, or as an engine for AKDENSITY * version 1.3 corrects a bug in -ipolate4- * version 2.0 corrects an errorin the formula used for * computing the variability bands for * Epanechnikov kernel. Adds the EPAN2 option * for alternative Epanechnikov Kernel. program define akdensity0, rclass sortpreserve version 7.0 syntax varname [if] [in] [fw aw] , /* */ AT(varname) /* */ Generate(string) /* */ [Width(string) BWidth(string) CDF(string) STDBands(real 0) LAMBDA(string) /* */ EPanechnikov GAUssian EPAN2 Kernel(string) DOUBLE] /** 0. get width: added 20101118 to handle bwidth() and width() **/ local width `width' `bwidth' local k : word count `width' if `k' > 1 { di as err "width() and bwidth() are mutually exclusive" exit 198 } if `k' == 0 { di as err "width() or bwidth() required" exit 198 } **end addition 2010-11-18 /** 1. get var names and labels **/ local ix `"`varlist'"' local ixl: variable label `ix' if `"`ixl'"'=="" { local ixl "`ix'" } /** 2. check validity of options and mark sample **/ local gen `"`generat'"' ** added 2011-11-18 (to update to Stata 11 -kernel()- option) ** copied from kdensity.ado with adjustment to allow only epan epan2 gauss local kernel_old /// `epanechnikov' /// `gaussian' /// `epan2' local k : word count `kernel_old' if `"`kernel'"' == "" { if `k' > 1 { di as err "only one kernel may be specified" exit 198 } if `k' == 0 { local kernel epanechnikov } else { local kernel `kernel_old' } } else { if `k' != 0 { di as err "kernel(): old syntax " /// "may not be combined with new syntax" exit 198 } local k : word count `kernel' if `k' > 1 { di as err "only one kernel may be specified" exit 198 } _get_kernel_name, kernel(`"`kernel'"') local kernel `s(kernel)' if `"`kernel'"' == "" { di as err "invalid kernel function" exit 198 } } // **end addition 2010-11-18 /* local kflag = ( (`"`epan'"' != `""') + (`"`gauss'"' != `""') + (`"`epan2'"' != `""') ) if `kflag' > 1 { di in red `"only one kernel may be specified"' exit 198 } if `"`gauss'"' != `""' { local kernel=`"Gaussian"' } else { if `"`epan2'"' != `""' { local kernel=`"Alternative Epanechnikov"' } else { local kernel=`"Epanechnikov"' } } */ marksample use /* use identifies variables in IF/IN clauses */ qui count if `use' if r(N)==0 { error 2000 } confirm new var `generate' local yl `"`generate'"' local xl `"`at'"' if "`cdf'"~="" { confirm new var `cdf' qui gen `double' `cdf' = . } if "`lambda'"~="" {confirm new var `lambda'} /** 3. prepare the temporary variables **/ tempvar d m z y qui gen `double' `d'=. /* estimated density at m */ qui gen double `y'=. /* value of kernel times weight */ qui gen `z'=. /* distance from current grid point */ qui gen `m'=. /* grid points */ /** 4. count number of grid points, create 'm' and sort in grid order **/ qui count if `at' != . local n = r(N) qui replace `m' = `at' local srtlst : sortedby tempvar obssrt gen `obssrt' = _n sort `m' `obssrt' /** 5. care for the weights: generate the tt variable **/ if "`weight'" != "" { tempvar tt qui gen double `tt' `exp' if `use' qui summ `tt', meanonly if "`weight'" == "aweight" { qui replace `tt' = `tt'/r(mean) } } else { local tt = 1 } /** 6. computation of the density function **/ local i 1 if `"`kernel'"' == `"gaussian"' { * if `"`gauss'"' != `""' { local con1 = sqrt(2*_pi) while `i'<=`n' { qui replace `z'=(`ix'-`m'[`i'])/(`width') if `use' qui replace `y'= (1/`width')*(exp(-0.5*((`z')^2))/`con1') /* kernel divided by width */ qui summ `y' [aw=`tt'] , meanonly qui replace `d'=(r(mean)) in `i' qui replace `y' = . if ("`cdf'"!="") { qui replace `y'= normal(-`z') qui summ `y' [aw=`tt'] , meanonly qui replace `cdf'=(r(mean)) in `i' qui replace `y' = . } local i = `i'+1 } qui replace `d'=0 if `d'==. in 1/`n' } else { if `"`kernel'"' == `"epan2"' { * if `"`epan2'"' != `""' { local con1 = 3/4 local con2 = 1 local con3 = 1/3 while `i'<=`n' { qui replace `z'=(`ix'-`m'[`i'])/(`width') if `use' qui replace `y'= cond(abs(round(`z',1e-8))<=`con2', /* */ (1/`width')*(`con1'*(1-((`z')^2))) , 0) /* */ if `z'~=. qui summ `y' [aw=`tt'] if `use' , meanonly qui replace `d'=(r(mean)) in `i' qui replace `y'=. if ("`cdf'"!="") { qui replace `y'= cond(round(-`z',1e-8)<-`con2', 0, cond(round(-`z',1e-8)>`con2',1, /* */ (0.5 + `con1'*(-`z' - `con3'*(-`z')^3) ) )) /* */ if `z'~=. qui summ `y' [aw=`tt'] , meanonly qui replace `cdf'=(r(mean)) in `i' qui replace `y' = . } local i = `i'+1 } qui replace `d'=0 if `d'==. in 1/`n' } else { local con1 = 3/(4*sqrt(5)) local con2 = sqrt(5) local con3 = 1/15 while `i'<=`n' { qui replace `z'=(`ix'-`m'[`i'])/(`width') if `use' qui replace `y'= cond(abs(round(`z',1e-8))<=`con2', /* */ (1/`width')*(`con1'*(1-(((`z')^2)/5))) , 0) /* */ if `z'~=. qui summ `y' [aw=`tt'] if `use' , meanonly qui replace `d'=(r(mean)) in `i' qui replace `y'=. if ("`cdf'"!="") { qui replace `y'= cond(round(-`z',1e-8)<-`con2', 0, cond(round(-`z',1e-8)>`con2',1, /* */ (0.5 + `con1'*(-`z'-(((-`z')^3)/15))) )) /* */ if `z'~=. qui summ `y' [aw=`tt'] , meanonly qui replace `cdf'=(r(mean)) in `i' qui replace `y' = . } local i = `i'+1 } qui replace `d'=0 if `d'==. in 1/`n' } } /** 7. if lambda required: generate the lambda variable **/ if "`lambda'"~="" { tempvar fipol qui ipolate4 `m' `d' `ix' , generate(`fipol') zero `double' qui means `fipol' [aw=`tt'] if `use' qui gen `double' `lambda' = sqrt(r(mean_g)/`fipol') if `use' } /** 8. if std error bands required: generate the bands **/ if (`stdbands'>0) { tempvar hipol cap confirm var `width' if _rc==0 {qui ipolate4 `ix' `width' `m' , generate(`hipol') epolate} else {qui gen `hipol' = `width'} tempvar sqw tempname totsqw qui gen `sqw' = (`tt'^2) if `use' su `sqw' , meanonly scalar `totsqw' = r(sum) qui replace `sqw' = `tt' if `use' su `sqw' , meanonly if `"`kernel'"' == `"gaussian"' { * if `"`gauss'"' != `""' { loc const = 1/(2*sqrt(_pi)) loc constcdf = 1/sqrt(_pi) /* Hansen lec notes */ } else { if `"`kernel'"' == `"epan2"' { * if `"`epan2'"' != `""' { loc const = 0.6 loc constcdf = 9/35 /* Hansen lec notes */ } else { loc const = 3/(5*sqrt(5)) loc constcdf = 0.57498891 /* pvk calculations */ } } confirm new variable `yl'_up confirm new variable `yl'_lo qui gen `double' `yl'_up = `d' + `stdbands'*(sqrt((`totsqw'/((r(sum))^2))*(`d'/`hipol')*(`const'))) qui gen `double' `yl'_lo = `d' - `stdbands'*(sqrt((`totsqw'/((r(sum))^2))*(`d'/`hipol')*(`const'))) if ("`cdf'"!="") { confirm new variable `cdf'_up confirm new variable `cdf'_lo tempvar varcdf qui gen double `varcdf' = max(0, (`totsqw'/((r(sum))^2)) * ( (`cdf' * (1 - `cdf')) - (`d'*`hipol'*`constcdf') ) ) /* See Hansen lec notes */ qui gen `double' `cdf'_up = `cdf' + `stdbands'* sqrt(`varcdf') qui gen `double' `cdf'_lo = `cdf' - `stdbands'* sqrt(`varcdf') } } /** 9. double save in S_# and r() **/ ret clear ret local kernel `"`kernel'"' global S_1 `"`kernel'"' /** 10. rename and label created vars **/ label var `d' `"density: `ixl'"' rename `d' `yl' if ("`cdf'" != "") { lab var `cdf' `"smooth cdf: `ixl'"' } sort `srtlist' `obssrt' /* re-put the values in original order then by grid */ end program define ipolate4 , sortpreserve version 7 syntax varlist(min=3 max=3), generate(string) [ {Zero|epolate} DOUBLE] tempvar tmp tmppos confirm new var `generate' tokenize `varlist' local initX "`1'" local initY "`2'" local newX "`3'" loc sortvar : sortedby * ce qui est out-of-range (no extrapolation -> mis a missing par deaut (on peut mettre a zero) sort `initX' qui count if `initX'~=. loc nbinit = _result(1) loc i 1 qui gen `double' `generate' = . qui gen `tmppos' = _n preserve keep `initX' `initY' `newX' `generate' `tmppos' qui stack `initX' `initY' `tmppos' `newX' `generate' `tmppos', into(x y pos) clear if "`epolate'"~="" { qui ipolate y x , gen(ynew) epolate } else { qui ipolate y x , gen(ynew) if "`zero'"~="" {replace ynew = 0 if ynew==. & x~=. } } qui keep if _stack==2 rename ynew `tmp' drop y x * rename x `newX' rename pos `tmppos' drop _stack tempfile tmpsav sort `tmppos' qui save `"`tmpsav'"' , replace restore sort `tmppos' qui merge `tmppos' using `"`tmpsav'"' qui replace `generate' = `tmp' drop _merge end