* ! MKER-Multivariate Kernel regression - 09oct2017 - GCerulli
********************************************************************************
*Mata function "mf_kernel_reg.mata"
********************************************************************************
mata: mata clear
mata: mata set matastrict off
version 14
mata:
// mf_kernel_reg 1.2 GCerulli 06may2017  (including cross-validation)
void function mf_kernel_reg(
string scalar xvars,
string scalar response,
string scalar fit,
real scalar bandh,
string scalar touse,
string scalar kernel,
string scalar model)
{
real matrix X, Z, mc, y, ystar, X0, X0WX0, X0WY
real colvector W , W1, wstar, d, b , d_std , d_h , C, ONE
real scalar n, k, i, j
string rowvector vars, v
st_view(X, ., tokens(xvars), touse)
// standardize vars with mm_meancolvar from moremata
mc = mm_meancolvar(X)
Z = (X :- mc[1,.]) :/ sqrt(mc[2,.])
n = rows(X)
k = cols(X)
st_view(y, ., response, touse)
st_view(ystar, ., fit, touse)
wstar=J(n,1,.)
// loop over observations
for(i = 1; i <= n; i++) {
// loop over vars
d = J(n, 1, 0)
for(j = 1; j <= k; j++) {
d = d + ( Z[., j] :- Z[i, j] ) :^2
}
d_std = (d :-min(d)):/(max(d)-min(d)) // 0<=d_std<=1
// bandh = bandwidth
d_h=d_std:/bandh // (x-x0)/h
W=mm_kern(kernel,d_h) // kern((x-x0)/h)
W1=W:/colsum(W)  // W1=Kernel weights sum-up to 1
// WLS
if (model=="linear"){  // begin linear
X0=X[.,.]:-X[i,.]
X0WX0 = cross(X0, 1, W1, X0, 1)
X0WY = cross(X0, 1, W1, y,0)
//b
b = cholsolve(X0WX0,X0WY)
ystar[i] = b[k+1] // it is the regression constant
wstar[i]=W1[i]  // this is the w_ii in the cross-validation formula
} // end linear
if (model=="mean"){  // begin mean
yw=y:*W1
ystar[i] = colsum(yw)
wstar[i]=W1[i]
} // end mean
}
ONE=J(n,1,1)
C=(y-ystar):/(ONE-wstar)
cv=(1/n)*C'C
st_eclear()
st_numscalar("r(CV)", cv)
st_numscalar("r(bandh)", bandh)
}
end
********************************************************************************
* PROGRAM "mkern"
********************************************************************************
program mkern , eclass
version 14
syntax varlist(numeric) [if] [in], y(varname numeric) y_fit(string) h(real) k(string) model(string) [graph]
marksample touse
qui gen `y_fit'=.
mata: mf_kernel_reg("`varlist'", "`y'", "`y_fit'",`h',"`touse'","`k'","`model'")
if "`graph'"!=""{
qui count if `touse'
local N=r(N)
tempvar id
gen `id'=_n if `touse'
local h2=round(`h',0.003)
tw (line `y' `id' if `touse' & `id'<=`N', xtitle("") clpattern(dash) clwidth(thin) clcolor(black)) ///
(line y_fitted `id' if `touse' & `id'<=`N', clpattern(solid) clwidth(thin) clcolor(red)) ,  ///
legend(ring(0) pos(5) order(1 "Row data" 2 "MKERN fit")) ///
note("Bandwidth = `h2'" "Kernel = `k'" "Model = Local `model'")
}
ereturn scalar CV=r(CV)
ereturn scalar band=r(bandh)
ereturn local kern="`k'"
end
********************************************************************************
* PROGRAM "min_cv_mkern"
********************************************************************************
*! min_cv_mkern v2.0.0 GCerulli 11jul2016
capture program drop min_cv_mkern
program min_cv_mkern, eclass
version 14
#delimit;     
syntax varlist [if] [in] [fweight iweight pweight] [,
kern(string)
cvfile(string)
modeltype(string)
graph];
#delimit cr
********************************************************************************
marksample touse
tokenize `varlist'
local y `1'        // y = outcome
macro shift
local xvars `*'    // xvars = covariates
********************************************************************************
qui count
local N_obs=r(N)
qui count if `touse'
local N=r(N)
********************************************************************************
* COMPUTE THE "CROSS-VALIDATION" PROCEDURE BY GENERATING A GRID 
* MADE OF 500 BANDWIDTHS (FROM 0.01 TO 0.5, BY A STEP OF 0.001)
********************************************************************************
tempname M
mat `M'=J(500,3,.)
local j=1
forvalues b=0.01(0.01)0.5{   
cap drop y_fitted
mkern `xvars' if `touse' , y(`y') y_fit(y_fitted) h(`b') k(`kern') model(`modeltype')
*
mat `M'[`j',1]=`j'
mat `M'[`j',2]=`b'
mat `M'[`j',3]=e(CV)
local j=`j'+1
}
svmat `M' , names(__M)
rename __M1 _step
rename __M2 _bandw
rename __M3 _CV
qui sum _CV , d
ereturn scalar min_CV = r(min)
qui sum _bandw if _CV==r(min)
ereturn scalar opt_bandw = r(mean)
********************************************************************************
* PUT THE RESULT OF THE CROSS-VALIDATION INTO A DATASET CALLED "cvfile"
********************************************************************************
if "`cvfile'"!=""{
preserve
keep _step _bandw _CV 
save `cvfile' , replace
restore
drop _step _bandw _CV
}
else{
drop _step _bandw _CV
}
********************************************************************************
end
********************************************************************************