*! fhsae April 28, 2018
* Translated into Stata from R's SAE package by Molina and Marhuenda
* Paul Corral (World Bank Group - Poverty and Equity Global Practice)
* William Seitz (World Bank Group - Poverty and Equity Global Practice)
* Joao Pedro de Azevedo (World Bank Group - Poverty and Equity Global Practice)
* Minh Cong Nguyen (World Bank Group - Poverty and Equity Global Practice)
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
cap set matastrict off
clear mata
program define fhsae, eclass
version 11.2
#delimit ;
syntax varlist (min=1 numeric fv) [if] [in],
REvar(varlist max=1 numeric) method(string)
[FHpredict(string)
precision(real 1e-15)
maxiter(int 100)
FHSEpredict(string)
FHCVpredict(string)
DSEpredict(string)
DCVpredict(string)
AREApredict(string)
GAMMApredict(string)
OUTsample
NONEGative];
#delimit cr
set more off
if upper("`method'")=="CHANDRA"{
_fAyHeRRiot `varlist', revar(`revar') fhpredict(`fhpredict') fhsepredict(`fhsepredict') ///
fhcvpredict(`fhcvpredict') dsepredict(`dsepredict') dcvpredict(`dcvpredict') ///
gammapredict(`gammapredict') `outsample' `nonegative' areapredict(`areapredict')
}
else{
marksample touse11
tokenize `varlist'
local depvar `1'
macro shift
local indeps `*'
//TEMP VARS AND NAMES
tempname beta vcov tomata s2u loglike aic bic numobs aicc
tempvar lev yhat res touse out
//Create variable for out of sample prediction
if("`outsample'"!=""){
qui:gen `out' = `depvar'==.
foreach x of local indeps{
qui:replace `out' = 0 if `x' == .
}
}
//Remove collinear variables
_rmcoll `indeps', forcedrop
local indeps `r(varlist)'
local methods FH REML ML
local method = upper(trim("`method'"))
local okmethod: list methods & method
if ("`okmethod'"==""){
dis as error "Option given under method, not valid" ///
"Valid methods: `methods'; chandra"
error 345566
}
//regression
noi: dis in green "======================================================================================"
noi: dis in yellow "Fay-Herriot Small Area Estimation"
noi: dis in green "======================================================================================"
//Send vectors to mata
qui:sum `revar', d
local p50=r(p50)
local `tomata' revar region depvar indeps
foreach x of local `tomata'{
mata:st_view(`x'=.,.,"``x''", "`touse11'")
}
mata: _Fh=_fhmoment(depvar,indeps,revar,`p50')
mata: st_matrix("`beta'",*(_Fh[1,1]))
mata: st_matrix("`vcov'",*(_Fh[1,2]))
mata: fh = *(_Fh[1,3])
mata: dse= sqrt(revar)
mata: dcv= 100*(dse:/depvar)
mata: st_matrix("`loglike'",*(_Fh[1,4]))
mata: st_matrix("`aic'",*(_Fh[1,5]))
mata: st_matrix("`aicc'",*(_Fh[1,11]))
mata: st_matrix("`bic'",*(_Fh[1,6]))
mata: fhse = sqrt(*(_Fh[1,7]))
mata: fhcv = 100*(fhse:/fh)
mata: gamma = *(_Fh[1,8])
mata: st_matrix("`s2u'", *(_Fh[1,9]))
mata: st_matrix("`numobs'", *(_Fh[1,10]))
noi{
mat rownames `beta' = `indeps' _cons
mat `beta' = `beta''
mat rownames `vcov' = `indeps' _cons
mat colnames `vcov' = `indeps' _cons
}
if("`outsample'"!=""){
mata: st_view(_x1=.,., "`indeps'","`out'")
noi:mata: _out = _fhout(_x1,*(_Fh[1,1]),*(_Fh[1,2]),*(_Fh[1,9]))
mata: fh_out = *(_out[1,1])
mata: fhse_out = *(_out[1,2])
mata: fhcv_out = *(_out[1,3])
}
local predicts fh fhse fhcv dse dcv gamma
foreach x of local predicts{
if("``x'predict'"!=""){
local nm: list sizeof `x'predict
if (`nm'>1){
display as error "Only one variable name for option `x'predict"
error 202020202
}
qui:gen ``x'predict' = .
mata: st_store(.,tokens("``x'predict'"),"`touse11'",`x')
if(("`x'"=="fh"|"`x'"=="fhse"|"`x'"=="fhcv") & "`outsample'"!=""){
mata: st_store(.,tokens("``x'predict'"),"`out'",`x'_out)
}
}
}
//POST RESULTS
ereturn post `beta' `vcov', depname(`depvar') esample(`touse11')
noi ereturn display
// E returns
ereturn scalar N = `numobs'[1,1]
ereturn scalar sigma2u = `s2u'[1,1]
ereturn scalar loglike = `loglike'[1,1]
ereturn scalar aic = `aic'[1,1]
ereturn scalar aicc = `aicc'[1,1]
ereturn scalar bic = `bic'[1,1]
di as text "{hline 61}"
display in ye "`method' Model diagnostics"
di as text "{hline 61}"
display in gr "Number of observations" _col(45) in gr "=" _col(50) in ye e(N)
display in gr "Estimated random effects variance" _col(45) in gr "=" _col(50) in ye e(sigma2u)
display in gr "Log Likelihood" _col(45) in gr "=" _col(50) in ye e(loglike)
display in gr "AIC" _col(45) in gr "=" _col(50) in ye e(aic)
display in gr "AICc" _col(45) in gr "=" _col(50) in ye e(aicc)
display in gr "BIC" _col(45) in gr "=" _col(50) in ye e(bic)
di as text "{hline 61}"
}
end
program define _fAyHeRRiot, eclass
version 11.2
#delimit ;
syntax varlist (min=1 numeric fv) [if] [in],
REvar(varlist max=1 numeric)
[FHpredict(string)
FHSEpredict(string)
FHCVpredict(string)
DSEpredict(string)
DCVpredict(string)
AREApredict(string)
GAMMApredict(string)
OUTsample
NONEGative];
#delimit cr
set more off
marksample touse11
tokenize `varlist'
local depvar `1'
macro shift
local indeps `*'
//TEMP VARS AND NAMES
tempname beta vcov tomata s2u
tempvar lev yhat res touse out
//Create variable for out of sample prediction
if("`outsample'"!=""){
qui:gen `out' = `depvar'==.
foreach x of local indeps{
qui:replace `out' = 0 if `x' == .
}
}
//Remove collinear variables
_rmcoll `indeps', forcedrop
local indeps `r(varlist)'
//regression
noi: dis in green "=============================================================================="
noi: dis in yellow "Fay-Herriot Model"
noi: dis in green "=============================================================================="
noi: regress `depvar' `indeps' if `touse11'==1
scalar er2a=e(r2_a)
scalar er2=e(r2)
scalar ef = e(F)
scalar enum = e(N)
//Predict vectors
qui{
gen `touse'=e(sample)
predict `lev', hat
predict `yhat', xb
predict `res', res
local `tomata' lev res revar region depvar indeps
foreach x of local `tomata'{
mata:st_view(`x'=.,.,"``x''", "`touse'")
}
//Send to mata for FH
noi{
mata: _Fh = _fh(depvar, indeps, revar, res, lev)
mata: fhse = *(_Fh[1,1])
mata: fhcv = *(_Fh[1,2])
mata: dse = *(_Fh[1,3])
mata: dcv = *(_Fh[1,4])
mata: st_matrix("`beta'",*(_Fh[1,5]))
mata: st_matrix("`vcov'",*(_Fh[1,6]))
mata: fh = *(_Fh[1,7])
mata: area = *(_Fh[1,8])
mata: st_matrix("`s2u'", *(_Fh[1,9]))
mata: gamma = *(_Fh[1,10])
}
noi{
mat rownames `beta' = `indeps' _cons
mat `beta' = `beta''
mat rownames `vcov' = `indeps' _cons
mat colnames `vcov' = `indeps' _cons
}
if("`outsample'"!=""){
mata: st_view(_x1=.,., "`indeps'","`out'")
noi:mata: _out = _fhout(_x1,*(_Fh[1,5]),*(_Fh[1,6]),*(_Fh[1,9]))
mata: fh_out = *(_out[1,1])
mata: fhse_out = *(_out[1,2])
mata: fhcv_out = *(_out[1,3])
}
local predicts fh fhse fhcv dse dcv area gamma
foreach x of local predicts{
if("``x'predict'"!=""){
local nm: list sizeof `x'predict
if (`nm'>1){
display as error "Only one variable name for option `x'predict"
error 202020202
}
gen ``x'predict' = .
mata: st_store(.,tokens("``x'predict'"),"`touse'",`x')
if(("`x'"=="fh"|"`x'"=="fhse"|"`x'"=="fhcv") & "`outsample'"!=""){
mata: st_store(.,tokens("``x'predict'"),"`out'",`x'_out)
}
}
}
ereturn post `beta' `vcov', depname(`depvar') esample(`touse')
noi display in yellow "GLS coefficients"
noi ereturn display
ereturn scalar sigma2u = `s2u'[1,1]
ereturn scalar r2_a = er2a
ereturn scalar r2 = er2
ereturn scalar F = ef
ereturn scalar N = enum
}
end
mata
function _fhmoment(y, x, sigma2, Aes){
pointer(real matrix) rowvector _fhval
_fhval = J(1,11,NULL)
x=x,J(rows(x),1,1)
noneg = (st_local("nonegative")!="")
method = st_local("method")
if (method=="FH") xb = _fhopti(y,x,sigma2,Aes)
if (method=="ML") xb = _mlopti(y,x,sigma2,Aes)
if (method=="REML") xb = _remlopti(y,x,sigma2,Aes)
_beta = *xb[1,1]
_varbeta = *xb[1,2]
Aes = *xb[1,3]
yhat = quadcross(x',_beta)
if (noneg==1) yhat = yhat:*(yhat:>0)
resi = y-yhat
_eblup = yhat + (Aes:*(1:/(Aes:+sigma2)):*resi)
//Goodness of fit
m = rows(y)
p = cols(x)
_loglike = (-.5)*quadsum(log(2*pi()*(Aes:+sigma2))+((resi:^2):/(Aes:+sigma2)))
_aic = (-2)*_loglike+2*(p+1)
_bic = (-2)*_loglike+(p+1)*log(m)
_aicc = _aic + (2*(p^2)+2*p)/(m-p-1)
//MSE
vi = (1:/(Aes:+sigma2))
bd = (sigma2:/(Aes:+sigma2))
sumad2 = quadsum(vi:^2)
sumad = quadsum(vi)
if (method=="FH"){
vara = 2*m/(sumad^2)
b = 2*(m*sumad2 -(sumad^2))/(sumad^3)
g1d = sigma2:*(1:-bd)
g2d = (bd:^2):*quadrowsum((quadcross(x',(_varbeta)):*x))
g3d = (bd:^2):*(vara:/(Aes:+sigma2))
_mse = g1d + g2d + 2*g3d - b*(bd:^2)
}
if (method=="ML"){
vara = 2/sumad2
b = (-1)*quadsum(diagonal(quadcross(_varbeta,quadcross(x,(vi:^2),x))))/sumad2
g1d = sigma2:*(1:-bd)
g2d = (bd:^2):*quadrowsum((quadcross(x',(_varbeta)):*x))
g3d = (bd:^2):*(vara:/(Aes:+sigma2))
_mse = g1d + g2d + 2*g3d - b*(bd:^2)
}
if (method=="REML"){
vara = 2/sumad2
g1d = sigma2:*(1:-bd)
g2d = (bd:^2):*quadrowsum((quadcross(x',(_varbeta)):*x))
g3d = (bd:^2):*(vara:/(Aes:+sigma2))
_mse = g1d + g2d + 2*g3d
}
_fhval[1,1] = &(_beta) //beta
_fhval[1,2] = &(_varbeta) //vcov
_fhval[1,3] = &(_eblup) //eblup fh estimates
_fhval[1,4] = &(_loglike) //loglikelihood
_fhval[1,5] = &(_aic) //AIC
_fhval[1,6] = &(_bic) //BIC
_fhval[1,7] = &(_mse) //MSE
_fhval[1,8] = &(bd) //Gamma
_fhval[1,9] = &(Aes) //sigma2u
_fhval[1,10] = &(m) //Num obs
_fhval[1,11] = &(_aicc) //AICc
return(_fhval)
}
function _fhout(_x1,_bgls,_vcovgls,sigma2u){
pointer(real matrix) rowvector _fhout1
_fhout1 = J(1,3,NULL)
_x1 = _x1,J(rows(_x1),1,1)
noneg = (st_local("nonegative")~="")
yhat = quadcross(_x1',_bgls)
if (noneg==1) yhat = yhat:*(yhat:>0)
_fhout1[1,1] = &(yhat)
_fhout1[1,2] = &(sqrt(quadrowsum((quadcross(_x1',(_vcovgls)):*_x1)):+ sigma2u))
_fhout1[1,3] = &(100*((*_fhout1[1,2]):/(*_fhout1[1,1])))
return(_fhout1)
}
function _fhopti(y,x,sigma2,Aes){
pointer(real matrix) rowvector _fhout
_fhout = J(1,3,NULL)
prec = strtoreal(st_local("precision"))
diff = prec + 1
m = rows(y)
p = cols(x)
maxiter = strtoreal(st_local("maxiter"))
k=0
while((diff>prec) &(kprec) display("CAUTION: Convergence not achieved")
k=k+1
Aes = max((Aes[k],0))
_varbeta = invsym(quadcross(x,(1:/(Aes:+sigma2)),x))
_beta = quadcross(_varbeta,quadcross(x,(1:/(Aes:+sigma2)),y))
_fhout[1,1] = &(_beta)
_fhout[1,2] = &(_varbeta)
_fhout[1,3] = &(Aes)
return(_fhout)
}
function _mlopti(y,x,sigma2,Aes){
pointer(real matrix) rowvector _fhout
_fhout = J(1,3,NULL)
prec = strtoreal(st_local("precision"))
diff = prec + 1
m = rows(y)
p = cols(x)
maxiter = strtoreal(st_local("maxiter"))
k=0
while((diff>prec) &(kprec) display("CAUTION: Convergence not achieved")
k=k+1
Aes = max((Aes[k],0))
_varbeta = invsym(quadcross(x,(1:/(Aes:+sigma2)),x))
_beta = quadcross(_varbeta,quadcross(x,(1:/(Aes:+sigma2)),y))
_fhout[1,1] = &(_beta)
_fhout[1,2] = &(_varbeta)
_fhout[1,3] = &(Aes)
return(_fhout)
}
function _remlopti(y,x,sigma2,Aes){
pointer(real matrix) rowvector _fhout
_fhout = J(1,3,NULL)
prec = strtoreal(st_local("precision"))
diff = prec + 1
m = rows(y)
p = cols(x)
maxiter = strtoreal(st_local("maxiter"))
k=0
while((diff>prec) &(kprec) display("CAUTION: Convergence not achieved")
k=k+1
Aes = max((Aes[k],0))
_varbeta = invsym(quadcross(x,(1:/(Aes:+sigma2)),x))
_beta = quadcross(_varbeta,quadcross(x,(1:/(Aes:+sigma2)),y))
_fhout[1,1] = &(_beta)
_fhout[1,2] = &(_varbeta)
_fhout[1,3] = &(Aes)
return(_fhout)
}
function _fh(y, x, sigma2, eps, lv){
pointer(real matrix) rowvector _fhval
_fhval = J(1,10,NULL)
x=x,J(rows(x),1,1)
dof1 = cols(x)
noneg = (st_local("nonegative")~="")
//The shrinkage component
delta = quadsum(eps:^2) - quadsum((sigma2:*(1:-lv)))
sigma2u = delta/(rows(x)-dof1)
if (sigma2u<0){
sigma2u=0
V=sigma2
}
else{
V = sigma2:+sigma2u
}
b_gls = _fheblup(V,x,y)
_beta = *b_gls[1,1]
_varbeta = *b_gls[1,2]
yhat = quadcross(x',_beta)
if (noneg==1) yhat = yhat:*(yhat:>0)
//shrinkage estimator
gamma = sigma2u:/(V)
//EBLUP
FH = (gamma:*y)+((1:-gamma):*yhat)
//Random area effects
a_eff = gamma:*(y-yhat)
//MSE-estimate
g1 = gamma:*sigma2
g2 = ((1:-gamma):^2):*quadrowsum((quadcross(x',(_varbeta)):*x))
vsig = 2*rows(x)/(quadsum(1:/V)^2)
g3 = vsig:*((sigma2:^2):/(V:^3))
mse_fh = g1+g2+(2:*g3)
//FHSE
_fhval[1,1] = &(sqrt(mse_fh))
//FHCV
_fhval[1,2] = &(100:*(sqrt(mse_fh):/FH))
//DIRECT SE
_fhval[1,3] = &(sqrt(sigma2))
//DIRECT CV
_fhval[1,4] = &(100:*(sqrt(sigma2):/y))
//Betas
_fhval[1,5] = &(_beta)
//VCOV
_fhval[1,6] = &(_varbeta)
//FH
_fhval[1,7] = &(FH)
//Area effect
_fhval[1,8] = &(a_eff)
//Sigma2u
_fhval[1,9] = &(sigma2u)
//gamma
_fhval[1,10] = &(gamma)
return(_fhval)
}
function _fheblup(_v,_x,_y){
pointer(real matrix) rowvector fhout
fhout = J(1,2,NULL)
_varbeta = invsym(quadcross(_x,(1:/_v),_x))
_beta = quadcross(_varbeta,quadcross(_x,(1:/_v),_y))
fhout[1,1] = &(_beta)
fhout[1,2] = &(_varbeta)
return(fhout)
}
end