//s2sc and povmap mata functions *! ls2sc_povmap 0.5.3 27 Apr 2017 *! Copyright (C) World Bank 2016-17 - Minh Cong Nguyen & Paul Andres Corral Rodas *! Minh Cong Nguyen - mnguyen3@worldbank.org *! Paul Andres Corral Rodas - pcorralrodas@worldbank.org * This program is free software: you can redistribute it and/or modify * it under the ter ms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with this program. If not, see . version 11 mata: mata clear mata drop *() mata set matalnum on mata set mataoptimize on string scalar lsae_povmapStataVersion() return("`c(stata_version)'") string scalar lsae_povmapVersion() return("00.05.00") //0- Estimation functions //Main function void _s2cs_base0(string scalar yvar, string scalar xvar, string scalar zvar, string scalar yhat, string scalar yhat2, string scalar sae1, string scalar wgt, string scalar idvar, string scalar touse) { //real scalar locationeff, y = st_data(.,tokens(yvar), touse) x = st_data(.,tokens(xvar), touse) z1 = st_data(.,tokens(zvar), touse) yh = st_data(.,tokens(yhat), touse) yh2 = st_data(.,tokens(yhat2), touse) wt = st_data(.,tokens(wgt), touse) area = st_data(.,tokens(sae1), touse) id = st_data(.,idvar,touse) sim = strtoreal(st_local("rep")) seed = strtoreal(st_local("seed")) vce = strtoreal(st_local("vceopt")) EB = strtoreal(st_local("ebest")) hheff = strtoreal(st_local("hheffs")) h3 = strtoreal(st_local("h3")) betaout = st_local("matbeta") if (st_local("nolocation")~="") h3 = 2 boots = strtoreal(st_local("boots")) etanorm = strtoreal(st_local("etanormal")) epsnorm = strtoreal(st_local("epsnormal")) if (boots==1 & (strtoreal(st_local("psu1"))==1)) psu = st_data(.,st_local("psu"),touse) external bsim, asim, maxA, varr, sigma, v_sigma, allest, sigma_eps, hherr, locerr, locerr2, bs //sort data if (boots==1 & (strtoreal(st_local("psu1"))==1)) { data = area, id, y, x, z1, yh, yh2, wt, psu _sort(data, (1,cols(data),2)) y = &(data[.,3]) x = &(data[.,4..4+cols(x)-1], J(rows(x),1,1)) z1 = &(data[.,3+cols(*x)..3+cols(*x)+cols(z1)-1]) yh = &(data[.,3+cols(*x)+cols(*z1)..3+cols(*x)+cols(*z1)+cols(yh)-1]) yh2 = &(data[.,3+cols(*x)+cols(*z1)+cols(*yh)..3+cols(*x)+cols(*z1)+cols(*yh)+cols(yh2)-1]) wt = &(data[.,cols(data)-1]) area = &(data[.,1]) psu = &(data[.,cols(data)]) info = panelsetup(*area,1) info2= panelsetup(*psu,1) if (rows(info)>rows(info2)) _error("Your specified PSU variable is at a higher level than your cluster variable") } else { data = area, id, y, x, z1, yh, yh2, wt _sort(data, (1,2)) y = &(data[.,3]) x = &(data[.,4..4+cols(x)-1], J(rows(x),1,1)) z1 = &(data[.,3+cols(*x)..3+cols(*x)+cols(z1)-1]) yh = &(data[.,3+cols(*x)+cols(*z1)..3+cols(*x)+cols(*z1)+cols(yh)-1]) yh2 = &(data[.,3+cols(*x)+cols(*z1)+cols(*yh)..3+cols(*x)+cols(*z1)+cols(*yh)+cols(yh2)-1]) wt = &(data[.,cols(data)]) area = &(data[.,1]) psu = &(data[.,1]) info = panelsetup(*area,1) info2 = info } //return results //return a matrix of pointers - //row1: bOLS, vbOLS, rmse, r2 fstat, adjr2, Nobs, degree of freedom //row2: aOLS, vaOLS, rmse, r2 fstat, adjr2, Nobs, degree of freedom //row3: sigma_eta2, sigma_eps2, ratio eta/overall, var of sigmaeta2, maxA, varr //row4: bGLS, vbGLS allest = _f_s2sc_estall_eb(*y, *x, *z1, *yh, *yh2, *wt, *area, info, sim, seed, h3, vce, EB, 0, touse) // _f_s2sc_estall_eb(y, x, z, wt, area, info, sim, seed,henderson,hhe) if (st_local("nooutput")=="") { st_matrix("_bols", (*allest[1,1])') st_matrix("_vols", (*allest[1,2])) st_matrix("_rmse_beta", (*allest[1,4])) st_matrix("_r2_beta", (*allest[1,5])) st_matrix("_F_beta", (*allest[1,6])) st_matrix("_r2a_beta", (*allest[1,7])) st_matrix("_N_beta", (*allest[1,8])) st_matrix("_dfm_beta", (*allest[1,9])) if (hheff==1) { //Alfa --- Need to include no location option! st_matrix("_bzols", (*allest[2,1])') st_matrix("_vzols", (*allest[2,2])) st_matrix("_rmse_alpha", (*allest[2,4])) st_matrix("_r2_alpha", (*allest[2,5])) st_matrix("_F_alpha", (*allest[2,6])) st_matrix("_r2a_alpha", (*allest[2,7])) st_matrix("_N_alpha", (*allest[2,8])) st_matrix("_dfm_alpha", (*allest[2,9])) } //Variance if (h3==1) { //Henderson st_matrix("_v_eta", (*allest[3,1])) st_matrix("_v_eps", (*allest[3,2])) st_matrix("_v_ratio", (*allest[3,3])) } if (h3==0) { //ELL st_matrix("_v_eta", (*allest[3,1])) st_matrix("_v_eps", (*allest[3,2])) st_matrix("_v_ratio", (*allest[3,3])) st_matrix("_fvar_sigeta2", (*allest[3,4])) //st_matrix("_svar_sigeta2", (*allest[3,5])) } if (h3==2) { //No location st_matrix("_v_eps", (*allest[1,4]:^2)) } //GLS Results st_matrix("_bgls", (*allest[4,1])') st_matrix("_vgls", (*allest[4,2])) } //st_matrix("Zn", *allest[4,3]) //Coefficients for 2nd stage if (sim>0) { if (boots==0) { //get betas parametric if (h3==1) _error("Parametric drawing with H3 option is not allowed") if (EB==1) _error("Parametric drawing with EB option is not allowed") rseed(seed) hherr = locerr = asim = v_sigma = maxA = varr = sigma_eps = sigma = J(1,1,.) //empty matrix storing purpose - which can be overwritten later bsim = _f_drawnorm(sim, *allest[4,1]', *allest[4,2], seed) //for sigma eta if (epsnorm==0) hherr = *allest[4,4] //epsnorm 1 requires the census!! sigma = *allest[3,1] //sigma_eta2 if (*allest[3,4]==.) v_sigma = *allest[3,5] //simulated variance of sigma_eta2 else v_sigma = *allest[3,4] //parametric variance of sigma_eta2 if (etanorm==1) locerr = _f_gammadraw(sigma, v_sigma, sim) else locerr = (*allest[4,3])[.,2] //for sigma epsilon if (hheff==1) { //with household effect asim = _f_drawnorm(sim, *allest[2,1]', *allest[2,2], seed) maxA = J(sim, 1, *allest[3,6]) varr = J(sim, 1, *allest[3,7]) } if (hheff==0) { //without household effect if (h3==1) sigma_eps = J(sim, 1, (*allest[1,4]-*allest[3,1])) if (h3==0) sigma_eps = J(sim, 1, (*allest[3,2])) if (h3==2) sigma_eps = J(sim, 1, (*allest[1,4])) } } //End of parametric option else { //Get via bootstrap: bs = _f_bootstrap_estall(*y, *x, *z1, *yh, *yh2, *wt, *area, info, info2, sim, seed, h3, vce, EB, *psu, touse) if ((EB==1) & ((etanorm==0)|(epsnorm==0))) { display("I've changed your selection from semi-parametric to normal. When EB is chosen, only normal eta and epsilon drawings are allowed") etanorm=1 epsnorm=1 } varr = maxA =sigma_eps= J(sim,1,.) //Ask again why bs[1] here bsim = J(sim,cols((*(*bs[1])[4,1])'),.) if (hheff==1) asim = J(sim,cols((*(*bs[1])[2,1])'),.) //fill up with randomly drawn sigma etas if (EB==1) { locerr = (*(*bs[1])[4,3])[.,1] locerr2 = . } else { //(EB==0) if (etanorm==1) locerr = J(sim,1,.) else locerr = J(rows(info),sim,.) //(etanorm==0) } //if (etanorm==0) locerr = J(rows(info),sim,.) if (epsnorm==0) hherr = J(rows((*(*bs[1])[4,4])),sim,.) for(i=1;i<=sim;i++) { bsim[i,.] = (*(*bs[i])[4,1])' if (EB==1) { locerr = locerr, (*(*bs[i])[4,3])[|.,2 \ .,3|] locerr2 = locerr2, (0, (*(*bs[i])[3,1])) } else { //EB==0 if (etanorm==1) locerr[i,1] = (*(*bs[i])[3,1]) else locerr[.,i] = (*(*bs[i])[4,3])[.,2] //etanorm==0 } if (hheff==1) { asim[i,.] = (*(*bs[i])[2,1])' if (epsnorm==1) { maxA[i,1] = *(*bs[i])[3,6] varr[i,1] = *(*bs[i])[3,7] } } else { if (epsnorm==1) { if (h3==0) sigma_eps[i,1] = *(*bs[i])[3,2] if (h3==1) sigma_eps[i,1] = *(*bs[i])[1,4] - *(*bs[i])[3,1] if (h3==2) sigma_eps[i,1] = *(*bs[i])[1,4] } } if (epsnorm==0) hherr[.,i] = *(*bs[i])[4,4] } //sim } ////Get via bootstrap: } //sim>0 //st_matrix("Zn", (*(*bs[1])[4,4])) //st_matrix("Zn", mean(*asim)) //write the bsim to file - with this order (7 items): asim, sigma, v_sigma, maxA, varr, bsim, sigma_eps if ((st_local("stage")=="first") & (st_local("matbeta")!="")) { fh = fopen(betaout, "rw") fputmatrix(fh, asim) fputmatrix(fh, sigma) fputmatrix(fh, v_sigma) fputmatrix(fh, maxA) fputmatrix(fh, varr) fputmatrix(fh, bsim) fputmatrix(fh, sigma_eps) fclose(fh) } } // We call upon the Henderson or ELL function for 1st stage function _f_s2sc_estall_eb( real matrix y, real matrix x, real matrix z, real matrix yh, real matrix yh2, real matrix wt, real matrix area, real matrix info, real scalar sim, real scalar seed, real scalar henderson, real scalar vceopt, real scalar EB, real scalar bs, string scalar touse) { pointer(real matrix) rowvector estout estout = J(10,25,NULL) rinfo = rows(info) //OLS b_ols = _f_wols(y, x, wt, info, vceopt, bs) yhs = quadcross(x',*b_ols[1,1]) //RESULTS FROM OLS that are passed on to Stata and other functions estout[1,1] = &(*b_ols[1,1]) //Beta model Betas estout[1,2] = &(*b_ols[1,2]) //Beta model VCOV estout[1,4] = &(sqrt(*b_ols[1,4])) //RMSE estout[1,5] = &(*b_ols[1,5]) //R2 estout[1,6] = &(*b_ols[1,6]) //F-Stat estout[1,7] = &(*b_ols[1,7]) //Adj. R2 estout[1,8] = &(*b_ols[1,8]) //Num Obs estout[1,9] = &(*b_ols[1,9]) //Degrees of freedom (K-1) //order matters for z - check alpha variables if ((allof(z,.)==1) & (allof(yh,.)==1) & (allof(yh2,.)==1)) { //0,0,0 hheffect = 0 } else { yhs2 = yhs:^2 if ((allof(z,.)==1) & (allof(yh,.)==1) & (allof(yh2,.)==0)) zn = yhs2:*yh2 //0,0,1 if ((allof(z,.)==1) & (allof(yh,.)==0) & (allof(yh2,.)==1)) zn = yhs :*yh //0,1,0 if ((allof(z,.)==1) & (allof(yh,.)==0) & (allof(yh2,.)==0)) zn = yhs :*yh, yhs2:*yh2 //0,1,1 if ((allof(z,.)==0) & (allof(yh,.)==1) & (allof(yh2,.)==1)) zn = z //1,0,0 if ((allof(z,.)==0) & (allof(yh,.)==1) & (allof(yh2,.)==0)) zn = z, yhs2:*yh2 //1,0,1 if ((allof(z,.)==0) & (allof(yh,.)==0) & (allof(yh2,.)==1)) zn = z, yhs :*yh //1,1,0 if ((allof(z,.)==0) & (allof(yh,.)==0) & (allof(yh2,.)==0)) zn = z, yhs :*yh, yhs2:*yh2 //1,1,1 zn = zn, J(rows(zn),1,1) hheffect = 1 } //Henderson III or ELL or no location effect if (henderson==1) { //Henderson III H3 = _f_henderson_p1(y, x, wt, area, *b_ols[1,3], info, 1) //SIGMAS from HENDERSON estout[3,1] = &(*H3[1,2]) //SIGMA ETA estout[3,2] = &(*H3[1,1]) //SIGMA EPSilon estout[3,3] = &(*H3[1,2]/(*b_ols[1,4])) //Ratio of Variance over MSE estout[3,4] = &(*H3[1,3]) //Ehats //USE HENDERSON RESULTS FOR ALPHA MODEL if (hheffect==1) { ech = *H3[1,3] maxA = 1.05*max(ech:^2) lhslog = ln((ech:^2):/(maxA:-(ech:^2))) alpha_ols = _f_wols(lhslog, zn, wt, info, vceopt, bs) //RESULTS FROM ALFA estout[2,1] = &(*alpha_ols[1,1]) //Beta estout[2,2] = &(*alpha_ols[1,2]) //VCOV estout[2,4] = &(sqrt(*alpha_ols[1,4])) //RMSE estout[2,5] = &(*alpha_ols[1,5]) //R2 estout[2,6] = &(*alpha_ols[1,6]) //F-Stat estout[2,7] = &(*alpha_ols[1,7]) //Adj. R2 estout[2,8] = &(*alpha_ols[1,8]) //Num Oalphas estout[2,9] = &(*alpha_ols[1,9]) //Degrees of freedom (K-1) bigb = exp(quadcross(zn',*alpha_ols[1,1])) var_r = *alpha_ols[1,4] sige2 = (maxA*(bigb:/(1:+bigb))) + (0.5*var_r)*((maxA*(bigb:*(1:-bigb))):/((1:+bigb):^3)) sige2 = *H3[1,1]*(sige2/mean(sige2)) estout[3,6] = &(maxA) estout[3,7] = &(var_r) } else { //hheffect==0 sige2 = J(*b_ols[1,8], 1, (*b_ols[1,4]-*H3[1,2])) } //GLS ROY b_gls0 = _f_hh_gls2(y, x, wt, sige2, *H3[1,2], info, 1,bs) //RESULTS FROM GLS H&H estout[4,1] = &(*b_gls0[1,1]) estout[4,2] = &(*b_gls0[1,2]) if (EB==1) { ebout = _f_ebmatch(*H3[1,2], wt, sige2, info, area, *b_gls0[1,3]) estout[4,3] = &(*ebout[1,1]) //ETA estout[4,4] = &(*ebout[1,2]) //EPS } else { noebout = _f_hhech(*b_gls0[1,3], wt, info, area) estout[4,3] = &(*noebout[1,1]) //ETA estout[4,4] = &(*noebout[1,2]) //EPS } } //end of Henderson III if (henderson==0) { //FOR ELL //obtain sigma_eta2 and sampling variance of sigma_eta2 sigeta = _ell_sigeta(*b_ols[1,3], wt, info, 0) estout[3,1] = &(*sigeta[1,1]) //Var of ETA //Variance of epsilon estout[3,2] = &(*b_ols[1,4] - *sigeta[1,1]) //varepsilon estout[3,3] = &(*sigeta[1,1]/(*b_ols[1,4])) //ratiov estout[3,4] = &(*sigeta[1,2]) //parametric variance of sigma_eta2 if (hheffect==1) { //sige2 ech = J(rows(*b_ols[1,3]),1,.) for (i=1; i<=rinfo; i++) { res1 = panelsubmatrix(*b_ols[1,3],i,info) m0 = info[i,1],1 \ info[i,2],1 ech[|m0|] = res1 :- mean(res1) //if (i==1) area[i,.] ,mean(res1) //Future //wt1 = panelsubmatrix(wt,i,info) //ech[info[i,1]..info[i,2],1] = res1 :- mean(res1, wt1) //if (i==1) area[i,.] ,mean(res1,wt1), quadsum(wt1) //else dump=dump \ (area[i,.] ,mean(res1,wt1), quadsum(wt1)) } //Standardize Residuals ech = ech*(sqrt(*estout[3,2])/sqrt(quadvariance(ech))) //Alpha OLS maxA = 1.05*max(ech:^2) lhslog = ln((ech:^2):/(maxA:-(ech:^2))) alpha_ols = _f_wols(lhslog, zn, wt, info,vceopt, bs) //RESULTS FROM ALFA estout[2,1] = &(*alpha_ols[1,1]) //Beta estout[2,2] = &(*alpha_ols[1,2]) //VCOV estout[2,4] = &(sqrt(*alpha_ols[1,4])) //RMSE estout[2,5] = &(*alpha_ols[1,5]) //R2 estout[2,6] = &(*alpha_ols[1,6]) //F-Stat estout[2,7] = &(*alpha_ols[1,7]) //Adj. R2 estout[2,8] = &(*alpha_ols[1,8]) //Num Oalphas estout[2,9] = &(*alpha_ols[1,9]) //Degrees of freedom (K-1) bigb = exp(quadcross(zn',*alpha_ols[1,1])) var_r = *alpha_ols[1,4] sige2 = (maxA*(bigb:/(1:+bigb))) + (0.5*var_r)*((maxA*(bigb:*(1:-bigb))):/((1:+bigb):^3)) estout[3,6] =&(maxA) estout[3,7] =&(var_r) } else { //hheffect==0 sige2 = J(*b_ols[1,8], 1, *estout[3,2]) } //GLS //b_gls0 = _f_ell_gls(y, x, wt,info, sige2, *sigeta[1,1]) b_gls0 = _f_hh_gls2(y, x, wt, sige2, *sigeta[1,1], info, EB, bs) //RESULTS FROM GLS ELL estout[4,1] = &(*b_gls0[1,1]) estout[4,2] = &(*b_gls0[1,2]) if (EB==1) { ebout = _f_ebmatch(*sigeta[1,1], wt, sige2, info, area, *b_gls0[1,3]) estout[4,3] = &(*ebout[1,1]) //ETA estout[4,4] = &(*ebout[1,2]) //EPS } else { noebout = _f_hhech(*b_gls0[1,3], wt, info, area) estout[4,3] = &(*noebout[1,1]) //ETA estout[4,4] = &(*noebout[1,2]) //EPS } } //end of ELL if (henderson==2) { //No location effect EB=0 //Notice that there's no adjustment made if (hheffect==1) { ech = *b_ols[1,3] //Alpha OLS maxA = 1.05*max(ech:^2) lhslog = ln((ech:^2):/(maxA:-(ech:^2))) alpha_ols = _f_wols(lhslog, zn, wt, info, vceopt, bs) //RESULTS FROM ALFA estout[2,1] = &(*alpha_ols[1,1]) //Beta estout[2,2] = &(*alpha_ols[1,2]) //VCOV estout[2,4] = &(sqrt(*alpha_ols[1,4])) //RMSE estout[2,5] = &(*alpha_ols[1,5]) //R2 estout[2,6] = &(*alpha_ols[1,6]) //F-Stat estout[2,7] = &(*alpha_ols[1,7]) //Adj. R2 estout[2,8] = &(*alpha_ols[1,8]) //Num Oalphas estout[2,9] = &(*alpha_ols[1,9]) //Degrees of freedom (K-1) estout[2,10]= &maxA bigb = exp(quadcross(zn',*alpha_ols[1,1])) var_r = *alpha_ols[1,4] sige2 = (maxA*(bigb:/(1:+bigb))) + (0.5*var_r)*((maxA*(bigb:*(1:-bigb))):/((1:+bigb):^3)) } else { sige2=J(*b_ols[1,8], 1, *b_ols[1,4]) } //GLS //b_gls0 = _f_ell_gls(y, x, wt,info, sige2, *sigeta[1,1]) b_gls0 = _f_hh_gls2(y, x, wt, sige2, 0, info, EB,1) //RESULTS FROM GLS ELL estout[4,1] = &(*b_gls0[1,1]) estout[4,2] = &(*b_gls0[1,2]) } //henderson ==2 if ((strtoreal(st_local("alfatest")))==1 & bs==0) { st_store(.,st_varindex(tokens(st_local("alfaerr"))), touse, lhslog) st_store(.,st_varindex(tokens(st_local("exbeta"))), touse, yhs ) } return(estout) } // Prepares vcov matrix for Henderson function _f_henderson_p1(real matrix y, real matrix x, real matrix wt, real matrix area, real matrix ehat, real matrix info, real scalar EB) { pointer(real matrix) rowvector sigout2 sigout2 = J(1,5,NULL) sxw = quadcross(x,wt,x) sxw2 = quadcross(x,(wt:^2),x) rinfo = rows(info) //These are to be filled in the loop xdm = J(rows(x), cols(x), 0) ydm = J(rows(x), 1, 0) sumw = sumw2 = ebar = J(rinfo,1,0) xbar = J(rinfo,cols(x),0) sse = sw2 = 0 for (i=1; i<=rinfo; i++) { x1 = panelsubmatrix(x,i,info) y1 = panelsubmatrix(y,i,info) wt1 = panelsubmatrix(wt,i,info) sw2 = sw2 + quadsum((wt1:^2))/quadsum(wt1) m1 = info[i,1] m2 = info[i,2] m0 = m1,1 \ m2,1 ydm[|m0|] = y1 :- mean(y1,wt1) xbar[i,.] = mean(x1,wt1) m0 = m1,. \ m2,. xdm[|m0|] = x1 :- xbar[i,.] sumw[i] = quadsum(wt1) if (EB == 1) { sumw2[i] = quadsum((wt1:^2)) ebar[i] = mean(panelsubmatrix(ehat,i,info),wt1) } } //remove 0 vector columns for inversion xdm = select(xdm,(colsum((xdm:==0)):~=rows(xdm))) sxdmw = quadcross(xdm,wt,xdm) _invsym(sxdmw) sxdmw2 = quadcross(xdm,(wt:^2),xdm) yxdmw = quadcolsum((wt:*ydm):*xdm) _invsym(sxw) sse = quadcross(wt,(ydm:^2)) - quadcross(quadcross(yxdmw', sxdmw)',yxdmw') t2 = trace(quadcross(sxdmw',sxdmw2)) t3 = trace(quadcross(sxw,sxw2)) t4 = trace(sxw*quadcross(xbar,(sumw:^2),xbar)) sigma2ech = sse/(quadsum(wt)-sw2-t2) sigma2eta = (quadcross(wt,(y:^2)) - quadcross(quadcross(quadcross(x,wt,y),sxw)',quadcross(x,wt,y)) - (quadsum(wt)-t3)*sigma2ech)/(quadsum(wt)-t4) gamma_a = sigma2eta:/(sigma2eta:+(sigma2ech:*(sumw2:/(sumw:^2)))) //utilde utilde = - (mean(gamma_a:*ebar)):+(gamma_a:*ebar) //eps tilde epstilde = J(rows(ehat),1,0) for (i=1; i<=rinfo; i++) { m1 = info[i,1] m2 = info[i,2] m0 = m1,1 \ m2,1 epstilde[|m0|] = J(m2-m1+1, 1 , utilde[i]) //epstilde[|info[i,1],1\info[i,2],1|] = J(rows(epstilde[|info[i,1],1\info[i,2],1|]),1,utilde[i]) } epstilde = -mean(ehat - epstilde ):+(ehat - epstilde) //epshat epshat = epstilde:*(sqrt(sigma2ech)/sqrt(quadvariance(epstilde))) uhat = utilde:*(sqrt(sigma2eta)/sqrt(quadvariance(utilde))) sigout2[1,3] = &(epshat) sigout2[1,4] = &(uhat) sigout2[1,1] = &(sigma2ech) sigout2[1,2] = &(sigma2eta) return(sigout2) //epshat for alfa model (replaces residuals from OLS and is then used in alfa model) } //BOOTSTRAP, adjusted from Minh's function _f_bootstrap_estall(real matrix y, real matrix x, real matrix z, real matrix yh, real matrix yh2, real matrix wt, real matrix area, real matrix info, real matrix info2, real scalar sim, real scalar seed, real scalar henderson, real scalar vceopt, real scalar EB, real matrix psu, string scalar touse) { pointer(pointer(real matrix) rowvector) colvector bsout bsout = J(sim,1,NULL) data0 = area, y, x, z, yh, yh2, wt rowdata = rows(data0) coldata = cols(data0) data = J(rowdata, coldata,.) colsx = cols(x) colsy = cols(y) colsz = cols(z) colsyh = cols(yh) colsyh2 = cols(yh2) rinfo = rows(info2) //uniformseed(seed) for(s=1;s<=sim;s++) { for (i=1; i<=rinfo; i++) { dout = panelsubmatrix(data0,i,info2) nrow = rows(dout) m0 = info2[i,1],1 \ info2[i,2],coldata data[|m0|] = dout[ceil(nrow*uniform(nrow,1)), .] } //_sort(data, 1) area1 = &(data[.,1]) y1 = &(data[.,2]) x1 = &(data[|.,3 \ .,3+colsx-1|]) z1 = &(data[|.,3+colsx \ .,3+colsx+colsz-1|]) yhx = &(data[|.,3+colsx+colsz \.,3+colsx+colsz+colsyh-1|]) yhx2 = &(data[|.,3+colsx+colsz+colsyh \ .,3+colsx+colsz+colsyh+colsyh2-1|]) wt1 = &(data[.,coldata]) est = _f_s2sc_estall_eb(*y1, *x1, *z1, *yhx, *yhx2, *wt1, *area1, info, sim, seed, henderson, vceopt, EB,1, touse) //if (*est[3,1]<0) *est[3,1] = 0 c = 1 if (*est[3,1]<0) { if (c<=100) { s = s - 1 c = c + 1 } else _error("Please try a different PSU or select a different seed number.") } else bsout[s] = f_pointer_clone(est) } return(bsout) } //Column processing function void _s2sc_sim_cols(string scalar xvar, string scalar zvars, string scalar yhats, string scalar yhats2, string scalar areavar, string scalar plvar, string scalar wgt, string scalar touse, string scalar hhid, string scalar matin) { count = strtoreal(st_local("colprocess")) sim = strtoreal(st_local("rep")) seed = strtoreal(st_local("seed")) h3 = strtoreal(st_local("h3")) if (st_local("nolocation")~="") h3 = 2 hheff = strtoreal(st_local("hheffs")) boots = strtoreal(st_local("boots")) etanorm = strtoreal(st_local("etanormal")) epsnorm = strtoreal(st_local("epsnormal")) EB = strtoreal(st_local("ebest")) pline = strtoreal(st_local("pline")) lg = strtoreal(st_local("lny")) varinmod = tokens(st_local("varinmodel")) //pointer(real matrix) rowvector agginfo agglist = tokens(st_local("aggids")) indlist = tokens(st_local("indicators")) //pointer(real matrix) colvector arealvl //agginfo = J(rows(agglist), 1, NULL) mem = strtoreal(st_local("maxmem")) mem = floor((mem/8/sim)) external bsim, asim, maxA, varr, sigma, v_sigma, sigma_eps, locerr, hherr, locerr2 colsbsim = cols(bsim) colsasim = cols(asim) if ((EB==1) & ((etanorm==0)|(epsnorm==0))){ etanorm=1 epsnorm=1 } //WARNING: area must be sorted outside in Stata //census data - or use other way, ie seek(fh, (N*8+77)*6 ,-1) to get the 7th column fhcensus = fopen(matin, "r") varlist = fgetmatrix(fhcensus) p0 = ftell(fhcensus) a = fgetmatrix(fhcensus) p1 = ftell(fhcensus) N = rows(a) a = J(1,1,.) //The data x = tokens(xvar) z1 = tokens(zvars) yh = tokens(yhats) yh2 = tokens(yhats2) area = tokens(areavar) wt = tokens(wgt) id = tokens(hhid) pl = tokens(plvar) colsx = cols(x) colsz1 = cols(z1) colsyh = cols(yh) colsyh2 = cols(yh2) if (st_local("cuts")!="") cut = tokens(st_local("cuts")) coladd = (st_local("addvars")!="" ? cols(tokens(st_local("addvars"))) : 0) //if (pline==.) st_view(pl,.,tokens(st_local("pline")), .) //Check if X and other variables (varinmodel local), Z and Yhats are in the code e3499 = _fvarscheck(varinmod, varlist) if (z1[1] != "__mz1_000") e3499 = _fvarscheck(z1, varlist) if (yh[1] != "__myh_000") e3499 = _fvarscheck(yh, varlist) if (yh2[1] != "__myh2_000") e3499 = _fvarscheck(yh2, varlist) if (st_local("addvars")!="") e3499 = _fvarscheck(tokens(st_local("addvars")), varlist) if (e3499==1) { errprintf("Variables mentioned above are missing from the target dataset\n") _error(3499) } if (hheff==1) { //zalpha condition - __mz1_000, __myh_000, __myh2_000 if ((z1[1]=="__mz1_000") & (yh[1]=="__myh_000") & (yh2[1]=="__myh2_000")) zcond = "000" //0,0,0 if ((z1[1]=="__mz1_000") & (yh[1]=="__myh_000") & (yh2[1]~="__myh2_000")) zcond = "001" //0,0,1 if ((z1[1]=="__mz1_000") & (yh[1]~="__myh_000") & (yh2[1]=="__myh2_000")) zcond = "010" //0,1,0 if ((z1[1]=="__mz1_000") & (yh[1]~="__myh_000") & (yh2[1]~="__myh2_000")) zcond = "011" //0,1,1 if ((z1[1]~="__mz1_000") & (yh[1]=="__myh_000") & (yh2[1]=="__myh2_000")) zcond = "100" //1,0,0 if ((z1[1]~="__mz1_000") & (yh[1]=="__myh_000") & (yh2[1]~="__myh2_000")) zcond = "101" //1,0,1 if ((z1[1]~="__mz1_000") & (yh[1]~="__myh_000") & (yh2[1]=="__myh2_000")) zcond = "110" //1,1,0 if ((z1[1]~="__mz1_000") & (yh[1]~="__myh_000") & (yh2[1]~="__myh2_000")) zcond = "111" //1,1,1 } //sort is done before and setup area panel area_v = _fgetcoldata(_fvarindex(area[1], varlist), fhcensus, p0, p1-p0) //wt_v = _fgetcoldata(_fvarindex(wt[1], varlist), fhcensus, p0, p1-p0) //pl_v = _fgetcoldata(_fvarindex(pl[1], varlist), fhcensus, p0, p1-p0) info = panelsetup(area_v, 1) rowsinfo = rows(info) //for (r=1; r<=cols(agglist); r++) agginfo[r,1] = &(panelsetup(_ftruncateID(area_v, agglist[r]), 1)) //area_v = 1 //create the mark index (0 and 1) mask = _fdatamark(N, varinmod, varlist, fhcensus, p0, p1-p0) //N = quadcolsum(mask) //New mata structure: key, matrixobs, matrixvar, _ByID, _ID, _WEIGHT, _YHAT1..._YHAT100, _POVLINE, addition vars if (st_local("ydump")!="") { if (st_local("plinevar")!="") ncols = 3 + sim + 1 + coladd else ncols = 3 + sim + coladd yd = fopen(st_local("ydump"),"rw") //"DATA_MATRIX", "VARIABLE_MATRIX" are removed from the matrix variable varname = "_ByID", "_ID", "_WEIGHT" varname = varname, tokens(st_local("yhatlist")) //if (st_local("plinevar")!="") varname = varname, "_POVLINE" if (st_local("addvars")!="") varname = varname, tokens(st_local("addvars")) fputmatrix(yd, (87801, quadcolsum(mask), ncols, sim, quadsum(strlen(varname)))) //DATA_MATRIX if (st_local("addvars")!="") fputmatrix(yd, ("_ByID", "_ID", "_WEIGHT", tokens(st_local("yhatlist")), tokens(st_local("addvars")))) //VARIABLE_MATRIX else fputmatrix(yd, ("_ByID", "_ID", "_WEIGHT", tokens(st_local("yhatlist")))) //VARIABLE_MATRIX fputmatrix(yd, select(J(N,1,1), mask)) //_ByID fputmatrix(yd, select(_fgetcoldata(_fvarindex(area[1], varlist), fhcensus, p0, p1-p0), mask)) //_ID fputmatrix(yd, select(_fgetcoldata(_fvarindex(wt[1], varlist), fhcensus, p0, p1-p0), mask)) //_WEIGHT //if (st_local("plinevar")!="") fputmatrix(yd, select(_fgetcoldata(_fvarindex(st_local("plinevar"), varlist), fhcensus, p0, p1-p0), mask)) //_POVLINE } //Simulation dots if (boots==0) display("Parametric drawing of betas") else display("Bootstrapped drawing of betas and parameters") //count = 20 //need to be based on memory available printf("{txt}\nNumber of simulations: {res}%g{txt}", sim) printf("{txt}\nEach dot (.) represents {res}%g{txt} simulation(s).\n", count) display("{txt}{hline 4}{c +}{hline 3} 1 " + "{hline 3}{c +}{hline 3} 2 " + "{hline 3}{c +}{hline 3} 3 " + "{hline 3}{c +}{hline 3} 4 " + "{hline 3}{c +}{hline 3} 5 ") //Getting etamat for all area and all simulations //do this because we keep the sequential draw: for each area within each simulation, then repeat each simulation. if (EB==1) { cens_area = J(rowsinfo,1,.) for(r=1; r<=rowsinfo; r++) cens_area[r,1] = area_v[info[r,1],1] D = _ebcensus4(cens_area, locerr, locerr2, info) etamat = rnormal(1,1,D[.,2*1-1],sqrt(D[.,1*2])) //j==1 for(j=2; j<=sim; j++) etamat = etamat, rnormal(1,1,D[.,2*j-1],sqrt(D[.,j*2])) cens_area = D = NULL } else { //EB==0 if (etanorm==0) { //etamat = _f_sampleeps(rowsinfo, 1, locerr[.,1]) //for (j=2; j<=sim; j++) etamat = etamat, _f_sampleeps(rowsinfo, 1, locerr[.,j]) //Added by Paul Apr 27 2018 itsbs = cols(locerr)!=1 etamat = _f_sampleeps(rowsinfo, 1, locerr[.,1])' if (itsbs==1) for (j=2; j<=sim; j++) etamat = etamat, _f_sampleeps(rowsinfo, 1, locerr[.,j])' else for (j=2; j<=sim; j++) etamat = etamat, _f_sampleeps(rowsinfo, 1, locerr[.,1])' } else { //(etanorm==1) etamat = rnormal(rowsinfo,1,0,sqrt(locerr[1])) for (j=2; j<=sim; j++) etamat = etamat, rnormal(rowsinfo,1,0,sqrt(locerr[j])) } } area_v = NULL for (s=1; s<=sim; s=s+count) { //xb and epsnorm m0 = s,1 \ s+count-1,1 m1 = .,s \ .,s+count-1 xb = J(N,1,1)*bsim[|s,colsbsim \ s+count-1,colsbsim|]' //change to 0 for nocons for (v=1; v<=colsx; v++) xb = xb :+ _fgetcoldata(_fvarindex(x[v], varlist), fhcensus, p0, p1-p0)*bsim[|s,v \ s+count-1,v|]' if (epsnorm==1) { if (hheff==1) { if (zcond == "100") { //xb = (zi,J(rows(zi),1,1))*asim' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsz1; v++) zb = zb + _fgetcoldata(_fvarindex(z1[v], varlist), fhcensus, p0, p1-p0)*asim[|s,v \ s+count-1,v|]' } if (zcond == "110") { //xb = zi*asim[|.,1\.,cols(zi)|]' + (yhi*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi)|]'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsz1; v++) zb = zb + _fgetcoldata(_fvarindex(z1[v], varlist), fhcensus, p0, p1-p0) *asim[|s,v \ s+count-1,v|]' for (v=1; v<=colsyh; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsz1 \ s+count-1,v+colsz1|]'):*xb } if (zcond == "111") { //xb = zi*asim[|.,1\.,cols(zi)|]' + (yhi*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi)|]'):*((xi,J(rows(xi),1,1))*bsim') + (yh2i*asim[|.,1+cols(yhi) \ .,cols(yhi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsz1; v++) zb = zb + _fgetcoldata(_fvarindex(z1[v], varlist), fhcensus, p0, p1-p0) *asim[|s,v \ s+count-1,v|]' for (v=1; v<=colsyh; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsz1 \ s+count-1,v+colsz1|]'):*xb for (v=1; v<=colsyh2; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh2[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsz1+colsyh \ s+count-1,v+colsz1+colsyh|]'):*(xb:^2) } if (zcond == "010") { //xb = (yhi*asim[.,cols(asim)-1]'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsyh; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v \ s+count-1,v|]'):*xb } if (zcond == "011") { //xb = (yhi*asim[.,1..cols(yhi)]'):*((xi,J(rows(xi),1,1))*bsim') + (yh2i*asim[|.,1+cols(yhi) \ .,cols(yhi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim')+ J(rows(yhi),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsyh; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v \ s+count-1,v|]'):*xb for (v=1; v<=colsyh2; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh2[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsyh \ s+count-1,v+colsyh|]'):*(xb:^2) } if (zcond == "001") { //xb = (yh2i*asim[.,1..cols(yh2i)]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yh2i),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsyh2; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh2[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v \ s+count-1,v|]'):*(xb:^2) } if (zcond == "101") { //xb = zi*asim[|.,1 \.,cols(zi)|]' + (yh2i*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi2)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yh2i),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsz1; v++) zb = zb + _fgetcoldata(_fvarindex(z1[v], varlist), fhcensus, p0, p1-p0) *asim[|s,v \ s+count-1,v|]' for (v=1; v<=colsyh2; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh2[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsz1 \ s+count-1,v+colsz1|]' ):*(xb:^2) } zb = exp(zb) zb = (maxA[|m0|]':*(zb:/(1:+zb))) :+ (.5*varr[|m0|])':*((maxA[|m0|]':*(zb:*(1:-zb))):/((1:+zb):^3)) //_editmissing(zb, maxA) xb = xb + colshape(vec(rnormal(1,1,0,sqrt(zb))'),N)' //checked OK on 1 sim or many sim zb = NULL //release zb } else { // hheff==0 xb = xb + rnormal(N,1,0,sqrt(sigma_eps[|m0|])') //checked OK on 1 sim or many sim } } else { //epsnorm==0 xb = xb + _f_sampleeps(count, N, hherr[|m1|]) //checked OK on 1 sim or many sim } //etanorm for(j=1; j<=rowsinfo; j++) { m2 = info[j,1],. \ info[j,2],. xb[|m2|] = xb[|m2|] :+ etamat[j, s..s+count-1] } //now we have xb, let do the calculation fgt, gini, etc. //Write col by col to the mata data if (st_local("ydump")!="") { if (lg==1) for (m=1; m<=count; m++) fputmatrix(yd, exp(select(xb[.,m], mask))) else for (m=1; m<=count; m++) fputmatrix(yd, select(xb[.,m], mask)) } printf(".") if (mod(s,50)==0) printf(" %5.0f\n",s) displayflush() } //end of s xb = area_v = wt_v = pl_v = NULL //Add additional variables to the ydump if (st_local("ydump")!="") { if (st_local("addvars")!="") { addvarlist = tokens(st_local("addvars")) for (v=1; v<=coladd; v++) fputmatrix(yd, select(_fgetcoldata(_fvarindex(addvarlist[v], varlist), fhcensus, p0, p1-p0), mask)) //_ID } fclose(yd) } fclose(fhcensus) st_local("_itran","0") } //Column processing function + on the fly void _s2sc_sim_cols2(string scalar xvar, string scalar zvars, string scalar yhats, string scalar yhats2, string scalar areavar, string scalar plvar, string scalar wgt, string scalar touse, string scalar hhid, string scalar matin) { count = strtoreal(st_local("colprocess")) sim = strtoreal(st_local("rep")) seed = strtoreal(st_local("seed")) h3 = strtoreal(st_local("h3")) if (st_local("nolocation")~="") h3 = 2 hheff = strtoreal(st_local("hheffs")) boots = strtoreal(st_local("boots")) etanorm = strtoreal(st_local("etanormal")) epsnorm = strtoreal(st_local("epsnormal")) EB = strtoreal(st_local("ebest")) lg = strtoreal(st_local("lny")) varinmod = tokens(st_local("varinmodel")) //pointer(real matrix) rowvector agginfo indlist = tokens(st_local("indicators")) //pline = strtoreal(st_local("pline")) yesmata = strtoreal(st_local("matay")) agglist = strtoreal(tokens(st_local("aggids"))) fgtlist = tokens(st_local("fgtlist")) gelist = tokens(st_local("gelist")) pl = strtoreal(tokens(plvar)) plreal = 1 if (missing(pl)>0) { pl = tokens(plvar) plreal = 0 } external bsim, asim, maxA, varr, sigma, v_sigma, sigma_eps, locerr, hherr, locerr2 colsbsim = cols(bsim) colsasim = cols(asim) if ((EB==1) & ((etanorm==0)|(epsnorm==0))){ etanorm=1 epsnorm=1 } //WARNING: area must be sorted outside in Stata //census data - or use other way, ie seek(fh, (N*8+77)*6 ,-1) to get the 7th column fhcensus = fopen(matin, "r") varlist = fgetmatrix(fhcensus) p0 = ftell(fhcensus) a = fgetmatrix(fhcensus) p1 = ftell(fhcensus) N = rows(a) a = J(1,1,.) //The data x = tokens(xvar) z1 = tokens(zvars) yh = tokens(yhats) yh2 = tokens(yhats2) area = tokens(areavar) wt = tokens(wgt) id = tokens(hhid) //pl = tokens(plvar) colsx = cols(x) colsz1 = cols(z1) colsyh = cols(yh) colsyh2 = cols(yh2) if (st_local("cuts")!="") cut = tokens(st_local("cuts")) coladd = (st_local("addvars")!="" ? cols(tokens(st_local("addvars"))) : 0) //if (pline==.) st_view(pl,.,tokens(st_local("pline")), .) //Check if X and other variables (varinmodel local), Z and Yhats are in the code if (st_local("addvars")!="") e3699 = _fvarscheck(tokens(st_local("addvars")), varlist) else e3699=0 e3499 = _fvarscheck(varinmod, varlist) if (z1[1] != "__mz1_000") e3499 = _fvarscheck(z1, varlist) if (yh[1] != "__myh_000") e3499 = _fvarscheck(yh, varlist) if (yh2[1] != "__myh2_000") e3499 = _fvarscheck(yh2, varlist) if ((e3499==1) | (e3699==1)){ errprintf("Variables mentioned above are missing from the target dataset\n") _error(3499) } if (hheff==1) { //zalpha condition - __mz1_000, __myh_000, __myh2_000 if ((z1[1]=="__mz1_000") & (yh[1]=="__myh_000") & (yh2[1]=="__myh2_000")) zcond = "000" //0,0,0 if ((z1[1]=="__mz1_000") & (yh[1]=="__myh_000") & (yh2[1]~="__myh2_000")) zcond = "001" //0,0,1 if ((z1[1]=="__mz1_000") & (yh[1]~="__myh_000") & (yh2[1]=="__myh2_000")) zcond = "010" //0,1,0 if ((z1[1]=="__mz1_000") & (yh[1]~="__myh_000") & (yh2[1]~="__myh2_000")) zcond = "011" //0,1,1 if ((z1[1]~="__mz1_000") & (yh[1]=="__myh_000") & (yh2[1]=="__myh2_000")) zcond = "100" //1,0,0 if ((z1[1]~="__mz1_000") & (yh[1]=="__myh_000") & (yh2[1]~="__myh2_000")) zcond = "101" //1,0,1 if ((z1[1]~="__mz1_000") & (yh[1]~="__myh_000") & (yh2[1]=="__myh2_000")) zcond = "110" //1,1,0 if ((z1[1]~="__mz1_000") & (yh[1]~="__myh_000") & (yh2[1]~="__myh2_000")) zcond = "111" //1,1,1 } //sort is done before and setup area panel area_v = _fgetcoldata(_fvarindex(area[1], varlist), fhcensus, p0, p1-p0) display("{it:Number of observations in target dataset:}") rows(area_v) if (rows(area_v)==0){ errprintf("Your target dataset has no observations, please check\n") _error(3862) } //wt_v = _fgetcoldata(_fvarindex(wt[1], varlist), fhcensus, p0, p1-p0) //pl_v = _fgetcoldata(_fvarindex(pl[1], varlist), fhcensus, p0, p1-p0) info = panelsetup(area_v, 1) rowsinfo = rows(info) display("{it:Number of clusters in target dataset:}") rowsinfo display("") //for (r=1; r<=cols(agglist); r++) agginfo[r,1] = &(panelsetup(_ftruncateID(area_v, agglist[r]), 1)) //area_v = 1 //create the mark index (0 and 1) mask = _fdatamark(N, varinmod, varlist, fhcensus, p0, p1-p0) //N = quadcolsum(mask) //Prep for on the fly calculation if (yesmata==1 & st_local("ydump")=="") { wt_v = select(_fgetcoldata(_fvarindex(wt[1], varlist), fhcensus, p0, p1-p0), mask) nHHLDs = rows(wt_v) area2 = select(area_v, mask) nagg = cols(agglist) npovlines = cols(pl) infov = areaid = nhh = J(1, nagg, NULL) nrow = J(1, nagg, .) for (j=1; j<=nagg; j++) { idfake = trunc(area2/10^agglist[j]) infov[j] = &(panelsetup(idfake, 1)) areaid[j] = &(idfake[(*infov[j])[(1::rows(*infov[j])),2],.]) nhh[j] = &((*infov[j])[.,2] - (*infov[j])[.,1] :+ 1) nrow[j] = rows(*infov[j]) } idfake = NULL //Unit nHHLDs nDroppedHHLD nIndividuals YTrimed nSim Min_Y Max_Y Mean StdErr rmatnames = "nSim", "Unit", "nHHLDs", "nIndividuals", "Mean" senames = "StdErr" nfgts = cols(fgtlist) if (nfgts>0) { for (l=1; l<=npovlines; l++) { for (ind=1; ind<=nfgts; ind++) { plname = (plreal==1 ? strtoname(strofreal(pl[l])) : "_" + pl[l]) rmatnames = rmatnames, "avg_" + fgtlist[ind] + plname senames = senames, "se_" + fgtlist[ind] + plname } } } nges = cols(gelist) if (nges>0) { for (ind=1; ind<=nges; ind++) { rmatnames = rmatnames, "avg_" + gelist[ind] senames = senames, "se_" + gelist[ind] } } rmatnames = rmatnames, senames if (npovlines>0 & nfgts>0 & plreal==0) { plvalue = J(1,npovlines, NULL) for (l=1; l<=npovlines; l++) plvalue[l] = &((_fgetcoldata(_fvarindex(pl[l], varname), fhcensus, p0, p1-p0), mask)) } } //New mata structure: key, matrixobs, matrixvar, _ByID, _ID, _WEIGHT, _YHAT1..._YHAT100, _POVLINE, addition vars if (st_local("ydump")!="") { if (st_local("plinevar")!="") ncols = 3 + sim + 1 + coladd else ncols = 3 + sim + coladd yd = fopen(st_local("ydump"),"rw") //"DATA_MATRIX", "VARIABLE_MATRIX" are removed from the matrix variable varname = "_ByID", "_ID", "_WEIGHT" varname = varname, tokens(st_local("yhatlist")) //if (st_local("plinevar")!="") varname = varname, "_POVLINE" if (st_local("addvars")!="") varname = varname, tokens(st_local("addvars")) fputmatrix(yd, (87801, quadcolsum(mask), ncols, sim, quadsum(strlen(varname)))) //DATA_MATRIX if (st_local("addvars")!="") fputmatrix(yd, ("_ByID", "_ID", "_WEIGHT", tokens(st_local("yhatlist")), tokens(st_local("addvars")))) //VARIABLE_MATRIX else fputmatrix(yd, ("_ByID", "_ID", "_WEIGHT", tokens(st_local("yhatlist")))) //VARIABLE_MATRIX fputmatrix(yd, select(J(N,1,1), mask)) //_ByID fputmatrix(yd, select(_fgetcoldata(_fvarindex(area[1], varlist), fhcensus, p0, p1-p0), mask)) //_ID fputmatrix(yd, select(_fgetcoldata(_fvarindex(wt[1], varlist), fhcensus, p0, p1-p0), mask)) //_WEIGHT //if (st_local("plinevar")!="") fputmatrix(yd, select(_fgetcoldata(_fvarindex(st_local("plinevar"), varlist), fhcensus, p0, p1-p0), mask)) //_POVLINE } //Simulation dots if (boots==0) display("Parametric drawing of betas") else display("Bootstrapped drawing of betas and parameters") //count = 20 //need to be based on memory available printf("{txt}\nNumber of simulations: {res}%g{txt}", sim) printf("{txt}\nEach dot (.) represents {res}%g{txt} simulation(s).\n", count) display("{txt}{hline 4}{c +}{hline 3} 1 " + "{hline 3}{c +}{hline 3} 2 " + "{hline 3}{c +}{hline 3} 3 " + "{hline 3}{c +}{hline 3} 4 " + "{hline 3}{c +}{hline 3} 5 ") //Getting etamat for all area and all simulations //do this because we keep the sequential draw: for each area within each simulation, then repeat each simulation. if (EB==1) { cens_area = J(rowsinfo,1,.) for(r=1; r<=rowsinfo; r++) cens_area[r,1] = area_v[info[r,1],1] D = _ebcensus4(cens_area, locerr, locerr2, info) etamat = rnormal(1,1,D[.,2*1-1],sqrt(D[.,1*2])) //j==1 for(j=2; j<=sim; j++) etamat = etamat, rnormal(1,1,D[.,2*j-1],sqrt(D[.,j*2])) cens_area = D = NULL } else { //EB==0 if (etanorm==0) { //etamat = _f_sampleeps(rowsinfo, 1, locerr[.,1]) //for (j=2; j<=sim; j++) etamat = etamat, _f_sampleeps(rowsinfo, 1, locerr[.,j]) //Added by Paul C Apr 27 2018 itsbs = cols(locerr)!=1 etamat = _f_sampleeps(rowsinfo, 1, locerr[.,1])' if (itsbs==1) for (j=2; j<=sim; j++) etamat = etamat, _f_sampleeps(rowsinfo, 1, locerr[.,j])' else for (j=2; j<=sim; j++) etamat = etamat, _f_sampleeps(rowsinfo, 1, locerr[.,1])' } else { //(etanorm==1) etamat = rnormal(rowsinfo,1,0,sqrt(locerr[1])) for (j=2; j<=sim; j++) etamat = etamat, rnormal(rowsinfo,1,0,sqrt(locerr[j])) } } if (yesmata==0) area_v = NULL else { if (st_local("ydump")=="") { block = J(1, 5 + nfgts*npovlines + nges,.) sim0 = 1 } } for (s=1; s<=sim; s=s+count) { //xb and epsnorm m0 = s,1 \ s+count-1,1 m1 = .,s \ .,s+count-1 xb = J(N,1,1)*bsim[|s,colsbsim \ s+count-1,colsbsim|]' //change to 0 for nocons for (v=1; v<=colsx; v++) xb = xb :+ _fgetcoldata(_fvarindex(x[v], varlist), fhcensus, p0, p1-p0)*bsim[|s,v \ s+count-1,v|]' if (epsnorm==1) { if (hheff==1) { if (zcond == "100") { //xb = (zi,J(rows(zi),1,1))*asim' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsz1; v++) zb = zb + _fgetcoldata(_fvarindex(z1[v], varlist), fhcensus, p0, p1-p0)*asim[|s,v \ s+count-1,v|]' } if (zcond == "110") { //xb = zi*asim[|.,1\.,cols(zi)|]' + (yhi*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi)|]'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsz1; v++) zb = zb + _fgetcoldata(_fvarindex(z1[v], varlist), fhcensus, p0, p1-p0) *asim[|s,v \ s+count-1,v|]' for (v=1; v<=colsyh; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsz1 \ s+count-1,v+colsz1|]'):*xb } if (zcond == "111") { //xb = zi*asim[|.,1\.,cols(zi)|]' + (yhi*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi)|]'):*((xi,J(rows(xi),1,1))*bsim') + (yh2i*asim[|.,1+cols(yhi) \ .,cols(yhi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsz1; v++) zb = zb + _fgetcoldata(_fvarindex(z1[v], varlist), fhcensus, p0, p1-p0) *asim[|s,v \ s+count-1,v|]' for (v=1; v<=colsyh; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsz1 \ s+count-1,v+colsz1|]'):*xb for (v=1; v<=colsyh2; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh2[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsz1+colsyh \ s+count-1,v+colsz1+colsyh|]'):*(xb:^2) } if (zcond == "010") { //xb = (yhi*asim[.,cols(asim)-1]'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsyh; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v \ s+count-1,v|]'):*xb } if (zcond == "011") { //xb = (yhi*asim[.,1..cols(yhi)]'):*((xi,J(rows(xi),1,1))*bsim') + (yh2i*asim[|.,1+cols(yhi) \ .,cols(yhi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim')+ J(rows(yhi),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsyh; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v \ s+count-1,v|]'):*xb for (v=1; v<=colsyh2; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh2[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsyh \ s+count-1,v+colsyh|]'):*(xb:^2) } if (zcond == "001") { //xb = (yh2i*asim[.,1..cols(yh2i)]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yh2i),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsyh2; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh2[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v \ s+count-1,v|]'):*(xb:^2) } if (zcond == "101") { //xb = zi*asim[|.,1 \.,cols(zi)|]' + (yh2i*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi2)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yh2i),1,1)*asim[.,cols(asim)]' zb = J(N,1,1)*asim[|s,colsasim \ s+count-1,colsasim|]' //change to 0 for nocons for (v=1; v<=colsz1; v++) zb = zb + _fgetcoldata(_fvarindex(z1[v], varlist), fhcensus, p0, p1-p0) *asim[|s,v \ s+count-1,v|]' for (v=1; v<=colsyh2; v++) zb = zb + ((_fgetcoldata(_fvarindex(yh2[v], varlist), fhcensus, p0, p1-p0))*asim[|s,v+colsz1 \ s+count-1,v+colsz1|]' ):*(xb:^2) } zb = exp(zb) zb = (maxA[|m0|]':*(zb:/(1:+zb))) :+ (.5*varr[|m0|])':*((maxA[|m0|]':*(zb:*(1:-zb))):/((1:+zb):^3)) //_editmissing(zb, maxA) xb = xb + colshape(vec(rnormal(1,1,0,sqrt(zb))'),N)' //checked OK on 1 sim or many sim zb = NULL //release zb } else { // hheff==0 xb = xb + rnormal(N,1,0,sqrt(sigma_eps[|m0|])') //checked OK on 1 sim or many sim } } else { //epsnorm==0 if (boots==0) xb = xb + _f_sampleeps(count, N, hherr[.,1]) //Parametric only has one vector! else xb = xb + _f_sampleeps(count, N, hherr[|m1|]) //checked OK on 1 sim or many sim } //etanorm for(j=1; j<=rowsinfo; j++) { m2 = info[j,1],. \ info[j,2],. xb[|m2|] = xb[|m2|] :+ etamat[j, s..s+count-1] } //Write col by col to the mata data if (st_local("ydump")!="") { if (lg==1) for (m=1; m<=count; m++) fputmatrix(yd, exp(select(xb[.,m], mask))) else for (m=1; m<=count; m++) fputmatrix(yd, select(xb[.,m], mask)) } //now we have xb, let do the calculation fgt, gini, etc. if (yesmata==1 & st_local("ydump")=="") { xb = select(xb, mask) for (m=1; m<=count; m++) { block0 = J(1,5,.) wt_m = wt_v if (lg==1) y = exp(xb[.,m]) else y = xb[.,m] if (colmissing(y)>0) { _editmissing(y, 0) wt_m[selectindex(rowmissing(y)),.] = J(rows(selectindex(rowmissing(y))),1,0) } wy = wt_m:*y running = quadrunningsum(wt_m,0), quadrunningsum(wy,0) //minmaxy = minmax(y) for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = running[(*infov[j])[1,2],.] \ running[(*infov[j])[(2::nrow[j]),2],.] - running[(*infov[j])[(1::nrow[j]-1),2],.] block0 = block0 \ J(nrow[j],1,sim0), *areaid[j], *nhh[j], A[.,1] , A[.,2]:/A[.,1] } else block0 = block0 \ sim0 , 0 , nHHLDs, running[nHHLDs,1], running[nHHLDs,2]/running[nHHLDs,1] } A = NULL block0 = block0[2..rows(block0),.] if (nfgts>0) { for (l=1; l<=npovlines; l++) { if (plreal==1) { wt_p = (y:<= pl[l]):*wt_m rgap = 1:-(y:/ pl[l]) } else { wt_p = (y:<= *plvalue[l]):*wt_m rgap = 1:-(y:/ *plvalue[l]) } for (ind=1; ind<=nfgts; ind++) { if (fgtlist[ind]=="fgt0") currfgt = running[.,1], quadrunningsum(wt_p,0) if (fgtlist[ind]=="fgt1") currfgt = running[.,1], quadrunningsum(wt_p:*rgap,0) if (fgtlist[ind]=="fgt2") currfgt = running[.,1], quadrunningsum(wt_p:*rgap:*rgap,0) fgtx = J(1,1,.) for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = currfgt[(*infov[j])[1,2],.] \ currfgt[(*infov[j])[(2::nrow[j]),2],.] - currfgt[(*infov[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ A[.,2] :/ A[.,1] } else fgtx = fgtx \ currfgt[nHHLDs,2]:/ currfgt[nHHLDs,1] } block0 = block0, fgtx[2..rows(fgtx),1] A = fgtx = currfgt = NULL } //ind } //plines } //nfgt if (nges>0) { lny = ln(y) wlny = wt_m:*lny for (ind=1; ind<=nges; ind++) { if (gelist[ind]=="ge0") current = running, quadrunningsum(wlny,0) if (gelist[ind]=="ge1") current = running, quadrunningsum(wy:*lny,0) if (gelist[ind]=="ge2") current = running, quadrunningsum(wy:*y,0) fgtx = J(1,1,.) if (gelist[ind]=="ge0") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*infov[j])[1,2],.] \ current[(*infov[j])[(2::nrow[j]),2],.] - current[(*infov[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ -(A[.,3]:/A[.,1]) :+ ln(A[.,2]:/A[.,1]) } else fgtx = fgtx \ -(current[nHHLDs,3]:/current[nHHLDs,1]) :+ ln(current[nHHLDs,2]:/current[nHHLDs,1]) } } if (gelist[ind]=="ge1") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*infov[j])[1,2],.] \ current[(*infov[j])[(2::nrow[j]),2],.] - current[(*infov[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ (A[.,3]:/A[.,2]) :- ln(A[.,2]:/A[.,1]) } else fgtx = fgtx \ (current[nHHLDs,3]:/current[nHHLDs,2]) :- ln(current[nHHLDs,2]:/current[nHHLDs,1]) } } if (gelist[ind]=="ge2") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*infov[j])[1,2],.] \ current[(*infov[j])[(2::nrow[j]),2],.] - current[(*infov[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ 0.5*((((A[.,2] :/A[.,1]) :^-2):*(A[.,3]:/A[.,1])):-1) } else fgtx = fgtx \ 0.5*((((current[nHHLDs,2]:/current[nHHLDs,1]):^-2):*(current[nHHLDs,3]:/current[nHHLDs,1])):-1) } } block0 = block0, fgtx[2..rows(fgtx),1] A = fgtx = current = NULL } //ind } //nges //add blocks block = block \ block0 sim0 = sim0 + 1 } //m } //yesmata printf(".") if (mod(s,50)==0) printf(" %5.0f\n",s) displayflush() } //end of s xb = area_v = wt_v = wt_m = pl_v = NULL block0 = y = wt0 = wt = area = wy = running = wt_p = rgap = plvalue = lny = wlny = info = areaid = nhh = NULL //Add additional variables to the ydump if (st_local("ydump")!="") { if (st_local("addvars")!="") { addvarlist = tokens(st_local("addvars")) for (v=1; v<=coladd; v++) fputmatrix(yd, select(_fgetcoldata(_fvarindex(addvarlist[v], varlist), fhcensus, p0, p1-p0), mask)) //_ID } fclose(yd) } fclose(fhcensus) //export results to Stata if (yesmata==1 & st_local("ydump")=="") { block = block[2..rows(block),.] _sort(block, (2,1)) infov = panelsetup(block,2) rinfo = rows(infov) outsim = J(1, 4+(cols(block)-4)*2, .) for (i=1; i<=rinfo; i++) { rr = infov[i,1],5 \ infov[i,2],. out = quadmeanvariance(block[|rr|]) outsim = outsim \ infov[i,2]-infov[i,1]+1, block[infov[i,1],2::4], out[1,.], sqrt(diagonal(out[2..rows(out),.])') } out = block = NULL outsim = outsim[2..rows(outsim),.] stata("clear") (void) st_addvar("double", rmatnames) st_addobs(rows(outsim)) st_store(.,.,outsim) outsim = NULL } st_local("_itran","0") } // OLS with weight and vce option 0 none, 1 robust, 2 cluster, 3 povmap adjustment function _f_wols(real matrix y, real matrix x, real matrix wt, real matrix info, real scalar vce, real scalar bs) { pointer(real matrix) rowvector olsout olsout = J(1,9,NULL) nobs = rows(x) // N Obs ncolx = cols(x) if (bs==0) { xwx = invsym(quadcross(x,wt,x)) b_ols = quadcross(xwx,quadcross(x,wt,y)) res = y - quadcross(x',b_ols) N = rows(info) //USE MSE FROM POVMAP, it will not match that from stata! mse = quadcross(res,wt,res)/(mean(wt))/(nobs - ncolx) if (vce==0) v_ols = (quadcross(res,wt,res)*xwx)/(nobs - ncolx) //regular WOLS COV if (vce==1) v_ols = (nobs/(nobs - ncolx))*quadcross(xwx',quadcross(quadcross((wt:*res):*x,(wt:*res):*x)',xwx)) //aw robust or pw: correct stata way if (vce==2) { //cluster if (N>1) { //VCE cluster COV M = J(ncolx, ncolx, 0) for(i=1; i<=N; i++) { xi = panelsubmatrix(x, i, info) ei = panelsubmatrix(res, i, info) wi = panelsubmatrix(wt, i, info) we = quadcross((wi:*ei)' ,(wi:*ei)') M = M + quadcross(quadcross(xi,we)',xi) } v_ols = ((nobs - 1)/(nobs - ncolx))*(N/(N-1))*quadcross(quadcross(xwx',M)',xwx) } else { //Robust OLS COV v_ols = (nobs/(nobs - ncolx))*quadcross(xwx',quadcross(quadcross((wt:*res):*x,(wt:*res):*x)',xwx)) } } if (vce==3) v_ols = quadcross(quadcross(xwx',quadcross(x,(wt:^2),x))',xwx):*mse // Povmap method with adjustment //estimate other statistics for OLS df = ncolx - 1 //Degrees of freedom R2 = 1 - quadcross(res,wt,res)/quadcrossdev(y, mean(y,wt), wt, y, mean(y,wt)) //R2 fstat = (R2/(1-R2))*((nobs - ncolx)/df) //Fstat aR2 = 1 - ((nobs - 1)/(nobs - ncolx))*(1-R2) //Adjusted R2 } //bs else { b_ols = quadcross(invsym(quadcross(x,wt,x)),quadcross(x,wt,y)) res = y - quadcross(x',b_ols) mse = quadcross(res,wt,res)/(mean(wt))/(nobs - ncolx) } olsout[1,1]= &(b_ols) olsout[1,2]= &(v_ols) olsout[1,3]= &(res) olsout[1,4]= &(mse) olsout[1,5]= &(R2) olsout[1,6]= &(fstat) olsout[1,7]= &(aR2) olsout[1,8]= &(nobs) olsout[1,9]= &(df) return(olsout) } //simple remove index function (i<=n) function _f_rmindex(real scalar i, real scalar n) { if (i==1) return(2..n) else if (i==n) return(1..n-1) else { if (i>n | i<0 | n<0) { errprintf("Wrong input; must be i<=n and both i and n are positive\n") _error(3499) } else return(1..i-1,i+1..n) } } //function to select variables based on the VIF information with intercept function _f_vif(string scalar xvar, string scalar wvar, string scalar touse) { x0 = st_data(.,tokens(xvar), touse) wt = st_data(.,tokens(wvar), touse) nobs = rows(x0) x0 = x0, J(nobs,1,1) ncolx = cols(x0) ncolx1 = ncolx-1 xpx0 = quadcross(x0,wt,x0) vif = J(ncolx1,1,.) for (i=1; i<=ncolx1; i++) { ind = _f_rmindex(i,ncolx) y = x0[.,i] x = x0[.,ind] res = y - quadcross(x',quadcross(invsym(xpx0[ind, ind])', quadcross(x,wt,y))) R2 = 1 - quadcross(res,wt,res)/quadcrossdev(y, mean(y,wt), wt, y, mean(y,wt)) //R2 aR2 = 1 - ((nobs - 1)/(nobs - ncolx))*(1-R2) //Adjusted R2 vif[i] = 1/(1-aR2) } return(vif) } //function to select variables based on the VIF information with intercept, x0 does not contain constant, return vif function _f_vif_mata(real matrix x0, real matrix wt) { nobs = rows(x0) x0 = x0, J(nobs,1,1) ncolx = cols(x0) ncolx1 = ncolx-1 xpx0 = quadcross(x0,wt,x0) vif = J(ncolx1,1,.) for (i=1; i<=ncolx1; i++) { ind = _f_rmindex(i,ncolx) y = x0[.,i] x = x0[.,ind] res = y - quadcross(x',quadcross(invsym(xpx0[ind, ind])', quadcross(x,wt,y))) R2 = 1 - quadcross(res,wt,res)/quadcrossdev(y, mean(y,wt), wt, y, mean(y,wt)) //R2 aR2 = 1 - ((nobs - 1)/(nobs - ncolx))*(1-R2) //Adjusted R2 vif[i] = 1/(1-aR2) } return(vif) } //stepwise vif function based on a threshold, remove one var at at time, return index function _f_stepvif(string scalar xvar, string scalar wvar, real scalar threshold, string scalar touse) { varname = tokens(xvar) x0 = st_data(.,tokens(xvar), touse) wt = st_data(.,tokens(wvar), touse) vif = _f_vif_mata(x0, wt) matind = selectindex(vif: threshold) { vif = _f_vif_mata(x0[., matind], wt) matind = matind[selectindex(vif :< max(vif))'] } st_local("vifvar", invtokens(varname[matind])) return(matind) } } //GLS Roy's paper function _f_hh_gls2(real matrix y, real matrix x, real matrix wt, real matrix sig_e, real scalar sig_n, real matrix info, real scalar EB, real scalar bs) { pointer(real matrix) rowvector glsout2 glsout2 = J(1,3,NULL) //Capital sigma matrix for GLS xtwex = xtwewx = J(cols(x), cols(x), 0) xtwey = J(cols(x), 1, 0) N = rows(info) //Loop through clusters for (i=1; i<=N; i++) { cv = panelsubmatrix(sig_e,i,info) v = diag(cv) + J(rows(cv),rows(cv),sig_n) wt1 = panelsubmatrix(wt,i,info) cv = diag(cv:/wt1) + (quadsum(wt1)/quadsum(wt1:^2))*J(rows(cv),rows(cv),sig_n) x1 = panelsubmatrix(x,i,info) y1 = panelsubmatrix(y,i,info) _invsym(cv) xt = quadcross(x1,cv) xtwex = xtwex + quadcross(xt',x1) xtwey = xtwey + quadcross(xt',y1) xtwewx = xtwewx + quadcross(quadcross(xt',v)',quadcross(cv,x1)) } if (bs==0) { _invsym(xtwex) Beta2 = quadcross(xtwex,xtwey) vcov2 = quadcross(quadcross((xtwex),xtwewx)',(xtwex)) } else { //Beta2 = lusolve(xtwex,xtwey) Beta2 = quadcross(invsym(xtwex),xtwey) } //following estimates GLS residuals for EB glsout2[1,1] = &(Beta2) glsout2[1,2] = &(vcov2) glsout2[1,3] = &(y -quadcross(x',Beta2)) return(glsout2) } function _f_hh_gls2old(real matrix y, real matrix x, real matrix wt, real matrix sig_e, real scalar sig_n, real matrix info, real scalar EB, real scalar bs) { pointer(real matrix) rowvector glsout2 glsout2 = J(1,3,NULL) //Capital sigma matrix for GLS xtwex = xtwewx = J(cols(x), cols(x), 0) xtwey = J(cols(x), 1, 0) N = rows(info) //Loop through clusters for (i=1; i<=N; i++) { cv = diag(panelsubmatrix(sig_e,i,info)) v = cv + J(rows(cv),cols(cv),sig_n) wt1 = panelsubmatrix(wt,i,info) cv = quadcross((diag((1:/wt1))),cv) + (quadsum(wt1)/quadsum(wt1:^2))*J(rows(cv),cols(cv),sig_n) x1 = panelsubmatrix(x,i,info) y1 = panelsubmatrix(y,i,info) _invsym(cv) xt = quadcross(x1,cv) xtwex = xtwex + quadcross(xt',x1) xtwey = xtwey + quadcross(xt',y1) //xtwewx = xtwewx + quadcross(quadcross(xt',v)',quadcross(luinv(cv)',x1)) xtwewx = xtwewx + quadcross(quadcross(xt',v)',quadcross(cv',x1)) } if (bs==0) { _invsym(xtwex) Beta2 = quadcross(xtwex',xtwey) vcov2 = quadcross(quadcross((xtwex)',xtwewx)',(xtwex)) } else { //Beta2 = lusolve(xtwex,xtwey) Beta2 = quadcross(invsym(xtwex),xtwey) } //following estimates GLS residuals for EB glsout2[1,1] = &(Beta2) glsout2[1,2] = &(vcov2) glsout2[1,3] = &(y -quadcross(x',Beta2)) return(glsout2) } // Sigma eta for ELL function _ell_sigeta(real matrix uch, real matrix wt, real matrix info, real scalar sim) { pointer(real matrix) rowvector ellsig ellsig = J(1,2,NULL) N = rows(info) tau2c = wc = ucdot = nc = J(N,1,.) wall = quadsum(wt) data = uch, wt for (a=1; a<=N; a++) { //Get weights datai = panelsubmatrix(data, a, info) wi = datai[.,cols(datai)] wc[a] = (quadsum(wi)/wall) //Get Tau_c ucdot[a] = mean(datai[.,1],wi) nc[a] = rows(datai) ech = datai[.,1] :- ucdot[a] tau2c[a] = (1/(nc[a]*(nc[a]-1)))*quadcrossdev(ech,mean(ech),ech,mean(ech)) //end of Tau } num1 = quadcrossdev(ucdot,(mean(ucdot,wc)),wc,ucdot,(mean(ucdot,wc))) num2 = quadcross((wc:*tau2c),(1:-wc)) den = quadcross(wc,(1:-wc)) sigmas = ((num1-num2)/den),0 varsigma2 = max(sigmas) ellsig[1,1] = &(varsigma2) if (sim==0) { sig2n_hat = 0 for (a=1; a<=N; a++) { sig2n_hat = sig2n_hat +2*(((wc[a]/den)^2)*((varsigma2^2)+(tau2c[a]^2)+2*varsigma2*tau2c[a])+(((wc[a]*(1-wc[a]))/den)^2)*((tau2c[a]^2)/(nc[a]-1))) } ellsig[1,2] = &(sig2n_hat) } return(ellsig) } //ELL GLS, no longer used in PovMap. Offer as option? function _f_ell_gls(real matrix y, real matrix x, real matrix wt, real matrix info, real matrix sig_e, real scalar sig_n) { pointer(real matrix) rowvector glsout glsout = J(1,2,NULL) N = rows(info) //Capital sigma matrix for GLS xtwex = xtwewx = J(cols(x), cols(x), 0) xtwey = J(cols(x), 1, 0) //Loop through clusters for (i=1; i<=N; i++) { cv = diag(panelsubmatrix(sig_e,i,info)) cv = cv + J(rows(cv),cols(cv),sig_n) x1 = panelsubmatrix(x,i,info) wt1 = panelsubmatrix(wt,i,info) y1 = panelsubmatrix(y,i,info) xt = quadcross(x1,wt1,luinv(cv)) xtwex = xtwex + quadcross(xt',x1) xtwey = xtwey + quadcross(xt',y1) xtwewx = xtwewx + quadcross(xt',wt1,x1) } Beta2 = quadcross(luinv(xtwex)',xtwey) vcov2 = quadcross(quadcross(luinv(xtwex)',xtwewx)',luinv(xtwex)) //VCOV is not symmetric, it must be symmetric, we follow Haslett, 2005 pg 160 vcov2 =(1/2)*(vcov2+vcov2') glsout[1,1] = &(Beta2) glsout[1,2] = &(vcov2) return(glsout) } //function to draw multivariate normal distribution function _f_drawnorm(real scalar n, real matrix M, real matrix V, real scalar seed) { return(M :+ invnormal(uniform(n,cols(V)))*cholesky(V)') } //function to draw multivariate normal distribution - POVMAP's way function _f_drawnorm2(real scalar n, real matrix M, real matrix vcov) { U=V=D=J(rows(vcov),rows(vcov),.) svd(vcov,U,D,V) return(M:+ (invnormal(uniform(n,rows(vcov)))*(U*diag(sqrt(D))*U'))) } //Random drawing with replacement of epsilons function _f_sampleeps(real scalar n, real scalar dim, real matrix eps) { sige2 = J(dim,n,0) N = rows(eps) if (cols(eps)==1) for(i=1; i<=n; i++) sige2[.,i]=eps[ceil(N*runiform(dim,1)),1] else for(i=1; i<=n; i++) sige2[.,i]=eps[ceil(N*runiform(dim,1)),i] //for(i=1; i<=n; i++) sige2[.,i]=eps[ceil(rows(eps)*runiform(dim,1)),i] return(sige2) } // data[|m0|] = dout[ceil(nrow*uniform(nrow,1)), .] //Gamma sampler for sigeta function _f_gammadraw(real matrix seta, real matrix var_seta, real scalar n) { return(rgamma(n,1,(seta^2/var_seta), (var_seta/seta))) } //Function to match location error for EB? function _f_ebmatch(real matrix sigeta, real matrix wt, real matrix sigeps, real matrix info, real matrix area, real matrix res) { pointer(real matrix) colvector ebout ebout = J(1,2,NULL) N = rows(info) sigout = J(N,3,.) //sigout -> Area, uhat, sigmaeta for(i=1;i<=N;i++) { sige = panelsubmatrix(sigeps,i,info) peso = panelsubmatrix(wt,i,info) resi = panelsubmatrix(res,i,info) alfach = ((peso:/sige) :/ quadsum((peso:/sige))) //alfach = quadcross(sige,(((peso:/sige):/quadsum((peso:/sige))):^2)) gammau = sigeta/(sigeta+(quadsum((peso:^2))/(quadsum(peso)*quadsum((peso:/sige))))) //sigout[i,.] = area[info[i,1]], (gammau*(alfach*quadsum(resi))), (sigeta - (gammau^2)*(sigeta + alfach)) sigout[i,.] = area[info[i,1]], (gammau*quadcross(resi,alfach)), (sigeta - (gammau^2)*(sigeta + quadcross(sige,(alfach:^2)))) res[|info[i,1],1 \ info[i,2],1|] = -(gammau*quadcross(resi,alfach)):+resi } ebout[1,1]=&sigout ebout[1,2]=&res return(ebout) } //Function to ..? function _f_hhech(real matrix res, real matrix wt, real matrix info, real matrix area) { pointer(real matrix) colvector echout echout=J(1,2,NULL) N = rows(info) eta=J(N,2,0) for(i=1;i<=N;i++) { peso = panelsubmatrix(wt,i,info) resi = panelsubmatrix(res,i,info) eta[i,.] = area[info[i,1]],mean(resi,peso) res[|info[i,1],1 \ info[i,2],1|] = -eta[i,2]:+resi } echout[1,1] = &eta echout[1,2] = &res return(echout) } //Pointer Function to clone pointers pointer (transmorphic matrix) scalar f_pointer_clone(transmorphic matrix X) { transmorphic matrix Y return(&(Y = X)) } //To assign etas and sigetas to census areas for EB function _ebcensus4(real matrix area1, real matrix etabs, real matrix etabs2, real matrix info1) { N = rows(info1) cens_etas=J(N,(cols(etabs)-1),.) for(i=1; i<=N; i++) { k=0 for(j=1;((j<=rows(etabs)) & (k==0));j++) { if (area1[i,1]==etabs[j,1]) { cens_etas[i,.] = etabs[|j,2\j,cols(etabs)|] if (j!=rows(etabs)) etabs = etabs[|j+1,. \ rows(etabs),.|] k=1 } if ((k==0) & (j==rows(etabs))) cens_etas[i,.] = etabs2[1,2..cols(etabs2)] } } return(cens_etas) } //To assign etas and sigetas to census areas for EB function _ebcensus5(real matrix area1, real matrix etabs, real matrix info1) { N = rows(info1) nrow = rows(etabs) ncol = cols(etabs) cens_etas=J(N,1,(nrow+1)) for(i=1; i<=nrow;i++) for(j=1; j<=N;j++) if (area1[j,1]==etabs[i,1]) cens_etas[j,1]=i return(cens_etas) } //1- DATA related functions //function to find the position of the variable //indlist is tokenize string matrix, strfind is string input //_fvarindex("roof", indlist) //varlist = tokens(strall), 0 is not found function _fvarindex(strfind, varlist) { match = 0 nvars = cols(varlist) for (i=1; i<=nvars; i++) { if (strmatch(varlist[i], strfind) == 1) { match = i i = nvars } } return(match) } //function to get the column from the mata matrix datafile //bytestart=p0, bytelength=p1-p0 function _fgetcoldata(colpos, filehandler, bytestart, bytelength) { if (colpos > 0) { fseek(filehandler, (colpos-1)*bytelength + bytestart ,-1) return(fgetmatrix(filehandler)) } else { exit(error(2000)) } } function _fgetcoldata2(colpos, filehandler, n) { fseek(filehandler, (n*8+77)*colpos ,-1) //this is because there is 1 vector before the data so colpos is the same, otherwise colspos-1 return(fgetmatrix(filehandler)) } //function to mark 0 and 1 to use in calculation (remove missing) function _fdatamark(obs, varmod, varlist, filehandler, bytestart, bytelength) { mark = J(obs,1,1) col = cols(varmod) for (i=1; i<=col; i++) mark = mark:*((_fgetcoldata(_fvarindex(varmod[i], varlist), filehandler, bytestart, bytelength)):~=.) return(mark) } //function to truncate IDs by the position of the ID function _ftruncateID(real matrix ids, real scalar pos) { return(trunc(ids:/(10^pos))) } //function to save ydump2dta function _fydump2dta(ydump, dtaout) { dtafcol = strtoreal(st_local("dtacol")) dtafsize = strtoreal(st_local("dtasize")) yd = fopen(ydump,"r") info = fgetmatrix(yd) varname = fgetmatrix(yd) header = 77+5*8+4*info[3]+77+info[5] addvar = info[3]-info[4] filesize = (info[2]*info[3]*8 + 4*info[2])/1024^2 ncolperfile = floor((dtafsize*(1024^2) - 4*info[2])/(8*info[2])) nfiles = ceil(filesize/dtafsize) //first file _seq, _ByID, _ID, _WEIGHT, addvars, sim //second file _seq, _ByID, _ID, _WEIGHT, sim } //function to get levelsof from Andrew Maurer/statalist.org real vector _fintlevelsof(real vector A) { real scalar maxA, minA, rangeA, offset real vector minmaxA, b minmaxA = minmax(A) minA = minmaxA[1,1] maxA = minmaxA[1,2] rangeA = maxA-minA+1 offset = -minA+1 if (rangeA > 10^9) _error(9,"range of vector must be less than 1 billion") b = J(rangeA, 1, 0) b[A:+offset,1] = J(length(A),1,1) return(selectindex(b):-offset) } //function to check the variable in the list, return 0/1 and local _itran function _fvarscheck(varscheck, varlist) { nvarscheck = cols(varscheck) ret = 0 for (v=1; v<=nvarscheck; v++) { if (_fvarindex(varscheck[v], varlist)==0) { st_local("_itran","9999") errprintf("Variable %s not found\n", varscheck[v]) ret = 1 } } return(ret) } //function to expand the col in data by info structure //data = unit, trans;, info is panel info matrix, areaid is id from each subgroup function _fdataexpand(data, info, areaid) { if (rows(data)~=rows(areaid)) exit(error(3200)) else { nrow = rows(info) out = J(info[rnrow,2],1,.) for (i=1; i<=nrow; i++) { j = selectindex(areaid:==data[i,1]) out[info[j,1]..info[j,2]] = J(info[j,2]-info[j,1], 1, data[i,2]) } return(out) } } //2- CALCULATION functions // function to compute indicators, X = inclist, wt, pline // y = nHHLDs, nIndividuals, min_y, max_y, mean, (indicator) function _fgetinds(x, wt, pl, lny) { //if (lny==1) x = exp(x) if (lny==1) pl = ln(pl) //we also can do the ln of poverty line, this is much faster than the exp(x) indlist = tokens(st_local("indicators")) y = J(cols(x),1,rows(x)), J(cols(x),1,quadsum(wt)), colminmax(x)', mean(x,wt)' for (i=1; i<=cols(indlist); i++) { if (indlist[i]=="fgt0") y = y , _fFGT(x, pl, wt, 0)' if (indlist[i]=="fgt1") y = y , _fFGT(x, pl, wt, 1)' if (indlist[i]=="fgt2") y = y , _fFGT(x, pl, wt, 2)' if (indlist[i]=="gini") y = y , _fGinis(x, wt)' if (indlist[i]=="ge0") y = y , _fGE(x, wt, 0)' if (indlist[i]=="ge1") y = y , _fGE(x, wt, 1)' if (indlist[i]=="ge2") y = y , _fGE(x, wt, 2)' } return(y) } // need to add different weights //function to simulate the budget allocation - JDE paper void _s2sc_inds_sim(string scalar ydump, string scalar plines, string scalar aggids, string scalar areavar, string scalar wgtvar, real matrix transdata) { //agglist = strtoreal(tokens(st_local("aggids"))) agglist = strtoreal(tokens(aggids)) fgtlist = tokens(st_local("fgtlist")) gelist = tokens(st_local("gelist")) pl = strtoreal(tokens(plines)) plreal = 1 if (missing(pl)>0) { pl = tokens(plines) plreal = 0 } in = fopen(ydump, "r" ) size = fgetmatrix(in) varname = fgetmatrix(in) p0 = ftell(in) id = fgetmatrix(in) p1 = ftell(in) id = NULL area = _fgetcoldata(_fvarindex(areavar, varname), in, p0, p1-p0) wt0 = _fgetcoldata(_fvarindex(wgtvar, varname), in, p0, p1-p0) p2 = 2*(p1-p0)+p1 nHHLDs = rows(wt0) nagg = cols(agglist) npovlines = cols(pl) info = areaid = nhh = J(1, nagg, NULL) nrow = J(1, nagg, .) for (j=1; j<=nagg; j++) { idfake = trunc(area/10^agglist[j]) info[j] = &(panelsetup(idfake, 1)) areaid[j] = &(idfake[(*info[j])[(1::rows(*info[j])),2],.]) nhh[j] = &((*info[j])[.,2] - (*info[j])[.,1] :+ 1) nrow[j] = rows(*info[j]) } idfake = NULL //Unit nHHLDs nDroppedHHLD nIndividuals YTrimed nSim Min_Y Max_Y Mean StdErr rmatnames = "nSim", "Unit", "nHHLDs", "nIndividuals", "Mean" senames = "StdErr" nfgts = cols(fgtlist) if (nfgts>0) { for (l=1; l<=npovlines; l++) { for (ind=1; ind<=nfgts; ind++) { plname = (plreal==1 ? strtoname(strofreal(pl[l])) : "_" + pl[l]) rmatnames = rmatnames, "avg_" + fgtlist[ind] + plname senames = senames, "se_" + fgtlist[ind] + plname } } } //nges = cols(gelist) nges = 0 if (nges>0) { for (ind=1; ind<=nges; ind++) { rmatnames = rmatnames, "avg_" + gelist[ind] senames = senames, "se_" + gelist[ind] } } rmatnames = rmatnames, senames if (npovlines>0 & nfgts>0 & plreal==0) { plvalue = J(1,npovlines, NULL) for (l=1; l<=npovlines; l++) plvalue[l] = &(_fgetcoldata(_fvarindex(pl[l], varname), in, p0, p1-p0)) } fseek(in, p2, -1) block = J(1, 5 + nfgts*npovlines + nges,.) printf("{txt}\nComputing indicators for {res}%g{txt} simulation(s).\n", size[4]) display("{txt}{hline 4}{c +}{hline 3} 1 " + "{hline 3}{c +}{hline 3} 2 " + "{hline 3}{c +}{hline 3} 3 " + "{hline 3}{c +}{hline 3} 4 " + "{hline 3}{c +}{hline 3} 5 ") for (sim=1; sim<=size[4]; sim++) { block0 = J(1,5,.) y0 = fgetmatrix(in) wt = wt0 if (colmissing(y0)>0) { _editmissing(y0, 0) index = selectindex(rowmissing(y0)) wt[index,.] = J(rows(index),1,0) } index = NULL wy = wt:*y0 running = quadrunningsum(wt,0), quadrunningsum(wy,0) //minmaxy = minmax(y) for (j=1; j<=nagg; j++) { //no mean if (nrow[j] >=2) { A = running[(*info[j])[1,2],.] \ running[(*info[j])[(2::nrow[j]),2],.] - running[(*info[j])[(1::nrow[j]-1),2],.] block0 = block0 \ J(nrow[j],1,sim), *areaid[j], *nhh[j], A[.,1] , J(rows(A),1,0) } else block0 = block0 \ sim , 0 , nHHLDs, running[nHHLDs,1], 0 } A = NULL block0 = block0[2..rows(block0),.] if (nfgts>0) { for (l=1; l<=npovlines; l++) { for (ind=1; ind<=nfgts; ind++) { fgtx = J(1,1,.) for (j=1; j<=nagg; j++) { y = y0 + _fdataexpand(*data[j,l], *info[j], *areaid[j]) if (plreal==1) { wt_p = (y:<= pl[l]):*wt rgap = 1:-(y:/ pl[l]) } else { wt_p = (y:<= *plvalue[l]):*wt rgap = 1:-(y:/ *plvalue[l]) } if (fgtlist[ind]=="fgt0") currfgt = running[.,1], quadrunningsum(wt_p,0) if (fgtlist[ind]=="fgt1") currfgt = running[.,1], quadrunningsum(wt_p:*rgap,0) if (fgtlist[ind]=="fgt2") currfgt = running[.,1], quadrunningsum(wt_p:*rgap:*rgap,0) if (nrow[j] >=2) { A = currfgt[(*info[j])[1,2],.] \ currfgt[(*info[j])[(2::nrow[j]),2],.] - currfgt[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ A[.,2] :/ A[.,1] } else fgtx = fgtx \ currfgt[nHHLDs,2]:/ currfgt[nHHLDs,1] } block0 = block0, fgtx[2..rows(fgtx),1] A = fgtx = currfgt = NULL } //ind } //plines } //nfgt if (nges>0) { lny = ln(y) wlny = wt:*lny for (ind=1; ind<=nges; ind++) { if (gelist[ind]=="ge0") current = running, quadrunningsum(wlny,0) if (gelist[ind]=="ge1") current = running, quadrunningsum(wy:*lny,0) if (gelist[ind]=="ge2") current = running, quadrunningsum(wy:*y,0) fgtx = J(1,1,.) if (gelist[ind]=="ge0") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*info[j])[1,2],.] \ current[(*info[j])[(2::nrow[j]),2],.] - current[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ -(A[.,3]:/A[.,1]) :+ ln(A[.,2]:/A[.,1]) } else fgtx = fgtx \ -(current[nHHLDs,3]:/current[nHHLDs,1]) :+ ln(current[nHHLDs,2]:/current[nHHLDs,1]) } } if (gelist[ind]=="ge1") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*info[j])[1,2],.] \ current[(*info[j])[(2::nrow[j]),2],.] - current[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ (A[.,3]:/A[.,2]) :- ln(A[.,2]:/A[.,1]) } else fgtx = fgtx \ (current[nHHLDs,3]:/current[nHHLDs,2]) :- ln(current[nHHLDs,2]:/current[nHHLDs,1]) } } if (gelist[ind]=="ge2") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*info[j])[1,2],.] \ current[(*info[j])[(2::nrow[j]),2],.] - current[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ 0.5*((((A[.,2] :/A[.,1]) :^-2):*(A[.,3]:/A[.,1])):-1) } else fgtx = fgtx \ 0.5*((((current[nHHLDs,2]:/current[nHHLDs,1]):^-2):*(current[nHHLDs,3]:/current[nHHLDs,1])):-1) } } block0 = block0, fgtx[2..rows(fgtx),1] A = fgtx = current = NULL } //ind } //nges //add blocks block = block \ block0 printf(".") if (mod(sim,50)==0) printf(" %5.0f\n",sim) displayflush() } //sim block0 = y = y0 = wt0 = wt = area = wy = running = wt_p = rgap = plvalue = lny = wlny = info = areaid = nhh = NULL fclose(in) block = block[2..rows(block),.] _sort(block, (2,1)) info = panelsetup(block,2) rinfo = rows(info) outsim = J(1, 4+(cols(block)-4)*2, .) for (i=1; i<=rinfo; i++) { rr = info[i,1],5 \ info[i,2],. out = quadmeanvariance(block[|rr|]) outsim = outsim \ info[i,2]-info[i,1]+1, block[info[i,1],2::4], out[1,.], sqrt(diagonal(out[2..rows(out),.])') } out = block = NULL outsim = outsim[2..rows(outsim),.] stata("clear") (void) st_addvar("double", rmatnames) st_addobs(rows(outsim)) st_store(.,.,outsim) outsim = NULL } // function to process indicators for ydump, save output to dta void _s2sc_inds(string scalar ydump, string scalar plines, string scalar aggids, string scalar areavar, string scalar wgtvar) { //agglist = strtoreal(tokens(st_local("aggids"))) agglist = strtoreal(tokens(aggids)) fgtlist = tokens(st_local("fgtlist")) gelist = tokens(st_local("gelist")) pl = strtoreal(tokens(plines)) plreal = 1 if (missing(pl)>0) { pl = tokens(plines) plreal = 0 } in = fopen(ydump, "r" ) size = fgetmatrix(in) varname = fgetmatrix(in) p0 = ftell(in) id = fgetmatrix(in) p1 = ftell(in) id = NULL area = _fgetcoldata(_fvarindex(areavar, varname), in, p0, p1-p0) wt0 = _fgetcoldata(_fvarindex(wgtvar, varname), in, p0, p1-p0) p2 = 2*(p1-p0)+p1 nHHLDs = rows(wt0) nagg = cols(agglist) npovlines = cols(pl) info = areaid = nhh = J(1, nagg, NULL) nrow = J(1, nagg, .) for (j=1; j<=nagg; j++) { idfake = trunc(area/10^agglist[j]) info[j] = &(panelsetup(idfake, 1)) areaid[j] = &(idfake[(*info[j])[(1::rows(*info[j])),2],.]) nhh[j] = &((*info[j])[.,2] - (*info[j])[.,1] :+ 1) nrow[j] = rows(*info[j]) } idfake = NULL //Unit nHHLDs nDroppedHHLD nIndividuals YTrimed nSim Min_Y Max_Y Mean StdErr rmatnames = "nSim", "Unit", "nHHLDs", "nIndividuals", "Mean" senames = "StdErr" nfgts = cols(fgtlist) if (nfgts>0) { for (l=1; l<=npovlines; l++) { for (ind=1; ind<=nfgts; ind++) { plname = (plreal==1 ? strtoname(strofreal(pl[l])) : "_" + pl[l]) rmatnames = rmatnames, "avg_" + fgtlist[ind] + plname senames = senames, "se_" + fgtlist[ind] + plname } } } nges = cols(gelist) if (nges>0) { for (ind=1; ind<=nges; ind++) { rmatnames = rmatnames, "avg_" + gelist[ind] senames = senames, "se_" + gelist[ind] } } rmatnames = rmatnames, senames if (npovlines>0 & nfgts>0 & plreal==0) { plvalue = J(1,npovlines, NULL) for (l=1; l<=npovlines; l++) plvalue[l] = &(_fgetcoldata(_fvarindex(pl[l], varname), in, p0, p1-p0)) } fseek(in, p2, -1) block = J(1, 5 + nfgts*npovlines + nges,.) printf("{txt}\nComputing indicators for {res}%g{txt} simulation(s).\n", size[4]) display("{txt}{hline 4}{c +}{hline 3} 1 " + "{hline 3}{c +}{hline 3} 2 " + "{hline 3}{c +}{hline 3} 3 " + "{hline 3}{c +}{hline 3} 4 " + "{hline 3}{c +}{hline 3} 5 ") for (sim=1; sim<=size[4]; sim++) { block0 = J(1,5,.) y = fgetmatrix(in) wt = wt0 if (colmissing(y)>0) { index = selectindex(rowmissing(y0)) wt[index,.] = J(rows(index),1,0) _editmissing(y, 0) } index = NULL wy = wt:*y running = quadrunningsum(wt,0), quadrunningsum(wy,0) //minmaxy = minmax(y) for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = running[(*info[j])[1,2],.] \ running[(*info[j])[(2::nrow[j]),2],.] - running[(*info[j])[(1::nrow[j]-1),2],.] block0 = block0 \ J(nrow[j],1,sim), *areaid[j], *nhh[j], A[.,1] , A[.,2]:/A[.,1] } else block0 = block0 \ sim , 0 , nHHLDs, running[nHHLDs,1], running[nHHLDs,2]/running[nHHLDs,1] } A = NULL block0 = block0[2..rows(block0),.] if (nfgts>0) { for (l=1; l<=npovlines; l++) { if (plreal==1) { wt_p = (y:<= pl[l]):*wt rgap = 1:-(y:/ pl[l]) } else { wt_p = (y:<= *plvalue[l]):*wt rgap = 1:-(y:/ *plvalue[l]) } for (ind=1; ind<=nfgts; ind++) { if (fgtlist[ind]=="fgt0") currfgt = running[.,1], quadrunningsum(wt_p,0) if (fgtlist[ind]=="fgt1") currfgt = running[.,1], quadrunningsum(wt_p:*rgap,0) if (fgtlist[ind]=="fgt2") currfgt = running[.,1], quadrunningsum(wt_p:*rgap:*rgap,0) fgtx = J(1,1,.) for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = currfgt[(*info[j])[1,2],.] \ currfgt[(*info[j])[(2::nrow[j]),2],.] - currfgt[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ A[.,2] :/ A[.,1] } else fgtx = fgtx \ currfgt[nHHLDs,2]:/ currfgt[nHHLDs,1] } block0 = block0, fgtx[2..rows(fgtx),1] A = fgtx = currfgt = NULL } //ind } //plines } //nfgt if (nges>0) { lny = ln(y) wlny = wt:*lny for (ind=1; ind<=nges; ind++) { if (gelist[ind]=="ge0") current = running, quadrunningsum(wlny,0) if (gelist[ind]=="ge1") current = running, quadrunningsum(wy:*lny,0) if (gelist[ind]=="ge2") current = running, quadrunningsum(wy:*y,0) fgtx = J(1,1,.) if (gelist[ind]=="ge0") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*info[j])[1,2],.] \ current[(*info[j])[(2::nrow[j]),2],.] - current[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ -(A[.,3]:/A[.,1]) :+ ln(A[.,2]:/A[.,1]) } else fgtx = fgtx \ -(current[nHHLDs,3]:/current[nHHLDs,1]) :+ ln(current[nHHLDs,2]:/current[nHHLDs,1]) } } if (gelist[ind]=="ge1") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*info[j])[1,2],.] \ current[(*info[j])[(2::nrow[j]),2],.] - current[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ (A[.,3]:/A[.,2]) :- ln(A[.,2]:/A[.,1]) } else fgtx = fgtx \ (current[nHHLDs,3]:/current[nHHLDs,2]) :- ln(current[nHHLDs,2]:/current[nHHLDs,1]) } } if (gelist[ind]=="ge2") { for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = current[(*info[j])[1,2],.] \ current[(*info[j])[(2::nrow[j]),2],.] - current[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ 0.5*((((A[.,2] :/A[.,1]) :^-2):*(A[.,3]:/A[.,1])):-1) } else fgtx = fgtx \ 0.5*((((current[nHHLDs,2]:/current[nHHLDs,1]):^-2):*(current[nHHLDs,3]:/current[nHHLDs,1])):-1) } } block0 = block0, fgtx[2..rows(fgtx),1] A = fgtx = current = NULL } //ind } //nges //add blocks block = block \ block0 printf(".") if (mod(sim,50)==0) printf(" %5.0f\n",sim) displayflush() } //sim block0 = y = wt0 = wt = area = wy = running = wt_p = rgap = plvalue = lny = wlny = info = areaid = nhh = NULL fclose(in) block = block[2..rows(block),.] _sort(block, (2,1)) info = panelsetup(block,2) rinfo = rows(info) outsim = J(1, 4+(cols(block)-4)*2, .) for (i=1; i<=rinfo; i++) { rr = info[i,1],5 \ info[i,2],. out = quadmeanvariance(block[|rr|]) outsim = outsim \ info[i,2]-info[i,1]+1, block[info[i,1],2::4], out[1,.], sqrt(diagonal(out[2..rows(out),.])') } out = block = NULL outsim = outsim[2..rows(outsim),.] stata("clear") (void) st_addvar("double", rmatnames) st_addobs(rows(outsim)) st_store(.,.,outsim) outsim = NULL } // need to add different options for weights // function to process tabulations for ydump, save output to dta void _s2sc_stats(string scalar ydump, string scalar plines, string scalar aggids, string scalar areavar, string scalar wgtvar) { agglist = strtoreal(tokens(aggids)) indlist = tokens(st_local("indicators")) addvars = tokens(st_local("catvar")) naddvar = cols(addvars) convars = tokens(st_local("contvar")) nconvar = cols(convars) pl = strtoreal(tokens(plines)) plreal = 1 if (missing(pl)>0) { pl = tokens(plines) plreal = 0 } in = fopen(ydump, "r") size = fgetmatrix(in) varname = fgetmatrix(in) p0 = ftell(in) id = fgetmatrix(in) p1 = ftell(in) id = NULL area = _fgetcoldata(_fvarindex(areavar, varname), in, p0, p1-p0) wt0 = _fgetcoldata(_fvarindex(wgtvar, varname), in, p0, p1-p0) p2 = 2*(p1-p0)+p1 nobs = rows(wt0) nagg = cols(agglist) npovlines = cols(pl) info = areaid = J(1, nagg, NULL) nrow = J(1, nagg, .) for (j=1; j<=nagg; j++) { idfake = trunc(area/10^agglist[j]) info[j] = &(panelsetup(idfake, 1)) areaid[j] = &(idfake[(*info[j])[(1::rows(*info[j])),2],.]) nrow[j] = rows(*info[j]) } idfake = NULL ffgtall = J(quadsum(nrow)+1,1,.) ncells = 0 rmatnames = "nSim", "Unit", "Povline" senames = "se" if (naddvar > 0) { addlvl = add = J(1, naddvar, NULL) for (v=1; v<=naddvar; v++) { add[.,v] = &(_fgetcoldata(_fvarindex(addvars[v], varname), in, p0, p1-p0)) addlvl[v] = &(_fintlevelsof(*add[.,v])) ncells = ncells + rows(*addlvl[v]) for (cat=1; cat<=rows(*addlvl[v]); cat++) { for (c=0; c<=1; c++) { rmatnames = rmatnames, "avg_poor_" + strofreal(c) + "_" + addvars[v] + "_" + strofreal(cat) + "_" + "r" senames = senames, "se_poor_" + strofreal(c) + "_" + addvars[v] + "_" + strofreal(cat) + "_" + "r" } } } } if (nconvar > 0) { cont = J(1, nconvar, NULL) for (v=1; v<=nconvar; v++) { ncells = ncells + 1 cont[.,v] = &(_fgetcoldata(_fvarindex(convars[v], varname), in, p0, p1-p0)) for (c=0; c<=1; c++) { rmatnames = rmatnames, "avg_poor_" + strofreal(c) + "_" + convars[v] senames = senames, "se_poor_" + strofreal(c) + "_" + convars[v] } } } rmatnames = rmatnames, senames[2..cols(senames)] if (npovlines>0 & plreal==0) { plvalue = J(1,npovlines, NULL) for (l=1; l<=npovlines; l++) plvalue[l] = &(_fgetcoldata(_fvarindex(pl[l], varname), in, p0, p1-p0)) } printf("{txt}\nComputing statistics for {res}%g{txt} simulation(s).\n", size[4]) display("{txt}{hline 4}{c +}{hline 3} 1 " + "{hline 3}{c +}{hline 3} 2 " + "{hline 3}{c +}{hline 3} 3 " + "{hline 3}{c +}{hline 3} 4 " + "{hline 3}{c +}{hline 3} 5 ") fseek(in, p2, -1) fcell = J(1,3+2*ncells,.) for (sim=1; sim<=size[4]; sim++) { y = fgetmatrix(in) wt = wt0 if (colmissing(y)>0) { index = selectindex(rowmissing(y0)) wt[index,.] = J(rows(index),1,0) _editmissing(y, 0) } index = NULL //wt_running = quadrunningsum(wt,0) block0 = J(1,3,.) for (l=1; l<=npovlines; l++) { if (plreal==1) plv = pl[l] else plv = l for (j=1; j<=nagg; j++) { if (nrow[j] >=2) block0 = block0 \ (J(nrow[j],1,sim), *areaid[j], J(nrow[j],1, plv)) else block0 = block0 \ (sim , 0, plv) } } block0 = block0[2::rows(block0),.] if (naddvar > 0) { fcell1 = J(quadsum(nrow)*npovlines,1,.) for (v=1; v<=naddvar; v++) { lvl = rows(*addlvl[v]) cumwt = J(3, lvl, NULL) cum = J(1, 2, NULL) fcell0 = J(1, 2*lvl, .) for (l=1; l<=npovlines; l++) { cum[1] = &(J(nobs, 1, 0)) cum[2] = &(J(nobs, 1, 0)) if (plreal==1) dummy = y:<= pl[l] else dummy = y:<= *plvalue[l] for (cat=1; cat<=lvl; cat++) { cumwt[1,cat] = &(quadrunningsum(wt:* (dummy:==0) :* (*add[.,v]:==(*addlvl[v])[cat]),0)) cumwt[2,cat] = &(quadrunningsum(wt:* (dummy:==1) :* (*add[.,v]:==(*addlvl[v])[cat]),0)) cumwt[3,cat] = &(*cumwt[1,cat] :+ *cumwt[2,cat]) cum[1] = &(*cum[1] :+ *cumwt[1,cat]) cum[2] = &(*cum[2] :+ *cumwt[2,cat]) } for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { rowcell = J(nrow[j],1,.) for (cat=1; cat<=lvl; cat++) { for (c=1; c<=2; c++) { cell = (*cumwt[c,cat])[(*info[j])[1,2],.] \ (*cumwt[c,cat])[(*info[j])[(2::nrow[j]),2],.] - (*cumwt[c,cat])[(*info[j])[(1::nrow[j]-1),2],.] rtot = (*cum[c])[(*info[j])[1,2],.] \ (*cum[c])[(*info[j])[(2::nrow[j]),2],.] - (*cum[c])[(*info[j])[(1::nrow[j]-1),2],.] rowcell = rowcell, (cell:/rtot) } } fcell0 = fcell0 \ rowcell[.,2::cols(rowcell)] } else { r0cell = J(1,1,.) for (cat=1; cat<=lvl; cat++) { for (c=1; c<=2; c++) r0cell = r0cell, (*cumwt[c,cat])[nobs,.] :/ (*cum[c])[nobs,.] } fcell0 = fcell0 \ r0cell[.,2::cols(r0cell)] } } //nagg } //npovlines fcell1 = fcell1, fcell0[2::rows(fcell0),.] fcell0 = NULL } //addvars fcell1 = fcell1[.,2::cols(fcell1)] cum = cumwt = dummy = NULL } //if naddvar if (nconvar > 0) { fgtcont = J(quadsum(nrow)*npovlines,1,.) for (v=1; v<=nconvar; v++) { fgtx = J(1,2,.) for (l=1; l<=npovlines; l++) { if (plreal==1) dummy = y:<= pl[l] else dummy = y:<= *plvalue[l] curr1 = quadrunningsum(wt:* (dummy:==1) ,0), quadrunningsum(wt:* *cont[.,v] :* (dummy:==1) ,0) curr0 = quadrunningsum(wt:* (dummy:==0) ,0), quadrunningsum(wt:* *cont[.,v] :* (dummy:==0) ,0) for (j=1; j<=nagg; j++) { if (nrow[j] >=2) { A = curr0[(*info[j])[1,2],.] \ curr0[(*info[j])[(2::nrow[j]),2],.] - curr0[(*info[j])[(1::nrow[j]-1),2],.] B = curr1[(*info[j])[1,2],.] \ curr1[(*info[j])[(2::nrow[j]),2],.] - curr1[(*info[j])[(1::nrow[j]-1),2],.] fgtx = fgtx \ (A[.,2] :/ A[.,1], B[.,2] :/ B[.,1]) } else fgtx = fgtx \ (curr0[nobs,2]:/ curr0[nobs,1], curr1[nobs,2]:/ curr1[nobs,1]) } //nagg A = B = NULL } //povlines fgtcont = fgtcont, fgtx[2::rows(fgtx),.] } //nconvar fgtcont = fgtcont[.,2::cols(fgtcont)] curr1 = curr0 = dummy = fgtx = NULL } //if nconvar if (naddvar > 0 & nconvar > 0) fcell = fcell \ (block0, fcell1, fgtcont) if (naddvar > 0 & nconvar ==0) fcell = fcell \ (block0, fcell1) if (naddvar ==0 & nconvar > 0) fcell = fcell \ (block0, fgtcont) fcell1 = fgtcont = block0 = NULL printf(".") if (mod(sim,50)==0) printf(" %5.0f\n",sim) displayflush() } //sim block0 = y = wt0 = wt = area = wy = dummy = plvalue = info = areaid = addlvl = add = cont= NULL fclose(in) fcell = fcell[2::rows(fcell),.] _sort(fcell, (3,2,1)) info = panelsetup(strofreal(fcell[.,2])+strofreal(fcell[.,3]),1) rinfo = rows(info) outsim = J(1, 3+(cols(fcell)-3)*2, .) for (i=1; i<=rinfo; i++) { rr = info[i,1],4 \ info[i,2],. out = quadmeanvariance(fcell[|rr|]) outsim = outsim \ info[i,2]-info[i,1]+1, fcell[info[i,1],2::3], out[1,.], sqrt(diagonal(out[2..rows(out),.])') } out = fcell = NULL outsim = outsim[2..rows(outsim),.] stata("clear") (void) st_addvar("double", rmatnames) st_addobs(rows(outsim)) st_store(.,.,outsim) outsim = NULL } //Generalized entropy index GE(0)=MLD, GE(1)= Theil function _fGE(y, w, alpha) { if (alpha==0) return(-quadcolsum(w:*ln(y:/mean(y,w))):/quadcolsum(w)) else if (alpha==1) return(quadcolsum(w:*((y:/mean(y,w)):*ln(y:/mean(y,w)))):/quadcolsum(w)) else return((1/(alpha*(alpha-1))):*(quadcolsum(w:*(((y:/mean(y,w)):^alpha):-1)):/quadcolsum(w))) } //Gini coefficient (fastgini formula) x and w are vectors function _fGini(x, w) { t = x,w _sort(t,1) x=t[.,1] w=t[.,2] xw = x:*w rxw = quadrunningsum(xw) :- (xw:/2) return(1- 2*((quadcross(rxw,w)/quadcross(x,w))/quadcolsum(w))) } //Gini coefficient (fastgini formula) x and w are vectors function _fGininew(x, w) { t = x,w _collate(t,1) xw = t[.,1]:*t[.,2] rxw = quadrunningsum(t[.,1]:*t[.,2]) :- ((t[.,1]:*t[.,2]):/2) return(1- 2*((quadcross(rxw,t[.,2])/quadcross(t[.,1],t[.,2]))/quadcolsum(t[.,2]))) } //Gini coefficient (fastgini formula) x is MATRIX, w is vector function _fGinis(x, w) { out = J(1,0,.) ncols = cols(x) for(i=1;i<=ncols;i++) out = out, _fGini(x[.,i], w) return(out) } // function to return 1 if matrix is between z0, z1 function _franges(inc, z0, z1) { if (allof(z0,0)==1) { return(inc:< z1) } else { a = z0 :<= inc b = inc:< z1 c = a:+b return(c:==2) } } //FGT functions function _fFGT(inc, z1, wt, alpha) { return(mean(((inc:=top) return(mean(x2[.,1], x2[.,2])) } //ratio of bottom mean over all mean function _fratio(x, w, btm) { x1 = x, w, _fpctile(x, 100, w) x2 = select(x1, x1[.,cols(x1)]:<=btm) return(mean(x2[.,1], x2[.,2])/mean(x,w)) } //Block processing function void _s2sc_sim_break(string scalar xvar, string scalar zvars, string scalar yhats, string scalar yhats2, string scalar areavar, string scalar wgt, string scalar touse, string scalar hhid) { sim = strtoreal(st_local("rep")) seed = strtoreal(st_local("seed")) h3 = strtoreal(st_local("h3")) if (st_local("nolocation")~="") h3 = 2 hheff = strtoreal(st_local("hheffs")) boots = strtoreal(st_local("boots")) etanorm = strtoreal(st_local("etanormal")) epsnorm = strtoreal(st_local("epsnormal")) EB = strtoreal(st_local("ebest")) pline = strtoreal(st_local("pline")) lg = strtoreal(st_local("lny")) ydump = strtoreal(st_local("ydump1")) res = strtoreal(st_local("results1")) fgt0d = strtoreal(st_local("fgt0")) fgt1d = strtoreal(st_local("fgt1")) fgt2d = strtoreal(st_local("fgt2")) mem = strtoreal(st_local("maxmem")) mem = floor((mem/8/sim)) external bsim, asim, maxA, varr, sigma, v_sigma, sigma_eps, locerr, hherr, locerr2 //The data st_view(x ,.,tokens(xvar), touse) st_view(z1 ,.,tokens(zvars), touse) st_view(yh ,.,tokens(yhats), touse) st_view(yh2 ,.,tokens(yhats2), touse) st_view(area,.,tokens(areavar), touse) st_view(wt,.,tokens(wgt), touse) st_view(id ,.,tokens(hhid), touse) if (st_local("cuts")!="") st_view(cut ,.,tokens(st_local("cuts")),touse) if (st_local("addvars")!="") st_view(add ,.,tokens(st_local("addvars")),touse) if (pline==.) st_view(povline,.,tokens(st_local("pline")), touse) totmem=rows(x) if (hheff==1) { //zalpha condition if ((allof(z1,.)==1) & (allof(yh,.)==1) & (allof(yh2,.)==1)) zcond = "000" //0,0,0 if ((allof(z1,.)==1) & (allof(yh,.)==1) & (allof(yh2,.)==0)) zcond = "001" //0,0,1 if ((allof(z1,.)==1) & (allof(yh,.)==0) & (allof(yh2,.)==1)) zcond = "010" //0,1,0 if ((allof(z1,.)==1) & (allof(yh,.)==0) & (allof(yh2,.)==0)) zcond = "011" //0,1,1 if ((allof(z1,.)==0) & (allof(yh,.)==1) & (allof(yh2,.)==1)) zcond = "100" //1,0,0 if ((allof(z1,.)==0) & (allof(yh,.)==1) & (allof(yh2,.)==0)) zcond = "101" //1,0,1 if ((allof(z1,.)==0) & (allof(yh,.)==0) & (allof(yh2,.)==1)) zcond = "110" //1,1,0 if ((allof(z1,.)==0) & (allof(yh,.)==0) & (allof(yh2,.)==0)) zcond = "111" //1,1,1 } //sort is done before and setup area panel info = panelsetup(area,1) if((any((info[.,2]-info[.,1]):>mem))) _error("One of your cluster's memory requirements exceeds the amount of memory you specified, increase maxmem") infi2 = _fuptohere(mem,info, totmem) inf2 = panelsetup(infi2,1) //Prepare ydump file if (ydump==1) { yd = fopen(st_local("ydump"),"rw") //append the weight variable for ydump numcol = cols(area) + cols(wt) + cols(id) if (st_local("cuts")!="") numcol = numcol + cols(cut) if (st_local("addvars")!="") numcol = numcol + cols(add) fputmatrix(yd, st_local("ydnames")) fputmatrix(yd, numcol) fputmatrix(yd, sim) fputmatrix(yd, area) fputmatrix(yd, wt) fputmatrix(yd, id) if (st_local("cuts")!="") fputmatrix(yd, cut) if (st_local("addvars")!="") fputmatrix(yd, add) } if (res==1) { indis = fopen(st_local("results"),"rw") unit = J(rows(info),3,.) fputmatrix(indis, ("Area Observations Weights")) fputmatrix(indis, 3) fputmatrix(indis,fgt0d) fputmatrix(indis,fgt1d) fputmatrix(indis,fgt2d) } if (fgt0d==1) fgt0 = J(rows(info),sim,.) if (fgt1d==1) fgt1 = J(rows(info),sim,.) if (fgt2d==1) fgt2 = J(rows(info),sim,.) if (boots==0) { display("Parametric drawing of betas") maggie=0 for(i=1;i<=rows(inf2);i++) { infin = info[|inf2[i,1],.\inf2[i,2],.|] panelsubview(xi,x,.,infin) panelsubview(zi,z1,.,infin) panelsubview(yhi,yh,.,infin) panelsubview(yh2i,yh2,.,infin) panelsubview(areai,area,.,infin) panelsubview(wti,wt,.,infin) if (pline==.) panelsubview(povi,povline,.,infin) if (fgt0d==1) fgt0i = J(rows(infin),sim,.) if (fgt1d==1) fgt1i = J(rows(infin),sim,.) if (fgt2d==1) fgt2i = J(rows(infin),sim,.) if (res==1) uniti = J(rows(infin),3,.) lisa = panelsetup(areai,1) if (i!=1) rseed(lastseed1) if ((epsnorm==1) & (hheff==1)) { if (zcond == "100") xb = (zi,J(rows(zi),1,1))*asim' if (zcond == "110") xb = zi*asim[|.,1\.,cols(zi)|]' + (yhi*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi)|]'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' if (zcond == "111") xb = zi*asim[|.,1\.,cols(zi)|]' + (yhi*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi)|]'):*((xi,J(rows(xi),1,1))*bsim') + (yh2i*asim[|.,1+cols(yhi) \ .,cols(yhi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' if (zcond == "010") xb = (yhi*asim[.,cols(asim)-1]'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' if (zcond == "011") xb = (yhi*asim[.,1..cols(yhi)]'):*((xi,J(rows(xi),1,1))*bsim') + (yh2i*asim[|.,1+cols(yhi) \ .,cols(yhi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim')+ J(rows(yhi),1,1)*asim[.,cols(asim)]' if (zcond == "001") xb = (yh2i*asim[.,1..cols(yh2i)]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yh2i),1,1)*asim[.,cols(asim)]' if (zcond == "101") xb = zi*asim[|.,1 \.,cols(zi)|]' + (yh2i*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yh2i),1,1)*asim[.,cols(asim)]' xb =exp(xb) xb = (maxA[1,1]*(xb:/(1:+xb))) + (.5*varr[1,1])*((maxA[1,1]*(xb:*(1:-xb))):/((1:+xb):^3)) xb = rnormal(1,1,0,sqrt(xb)) + ((xi,J(rows(xi),1,1))*bsim') } if (epsnorm==0) xb = (xi,J(rows(xi),1,1))*bsim'+ _f_sampleeps(sim, rows(xi), hherr) if ((epsnorm==1) & (hheff==0)) xb = (xi,J(rows(xi),1,1))*bsim'+ rnormal(rows(xi),sim,0,sqrt(sigma_eps[1,1])) //to ensure that when we partition blocks we get the same seeds lastseed1=rseed() if (i!=1) rseed(lastseed2) for(j=1;j<=rows(lisa);j++) { if (etanorm==1) xb[|lisa[j,1],. \ lisa[j,2],.|] = xb[|lisa[j,1],. \ lisa[j,2],.|]:+ rnormal(1,1,0,sqrt(locerr')) if (etanorm==0) xb[|lisa[j,1],. \ lisa[j,2],.|] = xb[|lisa[j,1],. \ lisa[j,2],.|]:+ _f_sampleeps(sim,1, locerr) if ((fgt0d==1) & (lg==1)) fgt0i[j,.] = _fFGT(exp(xb[|lisa[j,1],.\lisa[j,2],.|]), povi[|lisa[j,1],1 \ lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],0) if ((fgt0d==1) & (lg==0)) fgt0i[j,.] = _fFGT((xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],0) if ((fgt1d==1) & (lg==1)) fgt1i[j,.] = _fFGT(exp(xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],1) if ((fgt1d==1) & (lg==0)) fgt1i[j,.] = _fFGT((xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],1) if ((fgt2d==1) & (lg==1)) fgt2i[j,.] = _fFGT(exp(xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],2) if ((fgt2d==1) & (lg==0)) fgt2i[j,.] = _fFGT((xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],2) uniti[j,.] = areai[lisa[j,1],1], rows(wti[|lisa[j,1],1\lisa[j,2],1|]), quadsum(wti[|lisa[j,1],1\lisa[j,2],1|]) maggie=maggie+1 } //Outputs for povmap if (ydump==1) fputmatrix(yd, xb) if (fgt0d==1) fgt0[|inf2[i,1],. \ inf2[i,2],.|] = fgt0i if (fgt1d==1) fgt1[|inf2[i,1],. \ inf2[i,2],.|] = fgt1i if (fgt2d==1) fgt2[|inf2[i,1],. \ inf2[i,2],.|] = fgt2i if (res==1) unit[|inf2[i,1],. \ inf2[i,2],.|] = uniti //to ensure that when we partition blocks we get the same seeds lastseed2=rseed() } if ((res==1)){ fputmatrix(indis,(unit)) if (fgt0d==1){ fgt0 = quadmeanvariance(fgt0') fgt0 = fgt0[1,.]',sqrt(diagonal(fgt0[|2,1 \ .,.|])) fputmatrix(indis,fgt0) } if (fgt1d==1){ fgt1 = quadmeanvariance(fgt1') fgt1 = fgt1[1,.]',sqrt(diagonal(fgt1[|2,1 \ .,.|])) fputmatrix(indis,fgt1) } if (fgt2d==1){ fgt2 = quadmeanvariance(fgt2') fgt2 = fgt2[1,.]',sqrt(diagonal(fgt2[|2,1 \ .,.|])) fputmatrix(indis,fgt2) } } } //boots==0 if (boots==1) { display("Bootstrapped drawing of betas and parameters") for(i=1;i<=rows(inf2);i++) { infin = info[|inf2[i,1],.\inf2[i,2],.|] panelsubview(xi,x,.,infin) panelsubview(zi,z1,.,infin) panelsubview(yhi,yh,.,infin) panelsubview(yh2i,yh2,.,infin) panelsubview(areai,area,.,infin) panelsubview(wti,wt,.,infin) if (pline==.) panelsubview(povi,povline,.,infin) if (fgt0d==1) fgt0i = J(rows(infin),sim,.) if (fgt1d==1) fgt1i = J(rows(infin),sim,.) if (fgt2d==1) fgt2i = J(rows(infin),sim,.) if (res==1) uniti = J(rows(infin),3,.) lisa = panelsetup(areai,1) if (EB==1) { cens_area = J(rows(lisa),1,.) for(r=1;r<=rows(lisa);r++){ cens_area[r,1]=areai[lisa[r,1],1] } D = _ebcensus4(cens_area,locerr,locerr2, lisa) } if (i!=1) rseed(lastseed1) if ((epsnorm==1) & (hheff==1)) { if (zcond == "100") xb = (zi,J(rows(zi),1,1))*asim' if (zcond == "110") xb = zi*asim[|.,1\.,cols(zi)|]' + (yhi*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi)|]'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' if (zcond == "111") xb = zi*asim[|.,1\.,cols(zi)|]' + (yhi*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yhi)|]'):*((xi,J(rows(xi),1,1))*bsim') + (yh2i*asim[|.,1+cols(yhi) \ .,cols(yhi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' if (zcond == "010") xb = (yhi*asim[.,cols(asim)-1]'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yhi),1,1)*asim[.,cols(asim)]' if (zcond == "011") xb = (yhi*asim[.,1..cols(yhi)]'):*((xi,J(rows(xi),1,1))*bsim') + (yh2i*asim[|.,1+cols(yhi) \ .,cols(yhi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim')+ J(rows(yhi),1,1)*asim[.,cols(asim)]' if (zcond == "001") xb = (yh2i*asim[.,1..cols(yh2i)]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yh2i),1,1)*asim[.,cols(asim)]' if (zcond == "101") xb = (zi*asim[|.,1 \.,cols(zi)|]')+ (yh2i*asim[|.,1+cols(zi) \ .,cols(zi)+cols(yh2i)|]'):*((xi,J(rows(xi),1,1))*bsim'):*((xi,J(rows(xi),1,1))*bsim') + J(rows(yh2i),1,1)*asim[.,cols(asim)]' xb = exp(xb) xb = (maxA':*(xb:/(1:+xb))) + (.5*varr'):*((maxA':*(xb:*(1:-xb))):/((1:+xb):^3)) xb = rnormal(1,1,0,sqrt(xb)) + ((xi,J(rows(xi),1,1))*bsim') } if (epsnorm==0) xb = (xi,J(rows(xi),1,1))*bsim'+ _f_sampleeps(sim, rows(xi), hherr) if ((epsnorm==1) & (hheff==0)) xb = (xi,J(rows(xi),1,1))*bsim'+ rnormal(rows(xi),1,0,sqrt(sigma_eps)') //to ensure that when we partition blocks we get the same seeds lastseed1=rseed() if (i!=1) rseed(lastseed2) //ETA PARTS for(j=1;j<=rows(lisa);j++) { if ((EB==0) & (etanorm==0)) xb[|lisa[j,1],. \ lisa[j,2],.|] = xb[|lisa[j,1],. \ lisa[j,2],.|]:+_f_sampleeps(sim,1, locerr) if ((EB==0) & (etanorm==1)) xb[|lisa[j,1],. \ lisa[j,2],.|] = xb[|lisa[j,1],. \ lisa[j,2],.|]:+rnormal(1,1,0,sqrt(locerr')) if (EB==1) { dd = rowshape(D[j,.]',sim) xb[|lisa[j,1],. \ lisa[j,2],.|] = xb[|lisa[j,1],. \ lisa[j,2],.|]:+rnormal(1,1,dd[.,1]',sqrt(dd[.,2]')) } if ((fgt0d==1) & (lg==1)) fgt0i[j,.] = _fFGT(exp(xb[|lisa[j,1],.\lisa[j,2],.|]), povi[|lisa[j,1],1 \ lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],0) if ((fgt0d==1) & (lg==0)) fgt0i[j,.] = _fFGT((xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],0) if ((fgt1d==1) & (lg==1)) fgt1i[j,.] = _fFGT(exp(xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],1) if ((fgt1d==1) & (lg==0)) fgt1i[j,.] = _fFGT((xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],1) if ((fgt2d==1) & (lg==1)) fgt2i[j,.] = _fFGT(exp(xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],2) if ((fgt2d==1) & (lg==0)) fgt2i[j,.] = _fFGT((xb[|lisa[j,1],.\lisa[j,2],.|]),povi[|lisa[j,1],1\lisa[j,2],1|],wti[|lisa[j,1],1\lisa[j,2],1|],2) uniti[j,.] = areai[lisa[j,1],1], rows(wti[|lisa[j,1],1\lisa[j,2],1|]), quadsum(wti[|lisa[j,1],1\lisa[j,2],1|]) } //Outputs for povmap if (ydump==1) fputmatrix(yd, xb) if (fgt0d==1) fgt0[|inf2[i,1],. \ inf2[i,2],.|] = fgt0i if (fgt1d==1) fgt1[|inf2[i,1],. \ inf2[i,2],.|] = fgt1i if (fgt2d==1) fgt2[|inf2[i,1],. \ inf2[i,2],.|] = fgt2i if (res==1) unit[|inf2[i,1],. \ inf2[i,2],.|] = uniti //to ensure that when we partition blocks we get the same seeds lastseed2=rseed() } if (res==1){ fputmatrix(indis,(unit)) if (fgt0d==1){ fgt0 = quadmeanvariance(fgt0') fgt0 = fgt0[1,.]',sqrt(diagonal(fgt0[|2,1 \ .,.|])) fputmatrix(indis,fgt0) } if (fgt1d==1){ fgt1 = quadmeanvariance(fgt1') fgt1 = fgt1[1,.]',sqrt(diagonal(fgt1[|2,1 \ .,.|])) fputmatrix(indis,fgt1) } if (fgt2d==1){ fgt2 = quadmeanvariance(fgt2') fgt2 = fgt2[1,.]',sqrt(diagonal(fgt2[|2,1 \ .,.|])) fputmatrix(indis,fgt2) } } } //boots==1 if (ydump==1) fclose(yd) if (res==1) fclose(indis) st_local("marge", strofreal(rows(info))) } //Function to select cell numbers function _fuptohere(mem, info, totmem) { wiggins = ceil(totmem/mem) krusty = select((info[.,2]:<=mem),(info[.,2]:<=mem)) for(i=2;i<=wiggins;i++) { krusty = krusty\(select((info[|(rows(krusty)+1),2\rows(info),2|]:<=(mem*i)),(info[|(rows(krusty)+1),2\rows(info),2|]:<=(mem*i))):*i) } return(krusty) } //Process indicators function _f_inds() { res = fopen(st_local("results"), "r") names = fgetmatrix(res) numcol = fgetmatrix(res) f0 = fgetmatrix(res) f1 = fgetmatrix(res) f2 = fgetmatrix(res) yhat = fgetmatrix(res) if (f0==1) yhat=yhat,fgetmatrix(res) if (f1==1) yhat=yhat,fgetmatrix(res) if (f2==1) yhat=yhat,fgetmatrix(res) fclose(res) return(yhat) } mata mlib create lsae_povmap, dir("`c(sysdir_plus)'l") replace mata mlib add lsae_povmap *(), dir("`c(sysdir_plus)'l") mata mlib index end