////////////////////////////////////////////////////////////////////////////////
// STATA FOR Chiang, H., Kato, K., Ma, Y. & Sasaki, Y. (2022): Multiway
// Cluster Robust Double/Debiased Machine Learning. Journal of Business &
// Economic Statistics, 40(3), pp. 1046-1056.
////////////////////////////////////////////////////////////////////////////////

// crhdreg: high-dimensional (IV) regression via multiway cluster-robust
// double/debiased machine learning (DML).
//
//   varlist      first variable = outcome; next dimension() variables = the
//                low-dimensional treatments D; the rest = high-dim controls X
//   cluster1/2   optional cluster variables (0-, 1-, or 2-way clustering)
//   iv()         optional instrument(s); instruments the first treatment
//   dimension()  number of treatment variables (default 1)
//   folds()      K cross-fitting folds; values <= 1 are replaced by a default
//                inside the Mata routines
//   resample()   number of resampled cross-fitting repetitions (default 10)
//   median       use the median finite-sample adjustment across resamples
//                (default is the mean adjustment)
//   alpha()      elastic-net mixing parameter (1 = lasso penalty only)
//   tol()/maxiter()  optimizer convergence tolerance and iteration cap
program define crhdreg, eclass
    version 14.2
    syntax varlist(numeric min=3) [if] [in] [, cluster1(varname) cluster2(varname) iv(varlist) dimension(real 1) folds(real 1) resample(real 10) median alpha(real 1) tol(real 0.000001) maxiter(real 1000)]
    marksample touse
    gettoken depvar indepvars : varlist
    _fv_check_depvar `depvar'
    fvexpand `indepvars'
    local cnames `r(varlist)'
    // Collect the treatment variable names (positions 1..dimension of varlist,
    // position 0 being the dependent variable) to label e(b) and e(V).
    local varnames
    local iter = 0
    foreach var of varlist `varlist' {
        if( 1 <= `iter' & `iter' <= `dimension' ){
            local varnames `varnames' "`var'"
        }
        local iter = `iter' + 1
    }
    local rob = 1
    local fsa = "median"
    if "`median'" == "" { // -median- not given: mean finite-sample adjustment
        local rob = 0
        local fsa = "mean"
    }
    tempname b V N p ways K G1 G2
    ////////////////////////////////////////////////////////////////////////////
    // Regression
    if "`iv'" == "" & "`cluster1'" == "" {
        mata: reg0("`depvar'", "`cnames'", "`touse'", ///
            `dimension', `folds', `alpha', `resample', `rob', `tol', `maxiter', ///
            "`b'", "`V'", "`N'", "`p'", "`ways'", "`K'", "`G1'", "`G2'")
    }
    ////////////////////////////////////////////////////////////////////////////
    // IV Regression
    if "`iv'" != "" & "`cluster1'" == "" {
        //tempvar ivvar
        //gen `ivvar' = `iv'
        mata: ivreg0("`depvar'", "`cnames'", "`touse'", "`iv'", ///
            `dimension', `folds', `alpha', `resample', `rob', `tol', `maxiter', ///
            "`b'", "`V'", "`N'", "`p'", "`ways'", "`K'", "`G1'", "`G2'")
    }
    ////////////////////////////////////////////////////////////////////////////
    // 1-Way Cluster Robust Regression
    if "`iv'" == "" & "`cluster1'" != "" & "`cluster2'" == "" {
        tempvar c1
        gen `c1' = `cluster1'
        mata: reg1("`depvar'", "`cnames'", "`touse'", "`c1'", ///
            `dimension', `folds', `alpha', `resample', `rob', `tol', `maxiter', ///
            "`b'", "`V'", "`N'", "`p'", "`ways'", "`K'", "`G1'", "`G2'")
    }
    ////////////////////////////////////////////////////////////////////////////
    // 1-Way Cluster Robust IV Regression
    if "`iv'" != "" & "`cluster1'" != "" & "`cluster2'" == "" {
        //tempvar ivvar
        //gen `ivvar' = `iv'
        tempvar c1
        gen `c1' = `cluster1'
        mata: ivreg1("`depvar'", "`cnames'", "`touse'", "`iv'", "`c1'", ///
            `dimension', `folds', `alpha', `resample', `rob', `tol', `maxiter', ///
            "`b'", "`V'", "`N'", "`p'", "`ways'", "`K'", "`G1'", "`G2'")
    }
    ////////////////////////////////////////////////////////////////////////////
    // 2-Way Cluster Robust Regression
    if "`iv'" == "" & "`cluster1'" != "" & "`cluster2'" != "" {
        tempvar c1
        gen `c1' = `cluster1'
        tempvar c2
        gen `c2' = `cluster2'
        mata: reg2("`depvar'", "`cnames'", "`touse'", "`c1'", "`c2'", ///
            `dimension', `folds', `alpha', `resample', `rob', `tol', `maxiter', ///
            "`b'", "`V'", "`N'", "`p'", "`ways'", "`K'", "`G1'", "`G2'")
    }
    ////////////////////////////////////////////////////////////////////////////
    // 2-Way Cluster Robust IV Regression
    if "`iv'" != "" & "`cluster1'" != "" & "`cluster2'" != "" {
        //tempvar ivvar
        //gen `ivvar' = `iv'
        tempvar c1
        gen `c1' = `cluster1'
        tempvar c2
        gen `c2' = `cluster2'
        mata: ivreg2("`depvar'", "`cnames'", "`touse'", "`iv'", "`c1'", "`c2'", ///
            `dimension', `folds', `alpha', `resample', `rob', `tol', `maxiter', ///
            "`b'", "`V'", "`N'", "`p'", "`ways'", "`K'", "`G1'", "`G2'")
    }
    // Post results: the Mata routines filled the tempnames above.
    matrix colnames `b' = `varnames'
    matrix colnames `V' = `varnames'
    matrix rownames `V' = `varnames'
    ereturn post `b' `V', esample(`touse') buildfvinfo
    ereturn scalar N = `N'
    ereturn scalar ways = `ways'
    ereturn scalar G1 = `G1'
    ereturn scalar G2 = `G2'
    ereturn scalar dimD = `dimension'
    ereturn scalar dimX = `p'
    ereturn scalar K = `K'
    ereturn scalar alpha = `alpha'
    ereturn scalar fsa_n = `resample'
    ereturn local cmd "crhdreg"
    ereturn local cluster1 "`cluster1'"
    ereturn local cluster2 "`cluster2'"
    ereturn local iv "`iv'"
    ereturn local fsa_m "`fsa'" // finite-sample adjustment method ("mean"/"median")
    ereturn display
    di "* crhdreg is based on Chiang, H., K. Kato, Y. Ma, & Y. Sasaki (2022) Multiway"
    di " Cluster Robust Double/Debiased Machine Learning. Journal of Business"
    di " & Economic Statistics, 40(3), pp. 1046-1056."
end

mata:
////////////////////////////////////////////////////////////////////////////////
// ELASTIC NET OBJECTIVE FUNCTION
////////////////////////////////////////////////////////////////////////////////
// d0-type evaluator for optimize(): half mean squared error plus the
// elastic-net penalty lambda*( (1-alpha)*||beta||_2^2/2 + alpha*||beta||_1 ).
// todo, g and H are required by the optimize() evaluator protocol but are
// unused for a d0 evaluator; beta is a row vector.
void objective(todo,beta,Y,X,lambda,alpha,crit,g,H){
    crit = ( mean((Y:-X*beta'):^2)/2 + lambda * ((1-alpha)*sum(beta:^2)/2 + alpha*sum(abs(beta)) ) )
}
////////////////////////////////////////////////////////////////////////////////
// GLMNET FUNCTION
////////////////////////////////////////////////////////////////////////////////
// Elastic-net regression of Y on X (X should already include any constant
// column). Minimizes objective() by derivative-free Nelder-Mead ("nm") from a
// zero start; convergence warnings and tracing are switched off so repeated
// cross-fitting calls stay quiet. Returns the coefficients as a column vector.
real vector glmnet(Y,X,lambda,alpha,TOLERANCE,MAXITER){
    S = optimize_init()
    optimize_init_evaluator(S,&objective())
    optimize_init_which(S,"min")
    optimize_init_evaluatortype(S, "d0")
    optimize_init_technique(S,"nm")
    optimize_init_nmsimplexdeltas(S,1)
    optimize_init_singularHmethod(S,"hybrid")
    optimize_init_argument(S,1,Y)
    optimize_init_argument(S,2,X)
    optimize_init_argument(S,3,lambda)
    optimize_init_argument(S,4,alpha)
    optimize_init_params(S, J(1,cols(X),0))
    optimize_init_conv_ptol(S,TOLERANCE)
    optimize_init_conv_maxiter(S,MAXITER)
    optimize_init_conv_warning(S,"off")
    optimize_init_tracelevel(S,"none")
    est=optimize(S)
    return( est' )
}
////////////////////////////////////////////////////////////////////////////////
// GET SCALE FUNCTION
////////////////////////////////////////////////////////////////////////////////
// Column vector of the standard deviation of each column of X.
real matrix get_scale(X){
    sd = J(cols(X),1,0)
    for( idx = 1 ; idx <= cols(X) ; idx++ ){
        sd[idx,1] = variance(X[,idx])^0.5
    }
    return( sd )
}
////////////////////////////////////////////////////////////////////////////////
// SCALE FUNCTION
////////////////////////////////////////////////////////////////////////////////
// Column-wise standardization: de-mean each column of X and divide by its
// standard deviation; returns the standardized matrix.
real matrix scale(X){
    for( idx = 1 ; idx <= cols(X) ; idx++ ){
        X[,idx] = (X[,idx] :- mean(X[,idx])):/(variance(X[,idx])^0.5)
    }
    return( X )
}
////////////////////////////////////////////////////////////////////////////////
// MEDIAN FUNCTION
////////////////////////////////////////////////////////////////////////////////
// Order-statistic median of a column vector: element ceil(n/2) of the sorted
// vector. NOTE: for an even number of rows this is the lower middle value,
// not the average of the two middle values.
real matrix median(x){
    return ( sort(x,1)[ceil(rows(x)/2),1] )
}
////////////////////////////////////////////////////////////////////////////////
// REGRESSION FOR 0 WAY CLUSTERING /////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// DML partialling-out estimator without clustering. dxv holds the treatments
// D (first dimtheta columns) followed by controls X. Point estimate b and
// variance V (both mean- and median-adjusted across num_resample resampled
// cross-fits) are written back to Stata under bname/Vname; sample and fold
// information under nname/pname/wname/fname/g1name/g2name.
// MAXITER is declared with eltype only (any organizational type).
void reg0( string scalar yv, string scalar dxv, string scalar touse, real scalar dimtheta, real scalar K, real scalar alpha, real scalar num_resample, real scalar robust, real scalar TOLERANCE, real MAXITER, string scalar bname, string scalar Vname, string scalar nname, string scalar pname, string scalar wname, string scalar fname, string scalar g1name, string scalar g2name)
{
    ////////////////////////////////////////////////////////////////////////////
    // DATA ORGANIZATION
    real vector y, d, x
    real scalar n, p
    Y = st_data(., yv, touse)
    dx = st_data(., dxv, touse)
    N = rows(dx)
    D = dx[.,1..dimtheta]
    Z = D   // no instruments: D instruments itself
    X = dx[.,(dimtheta+1)..(cols(dx))]
    p = cols(X)
    if( K <= 1 ){ K = 5 }   // default number of folds without clustering
    sdD = get_scale(D)      // kept to undo the standardization of D at the end
    sdZ = get_scale(Z)
    D = scale(D)
    Z = scale(Z)
    X = scale(X)
    list_thetahat = J(dimtheta,num_resample,0)
    list_Var = J(dimtheta,dimtheta*num_resample,0)  // dimtheta x dimtheta block per resample
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        printf("\nResampled Cross-Fitting: Iteration %3.0f/%f",iter,num_resample)
        // Random fold assignment in {1,...,K} per observation.
        fold = ceil(uniform(N,1)*K)
        ////////////////////////////////////////////////////////////////////////
        // DML
        // Rule-of-thumb elastic-net penalties scaled by the training-fold
        // size N*(K-1)/K and the number of controls p.
        lambda_Z = J(dimtheta,1,0)
        lambda_D = J(dimtheta,1,0)
        for( idx = 1 ; idx <= dimtheta ; idx++ ){
            lambda_Z[idx,1] = 0.01*variance(Z[,idx])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
            lambda_D[idx,1] = 0.01*variance(D[,idx])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
        }
        lambda_Y = 0.01*variance(Y[,1])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
        pre_Z = Z
        res_Z = Z
        pre_D = D
        res_D = D
        pre_Y = Y
        res_Y = Y
        // Nuisance regressions on the complement of each fold; predictions
        // and residuals are stored at those complement rows.
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(fold:!=kdx)
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                xihat = glmnet(Z[indices,idx],(X,J(N,1,1))[indices,],lambda_Z[idx,1],alpha,TOLERANCE,MAXITER)
                pre_Z[indices,idx] = (X,J(N,1,1))[indices,] * xihat
                res_Z[indices,idx] = Z[indices,idx] - pre_Z[indices,idx]
            }
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                gammahat = glmnet(D[indices,idx],(X,J(N,1,1))[indices,],lambda_D[idx,1],alpha,TOLERANCE,MAXITER)
                pre_D[indices,idx] = (X,J(N,1,1))[indices,] * gammahat
                res_D[indices,idx] = D[indices,idx] - pre_D[indices,idx]
            }
            betahat = glmnet(Y[indices,1],(X,J(N,1,1))[indices,],lambda_Y,alpha,TOLERANCE,MAXITER)
            pre_Y[indices,1] = (X,J(N,1,1))[indices,] * betahat
            res_Y[indices,1] = Y[indices,1] - pre_Y[indices,1]
        }
        // Fold-wise IV estimate on residuals, averaged over the K folds.
        thetahat = 0
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(fold:==kdx)
            thetahat = thetahat :+ luinv( res_Z[indices,]' * res_D[indices,] ) * res_Z[indices,]' * res_Y[indices,1]
        }
        thetahat = thetahat :/ K
        list_thetahat[,iter] = thetahat
        res = res_Y :- res_D * thetahat
        ////////////////////////////////////////////////////////////////////////
        // VARIANCE
        // NOTE(review): the local matrix J (Jacobian) coexists below with
        // calls to the builtin J(); Mata resolves J(...) as the function.
        J = res_Z' * res_D :/ N
        sigma2 = 0
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(fold:==kdx)
            sigma2 = sigma2 :+(((res*J(1,dimtheta,1)) :* res_Z)[indices,])' * (((res*J(1,dimtheta,1)) :* res_Z)[indices,]) :/ rows(indices)
        }
        Var = luinv(J) * (sigma2:/K) * luinv(J)'
        list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = Var
    }
    ////////////////////////////////////////////////////////////////////////////
    // MEAN RESULTS
    // Mean finite-sample adjustment: average estimate, and average of the
    // per-resample variances inflated by the between-resample spread.
    b = mean(list_thetahat')
    V = 0
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        V = V :+ list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-b')*(list_thetahat[,iter]:-b')'
    }
    V = V :/ num_resample :/ N
    ////////////////////////////////////////////////////////////////////////////
    // MEDIAN RESULTS
    // Median finite-sample adjustment: elementwise medians across resamples.
    rb = b
    list_rVar = list_Var
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        rb[1,idx] = median(list_thetahat[idx,]')
    }
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        list_rVar[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-rb')*(list_thetahat[,iter]:-rb')'
    }
    rV = V
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        for( jdx = 1 ; jdx <= dimtheta ; jdx++ ){
            rV[idx,jdx] = median((list_rVar[idx,((1..num_resample):-1):*dimtheta:+jdx])')
        }
    }
    rV = rV :/ N
    ////////////////////////////////////////////////////////////////////////////
    // RETURN THE RESULTS
    // Undo the standardization of D before posting to Stata.
    if( robust == 0 ){
        st_matrix(bname, b*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*V*diag(1:/sdD))
    }
    if( robust == 1 ){
        st_matrix(bname, rb*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*rV*diag(1:/sdD))
    }
    st_numscalar(nname, N)
    st_numscalar(pname, p)
    st_numscalar(wname, 0)
    st_numscalar(fname, K)
    st_numscalar(g1name, 0)
    st_numscalar(g2name, 0)
    printf("\n{hline 78}\n")
    printf(" Observations: %12.0f\n",N)
    printf(" Cluster Way: 0\n")
    printf(" dim(D): %12.0f\n",dimtheta)
    printf(" dim(X): %12.0f\n",p)
    printf(" Folds K: %12.0f\n",K)
    printf("{hline 78}\n")
    printf(" High-Dimensional Regression Based on DML \n")
}
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// IV REGRESSION FOR 0 WAY CLUSTERING //////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Same as reg0() but the first treatment is instrumented by iv.
void ivreg0( string scalar yv, string scalar dxv, string scalar touse, string scalar iv, real scalar dimtheta, real scalar K, real scalar alpha, real scalar num_resample, real scalar robust, real scalar TOLERANCE, real MAXITER, string scalar bname, string scalar Vname, string scalar nname, string scalar pname, string scalar wname, string
scalar fname, string scalar g1name, string scalar g2name)
{
    ////////////////////////////////////////////////////////////////////////////
    // DATA ORGANIZATION
    real vector y, d, x
    real scalar n, p
    Y = st_data(., yv, touse)
    dx = st_data(., dxv, touse)
    N = rows(dx)
    D = dx[.,1..dimtheta]
    // Instrument matrix: iv instruments the first treatment; any remaining
    // treatments instrument themselves.
    Z = st_data(., iv, touse)
    if( dimtheta > 1 ){
        Z = Z, D[,2..dimtheta]
    }
    X = dx[.,(dimtheta+1)..(cols(dx))]
    p = cols(X)
    if( K <= 1 ){ K = 5 }   // default number of folds without clustering
    sdD = get_scale(D)      // kept to undo the standardization of D at the end
    sdZ = get_scale(Z)
    D = scale(D)
    Z = scale(Z)
    X = scale(X)
    list_thetahat = J(dimtheta,num_resample,0)
    list_Var = J(dimtheta,dimtheta*num_resample,0)  // dimtheta x dimtheta block per resample
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        printf("\nResampled Cross-Fitting: Iteration %3.0f/%f",iter,num_resample)
        // Random fold assignment in {1,...,K} per observation.
        fold = ceil(uniform(N,1)*K)
        ////////////////////////////////////////////////////////////////////////
        // DML
        // Rule-of-thumb penalties scaled by the training-fold size N*(K-1)/K.
        lambda_Z = J(dimtheta,1,0)
        lambda_D = J(dimtheta,1,0)
        for( idx = 1 ; idx <= dimtheta ; idx++ ){
            lambda_Z[idx,1] = 0.01*variance(Z[,idx])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
            lambda_D[idx,1] = 0.01*variance(D[,idx])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
        }
        lambda_Y = 0.01*variance(Y[,1])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
        pre_Z = Z
        res_Z = Z
        pre_D = D
        res_D = D
        pre_Y = Y
        res_Y = Y
        // Nuisance regressions on the complement of each fold.
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(fold:!=kdx)
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                xihat = glmnet(Z[indices,idx],(X,J(N,1,1))[indices,],lambda_Z[idx,1],alpha,TOLERANCE,MAXITER)
                pre_Z[indices,idx] = (X,J(N,1,1))[indices,] * xihat
                res_Z[indices,idx] = Z[indices,idx] - pre_Z[indices,idx]
            }
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                gammahat = glmnet(D[indices,idx],(X,J(N,1,1))[indices,],lambda_D[idx,1],alpha,TOLERANCE,MAXITER)
                pre_D[indices,idx] = (X,J(N,1,1))[indices,] * gammahat
                res_D[indices,idx] = D[indices,idx] - pre_D[indices,idx]
            }
            betahat = glmnet(Y[indices,1],(X,J(N,1,1))[indices,],lambda_Y,alpha,TOLERANCE,MAXITER)
            pre_Y[indices,1] = (X,J(N,1,1))[indices,] * betahat
            res_Y[indices,1] = Y[indices,1] - pre_Y[indices,1]
        }
        // Fold-wise IV estimate on residuals, averaged over the K folds.
        thetahat = 0
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(fold:==kdx)
            thetahat = thetahat :+ luinv( res_Z[indices,]' * res_D[indices,] ) * res_Z[indices,]' * res_Y[indices,1]
        }
        thetahat = thetahat :/ K
        list_thetahat[,iter] = thetahat
        res = res_Y :- res_D * thetahat
        ////////////////////////////////////////////////////////////////////////
        // VARIANCE
        // NOTE(review): local matrix J coexists with calls to builtin J().
        J = res_Z' * res_D :/ N
        sigma2 = 0
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(fold:==kdx)
            sigma2 = sigma2 :+(((res*J(1,dimtheta,1)) :* res_Z)[indices,])' * (((res*J(1,dimtheta,1)) :* res_Z)[indices,]) :/ rows(indices)
        }
        Var = luinv(J) * (sigma2:/K) * luinv(J)'
        list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = Var
    }
    ////////////////////////////////////////////////////////////////////////////
    // MEAN RESULTS
    b = mean(list_thetahat')
    V = 0
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        V = V :+ list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-b')*(list_thetahat[,iter]:-b')'
    }
    V = V :/ num_resample :/ N
    ////////////////////////////////////////////////////////////////////////////
    // MEDIAN RESULTS
    rb = b
    list_rVar = list_Var
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        rb[1,idx] = median(list_thetahat[idx,]')
    }
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        list_rVar[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-rb')*(list_thetahat[,iter]:-rb')'
    }
    rV = V
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        for( jdx = 1 ; jdx <= dimtheta ; jdx++ ){
            rV[idx,jdx] = median((list_rVar[idx,((1..num_resample):-1):*dimtheta:+jdx])')
        }
    }
    rV = rV :/ N
    ////////////////////////////////////////////////////////////////////////////
    // RETURN THE RESULTS
    // Undo the standardization of D before posting to Stata.
    if( robust == 0 ){
        st_matrix(bname, b*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*V*diag(1:/sdD))
    }
    if( robust == 1 ){
        st_matrix(bname, rb*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*rV*diag(1:/sdD))
    }
    st_numscalar(nname, N)
    st_numscalar(pname, p)
    st_numscalar(wname, 0)
    st_numscalar(fname, K)
    st_numscalar(g1name, 0)
    st_numscalar(g2name, 0)
    printf("\n{hline 78}\n")
    printf(" Observations: %12.0f\n",N)
    printf(" Cluster Way: 0\n")
    printf(" dim(D): %12.0f\n",dimtheta)
    printf(" dim(X): %12.0f\n",p)
    printf(" Folds K: %12.0f\n",K)
    printf("{hline 78}\n")
    printf(" High-Dimensional IV Regression Based on DML \n")
}
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// REGRESSION FOR 1 WAY CLUSTERING /////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Same as reg0() but folds are assigned at the cluster level (cluster1) and
// the variance sums scores within clusters.
void reg1( string scalar yv, string scalar dxv, string scalar touse, string scalar cluster1, real scalar dimtheta, real scalar K, real scalar alpha, real scalar num_resample, real scalar robust, real scalar TOLERANCE, real MAXITER, string scalar bname, string scalar Vname, string scalar nname, string scalar pname, string scalar wname, string scalar fname, string scalar g1name, string scalar g2name)
{
    ////////////////////////////////////////////////////////////////////////////
    // DATA ORGANIZATION
    real vector y, d, x
    real scalar n, p
    Y = st_data(., yv, touse)
    dx = st_data(., dxv, touse)
    g = st_data(., cluster1, touse)
    N = rows(dx)
    D = dx[.,1..dimtheta]
    Z = D   // no instruments: D instruments itself
    X = dx[.,(dimtheta+1)..(cols(dx))]
    p = cols(X)
    if( K <= 1 ){ K = 5 }   // default number of folds with 1-way clustering
    sdD = get_scale(D)
    sdZ = get_scale(Z)
    D = scale(D)
    Z = scale(Z)
    X = scale(X)
    uniqueg = uniqrows(g)
    Ghat = rows(uniqueg)    // number of clusters
    list_thetahat = J(dimtheta,num_resample,0)
    list_Var = J(dimtheta,dimtheta*num_resample,0)
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        printf("\nResampled Cross-Fitting: Iteration %3.0f/%f",iter,num_resample)
        // Folds assigned per cluster, then mapped to observations so that all
        // members of a cluster share a fold.
        fold = ceil(uniform(Ghat,1)*K)
        foldi = J(N,1,0)
        for( idx = 1 ; idx <= N ; idx++ ){
            foldi[idx] = fold[selectindex(g[idx,1] :== uniqueg),1]
        }
        ////////////////////////////////////////////////////////////////////////
        // DML
        lambda_Z =
J(dimtheta,1,0)
        lambda_D = J(dimtheta,1,0)
        // Rule-of-thumb penalties scaled by the training-fold size N*(K-1)/K.
        for( idx = 1 ; idx <= dimtheta ; idx++ ){
            lambda_Z[idx,1] = 0.01*variance(Z[,idx])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
            lambda_D[idx,1] = 0.01*variance(D[,idx])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
        }
        lambda_Y = 0.01*variance(Y[,1])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
        pre_Z = Z
        res_Z = Z
        pre_D = D
        res_D = D
        pre_Y = Y
        res_Y = Y
        // Nuisance regressions on the complement of each cluster-level fold.
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(foldi:!=kdx)
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                xihat = glmnet(Z[indices,idx],(X,J(N,1,1))[indices,],lambda_Z[idx,1],alpha,TOLERANCE,MAXITER)
                pre_Z[indices,idx] = (X,J(N,1,1))[indices,] * xihat
                res_Z[indices,idx] = Z[indices,idx] - pre_Z[indices,idx]
            }
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                gammahat = glmnet(D[indices,idx],(X,J(N,1,1))[indices,],lambda_D[idx,1],alpha,TOLERANCE,MAXITER)
                pre_D[indices,idx] = (X,J(N,1,1))[indices,] * gammahat
                res_D[indices,idx] = D[indices,idx] - pre_D[indices,idx]
            }
            betahat = glmnet(Y[indices,1],(X,J(N,1,1))[indices,],lambda_Y,alpha,TOLERANCE,MAXITER)
            pre_Y[indices,1] = (X,J(N,1,1))[indices,] * betahat
            res_Y[indices,1] = Y[indices,1] - pre_Y[indices,1]
        }
        // Fold-wise IV estimate on residuals, averaged over the K folds.
        thetahat = 0
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(foldi:==kdx)
            thetahat = thetahat :+ luinv( res_Z[indices,]' * res_D[indices,] ) * res_Z[indices,]' * res_Y[indices,1]
        }
        thetahat = thetahat :/ K
        list_thetahat[,iter] = thetahat
        res = res_Y :- res_D * thetahat
        ////////////////////////////////////////////////////////////////////////
        // VARIANCE
        // Cluster-robust sandwich: scores are accumulated cluster by cluster.
        // NOTE(review): local matrix J coexists with calls to builtin J().
        J = res_Z' * res_D :/ Ghat
        sigma2 = 0
        for( gdx = 1 ; gdx <= Ghat ; gdx++ ){
            indices = selectindex(g:==uniqueg[gdx,1])
            sigma2 = sigma2 :+(((res*J(1,dimtheta,1)) :* res_Z)[indices,])' * (((res*J(1,dimtheta,1)) :* res_Z)[indices,])
        }
        Var = luinv(J) * (sigma2:/Ghat) * luinv(J)'
        list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = Var
    }
    ////////////////////////////////////////////////////////////////////////////
    // MEAN RESULTS
    b = mean(list_thetahat')
    V = 0
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        V = V :+ list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-b')*(list_thetahat[,iter]:-b')'
    }
    V = V :/ num_resample :/ Ghat   // scaled by the number of clusters
    ////////////////////////////////////////////////////////////////////////////
    // MEDIAN RESULTS
    rb = b
    list_rVar = list_Var
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        rb[1,idx] = median(list_thetahat[idx,]')
    }
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        list_rVar[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-rb')*(list_thetahat[,iter]:-rb')'
    }
    rV = V
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        for( jdx = 1 ; jdx <= dimtheta ; jdx++ ){
            rV[idx,jdx] = median((list_rVar[idx,((1..num_resample):-1):*dimtheta:+jdx])')
        }
    }
    rV = rV :/ Ghat
    ////////////////////////////////////////////////////////////////////////////
    // RETURN THE RESULTS
    // Undo the standardization of D before posting to Stata.
    if( robust == 0 ){
        st_matrix(bname, b*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*V*diag(1:/sdD))
    }
    if( robust == 1 ){
        st_matrix(bname, rb*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*rV*diag(1:/sdD))
    }
    st_numscalar(nname, N)
    st_numscalar(pname, p)
    st_numscalar(wname, 1)
    st_numscalar(fname, K)
    st_numscalar(g1name, Ghat)
    st_numscalar(g2name, 0)
    printf("\n{hline 78}\n")
    printf(" Observations: %12.0f\n",N)
    printf(" Cluster Way: 1\n")
    printf(" Cluster Size: %12.0f\n",Ghat)
    printf(" dim(D): %12.0f\n",dimtheta)
    printf(" dim(X): %12.0f\n",p)
    printf(" Folds K: %12.0f\n",K)
    printf("{hline 78}\n")
    printf(" High-Dimensional Regression Based on Cluster-Robust DML \n")
}
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// IV REGRESSION FOR 1 WAY CLUSTERING //////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Same as reg1() but the first treatment is instrumented by iv.
void ivreg1( string scalar yv, string scalar dxv, string scalar touse, string
scalar iv, string scalar cluster1, real scalar dimtheta, real scalar K, real scalar alpha, real scalar num_resample, real scalar robust, real scalar TOLERANCE, real MAXITER, string scalar bname, string scalar Vname, string scalar nname, string scalar pname, string scalar wname, string scalar fname, string scalar g1name, string scalar g2name)
{
    ////////////////////////////////////////////////////////////////////////////
    // DATA ORGANIZATION
    real vector y, d, x
    real scalar n, p
    Y = st_data(., yv, touse)
    dx = st_data(., dxv, touse)
    g = st_data(., cluster1, touse)
    N = rows(dx)
    D = dx[.,1..dimtheta]
    // Instrument matrix: iv instruments the first treatment; any remaining
    // treatments instrument themselves.
    Z = st_data(., iv, touse)
    if( dimtheta > 1 ){
        Z = Z, D[,2..dimtheta]
    }
    X = dx[.,(dimtheta+1)..(cols(dx))]
    p = cols(X)
    if( K <= 1 ){ K = 5 }   // default number of folds with 1-way clustering
    sdD = get_scale(D)
    sdZ = get_scale(Z)
    D = scale(D)
    Z = scale(Z)
    X = scale(X)
    uniqueg = uniqrows(g)
    Ghat = rows(uniqueg)    // number of clusters
    list_thetahat = J(dimtheta,num_resample,0)
    list_Var = J(dimtheta,dimtheta*num_resample,0)
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        printf("\nResampled Cross-Fitting: Iteration %3.0f/%f",iter,num_resample)
        // Folds assigned per cluster, then mapped to observations.
        fold = ceil(uniform(Ghat,1)*K)
        foldi = J(N,1,0)
        for( idx = 1 ; idx <= N ; idx++ ){
            foldi[idx] = fold[selectindex(g[idx,1] :== uniqueg),1]
        }
        ////////////////////////////////////////////////////////////////////////
        // DML
        lambda_Z = J(dimtheta,1,0)
        lambda_D = J(dimtheta,1,0)
        // Rule-of-thumb penalties scaled by the training-fold size N*(K-1)/K.
        for( idx = 1 ; idx <= dimtheta ; idx++ ){
            lambda_Z[idx,1] = 0.01*variance(Z[,idx])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
            lambda_D[idx,1] = 0.01*variance(D[,idx])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
        }
        lambda_Y = 0.01*variance(Y[,1])*((N*(K-1)/K)*log((N*(K-1)/K)*p))^0.5/(N*(K-1)/K)
        pre_Z = Z
        res_Z = Z
        pre_D = D
        res_D = D
        pre_Y = Y
        res_Y = Y
        // Nuisance regressions on the complement of each cluster-level fold.
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(foldi:!=kdx)
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                xihat = glmnet(Z[indices,idx],(X,J(N,1,1))[indices,],lambda_Z[idx,1],alpha,TOLERANCE,MAXITER)
                pre_Z[indices,idx] = (X,J(N,1,1))[indices,] * xihat
                res_Z[indices,idx] = Z[indices,idx] - pre_Z[indices,idx]
            }
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                gammahat = glmnet(D[indices,idx],(X,J(N,1,1))[indices,],lambda_D[idx,1],alpha,TOLERANCE,MAXITER)
                pre_D[indices,idx] = (X,J(N,1,1))[indices,] * gammahat
                res_D[indices,idx] = D[indices,idx] - pre_D[indices,idx]
            }
            betahat = glmnet(Y[indices,1],(X,J(N,1,1))[indices,],lambda_Y,alpha,TOLERANCE,MAXITER)
            pre_Y[indices,1] = (X,J(N,1,1))[indices,] * betahat
            res_Y[indices,1] = Y[indices,1] - pre_Y[indices,1]
        }
        // Fold-wise IV estimate on residuals, averaged over the K folds.
        thetahat = 0
        for( kdx = 1 ; kdx <= K ; kdx++ ){
            indices = selectindex(foldi:==kdx)
            thetahat = thetahat :+ luinv( res_Z[indices,]' * res_D[indices,] ) * res_Z[indices,]' * res_Y[indices,1]
        }
        thetahat = thetahat :/ K
        list_thetahat[,iter] = thetahat
        res = res_Y :- res_D * thetahat
        ////////////////////////////////////////////////////////////////////////
        // VARIANCE
        // Cluster-robust sandwich: scores accumulated cluster by cluster.
        // NOTE(review): local matrix J coexists with calls to builtin J().
        J = res_Z' * res_D :/ Ghat
        sigma2 = 0
        for( gdx = 1 ; gdx <= Ghat ; gdx++ ){
            indices = selectindex(g:==uniqueg[gdx,1])
            sigma2 = sigma2 :+(((res*J(1,dimtheta,1)) :* res_Z)[indices,])' * (((res*J(1,dimtheta,1)) :* res_Z)[indices,])
        }
        Var = luinv(J) * (sigma2:/Ghat) * luinv(J)'
        list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = Var
    }
    ////////////////////////////////////////////////////////////////////////////
    // MEAN RESULTS
    b = mean(list_thetahat')
    V = 0
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        V = V :+ list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-b')*(list_thetahat[,iter]:-b')'
    }
    V = V :/ num_resample :/ Ghat   // scaled by the number of clusters
    ////////////////////////////////////////////////////////////////////////////
    // MEDIAN RESULTS
    rb = b
    list_rVar = list_Var
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        rb[1,idx] = median(list_thetahat[idx,]')
    }
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        list_rVar[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-rb')*(list_thetahat[,iter]:-rb')'
    }
    rV = V
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        for( jdx = 1 ; jdx <= dimtheta ; jdx++ ){
            rV[idx,jdx] = median((list_rVar[idx,((1..num_resample):-1):*dimtheta:+jdx])')
        }
    }
    rV = rV :/ Ghat
    ////////////////////////////////////////////////////////////////////////////
    // RETURN THE RESULTS
    // Undo the standardization of D before posting to Stata.
    if( robust == 0 ){
        st_matrix(bname, b*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*V*diag(1:/sdD))
    }
    if( robust == 1 ){
        st_matrix(bname, rb*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*rV*diag(1:/sdD))
    }
    st_numscalar(nname, N)
    st_numscalar(pname, p)
    st_numscalar(wname, 1)
    st_numscalar(fname, K)
    st_numscalar(g1name, Ghat)
    st_numscalar(g2name, 0)
    printf("\n{hline 78}\n")
    printf(" Observations: %12.0f\n",N)
    printf(" Cluster Way: 1\n")
    printf(" Cluster Size: %12.0f\n",Ghat)
    printf(" dim(D): %12.0f\n",dimtheta)
    printf(" dim(X): %12.0f\n",p)
    printf(" Folds K: %12.0f\n",K)
    printf("{hline 78}\n")
    printf(" High-Dimensional IV Regression Based on Cluster-Robust DML \n")
}
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// REGRESSION FOR 2 WAY CLUSTERING /////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Two-way cluster-robust DML: folds are assigned independently along each
// clustering dimension (cluster1, cluster2) and cross-fitting is done over
// the K x K grid of fold pairs.
void reg2( string scalar yv, string scalar dxv, string scalar touse, string scalar cluster1, string scalar cluster2, real scalar dimtheta, real scalar K, real scalar alpha, real scalar num_resample, real scalar robust, real scalar TOLERANCE, real MAXITER, string scalar bname, string scalar Vname, string scalar nname, string scalar pname, string scalar wname, string scalar fname, string scalar g1name, string scalar g2name)
{
    ////////////////////////////////////////////////////////////////////////////
    // DATA ORGANIZATION
    real vector y, d, x
    real scalar n, p
    Y = st_data(., yv, touse)
    dx = st_data(., dxv, touse)
    g1 = st_data(., cluster1, touse)
    g2 = st_data(., cluster2, touse)
    N = rows(dx)
    D = dx[.,1..dimtheta]
    Z = D   // no instruments: D instruments itself
    X = dx[.,(dimtheta+1)..(cols(dx))]
    p =
cols(X)
    if( K <= 1 ){ K = 3 }   // default number of folds with 2-way clustering
    sdD = get_scale(D)      // kept to undo the standardization of D at the end
    sdZ = get_scale(Z)
    D = scale(D)
    Z = scale(Z)
    X = scale(X)
    uniqueg1 = uniqrows(g1)
    Ghat1 = rows(uniqueg1)  // number of clusters along dimension 1
    uniqueg2 = uniqrows(g2)
    Ghat2 = rows(uniqueg2)  // number of clusters along dimension 2
    list_thetahat = J(dimtheta,num_resample,0)
    list_Var = J(dimtheta,dimtheta*num_resample,0)
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        printf("\nResampled Cross-Fitting: Iteration %3.0f/%f",iter,num_resample)
        // Independent cluster-level fold draws along each dimension, mapped to
        // observations so each observation carries a fold pair (fold1i,fold2i).
        fold1 = ceil(uniform(Ghat1,1)*K)
        fold2 = ceil(uniform(Ghat2,1)*K)
        fold1i = J(N,1,0)
        fold2i = J(N,1,0)
        for( idx = 1 ; idx <= N ; idx++ ){
            fold1i[idx] = fold1[selectindex(g1[idx,1] :== uniqueg1),1]
            fold2i[idx] = fold2[selectindex(g2[idx,1] :== uniqueg2),1]
        }
        ////////////////////////////////////////////////////////////////////////
        // DML
        // Rule-of-thumb penalties scaled by the two-way training-cell size
        // N*(K-1)^2/K^2.
        lambda_Z = J(dimtheta,1,0)
        lambda_D = J(dimtheta,1,0)
        for( idx = 1 ; idx <= dimtheta ; idx++ ){
            lambda_Z[idx,1] = 0.01*variance(Z[,idx])*((N*(K-1)^2/K^2)*log((N*(K-1)^2/K^2)*p))^0.5/(N*(K-1)^2/K^2)
            lambda_D[idx,1] = 0.01*variance(D[,idx])*((N*(K-1)^2/K^2)*log((N*(K-1)^2/K^2)*p))^0.5/(N*(K-1)^2/K^2)
        }
        lambda_Y = 0.01*variance(Y[,1])*((N*(K-1)^2/K^2)*log((N*(K-1)^2/K^2)*p))^0.5/(N*(K-1)^2/K^2)
        pre_Z = Z
        res_Z = Z
        pre_D = D
        res_D = D
        pre_Y = Y
        res_Y = Y
        // Nuisance regressions on the complement of each fold pair: training
        // rows exclude both fold kdx1 (dimension 1) and fold kdx2 (dimension 2).
        for( kdx1 = 1 ; kdx1 <= K ; kdx1++ ){
            for( kdx2 = 1 ; kdx2 <= K ; kdx2++ ){
                indices = selectindex(fold1i:!=kdx1 :& fold2i:!=kdx2)
                for( idx = 1 ; idx <= dimtheta ; idx++ ){
                    xihat = glmnet(Z[indices,idx],(X,J(N,1,1))[indices,],lambda_Z[idx,1],alpha,TOLERANCE,MAXITER)
                    pre_Z[indices,idx] = (X,J(N,1,1))[indices,] * xihat
                    res_Z[indices,idx] = Z[indices,idx] - pre_Z[indices,idx]
                }
                for( idx = 1 ; idx <= dimtheta ; idx++ ){
                    gammahat = glmnet(D[indices,idx],(X,J(N,1,1))[indices,],lambda_D[idx,1],alpha,TOLERANCE,MAXITER)
                    pre_D[indices,idx] = (X,J(N,1,1))[indices,] * gammahat
                    res_D[indices,idx] = D[indices,idx] - pre_D[indices,idx]
                }
                betahat = glmnet(Y[indices,1],(X,J(N,1,1))[indices,],lambda_Y,alpha,TOLERANCE,MAXITER)
                pre_Y[indices,1] = (X,J(N,1,1))[indices,] * betahat
                res_Y[indices,1] = Y[indices,1] - pre_Y[indices,1]
            }
        }
        // Cell-wise IV estimate on residuals, averaged over the K^2 fold pairs.
        thetahat = 0
        for( kdx1 = 1 ; kdx1 <= K ; kdx1++ ){
            for( kdx2 = 1 ; kdx2 <= K ; kdx2++ ){
                indices = selectindex(fold1i:==kdx1 :& fold2i:==kdx2)
                thetahat = thetahat :+ luinv( res_Z[indices,]' * res_D[indices,] ) * res_Z[indices,]' * res_Y[indices,1]
            }
        }
        thetahat = thetahat :/ K^2
        list_thetahat[,iter] = thetahat
        res = res_Y :- res_D * thetahat
        ////////////////////////////////////////////////////////////////////////
        // VARIANCE
        // Two-way cluster-robust sandwich. Within each fold-pair cell, score
        // cross-products are summed over observation pairs that share the
        // dimension-1 cluster (first loop nest) and over pairs that share the
        // dimension-2 cluster (second loop nest); pairs sharing both clusters
        // enter both sums — per the paper's multiway variance construction
        // (NOTE(review): confirm against the published formula).
        // NOTE(review): local matrix J coexists with calls to builtin J().
        J = (res_Z' * res_D) :/ (Ghat1/K) :/ (Ghat2/K) :/ K^2
        sigma2 = 0
        for( kdx1 = 1 ; kdx1 <= K ; kdx1++ ){
        for( kdx2 = 1 ; kdx2 <= K ; kdx2++ ){
        for( idx1 = 1 ; idx1 <= rows(uniqueg1) ; idx1++ ){
        for( jdx1 = 1 ; jdx1 <= rows(uniqueg2) ; jdx1++ ){
            indices1 = selectindex(fold1i:==kdx1 :& fold2i:==kdx2 :& g1:==uniqueg1[idx1,1] :& g2:==uniqueg2[jdx1,1])
            if( rows(indices1) > 0 ){
                for( jdx2 = 1 ; jdx2 <= rows(uniqueg2) ; jdx2++ ){
                    indices2 = selectindex(fold1i:==kdx1 :& fold2i:==kdx2 :& g1:==uniqueg1[idx1,1] :& g2:==uniqueg2[jdx2,1])
                    if( rows(indices2) > 0 ){
                        sigma2 = sigma2 :+ rowsum((((res*J(1,dimtheta,1)) :* res_Z)[indices1,])') * colsum((((res*J(1,dimtheta,1)) :* res_Z)[indices2,]))
        }}}}}}}
        for( kdx1 = 1 ; kdx1 <= K ; kdx1++ ){
        for( kdx2 = 1 ; kdx2 <= K ; kdx2++ ){
        for( idx1 = 1 ; idx1 <= rows(uniqueg1) ; idx1++ ){
        for( jdx1 = 1 ; jdx1 <= rows(uniqueg2) ; jdx1++ ){
            indices1 = selectindex(fold1i:==kdx1 :& fold2i:==kdx2 :& g1:==uniqueg1[idx1,1] :& g2:==uniqueg2[jdx1,1])
            if( rows(indices1) > 0 ){
                for( idx2 = 1 ; idx2 <= rows(uniqueg1) ; idx2++ ){
                    indices2 = selectindex(fold1i:==kdx1 :& fold2i:==kdx2 :& g1:==uniqueg1[idx2,1] :& g2:==uniqueg2[jdx1,1])
                    if( rows(indices2) > 0 ){
                        sigma2 = sigma2 :+ rowsum((((res*J(1,dimtheta,1)) :* res_Z)[indices1,])') * colsum((((res*J(1,dimtheta,1)) :* res_Z)[indices2,]))
        }}}}}}}
        Var = luinv(J) * (sigma2:*min((Ghat1/K,Ghat2/K)'):/((Ghat1/K)^2*(Ghat2/K)^2):/K^2) * luinv(J)'
        list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = Var
    }
    ////////////////////////////////////////////////////////////////////////////
    // MEAN RESULTS
    b = mean(list_thetahat')
    V = 0
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        V = V :+ list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-b')*(list_thetahat[,iter]:-b')'
    }
    V = V :/ num_resample :/ min((Ghat1,Ghat2)')    // scaled by the smaller cluster count
    ////////////////////////////////////////////////////////////////////////////
    // MEDIAN RESULTS
    rb = b
    list_rVar = list_Var
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        rb[1,idx] = median(list_thetahat[idx,]')
    }
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        list_rVar[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-rb')*(list_thetahat[,iter]:-rb')'
    }
    rV = V
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        for( jdx = 1 ; jdx <= dimtheta ; jdx++ ){
            rV[idx,jdx] = median((list_rVar[idx,((1..num_resample):-1):*dimtheta:+jdx])')
        }
    }
    rV = rV :/ min((Ghat1,Ghat2)')
    ////////////////////////////////////////////////////////////////////////////
    // RETURN THE RESULTS
    // Undo the standardization of D before posting to Stata.
    if( robust == 0 ){
        st_matrix(bname, b*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*V*diag(1:/sdD))
    }
    if( robust == 1 ){
        st_matrix(bname, rb*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*rV*diag(1:/sdD))
    }
    st_numscalar(nname, N)
    st_numscalar(pname, p)
    st_numscalar(wname, 2)
    st_numscalar(fname, K)
    st_numscalar(g1name, Ghat1)
    st_numscalar(g2name, Ghat2)
    printf("\n{hline 78}\n")
    printf(" Observations: %12.0f\n",N)
    printf(" Cluster Ways: 2\n")
    printf(" Cluster Size 1: %12.0f\n",Ghat1)
    printf(" Cluster Size 2: %12.0f\n",Ghat2)
    printf(" dim(D): %12.0f\n",dimtheta)
    printf(" dim(X): %12.0f\n",p)
    printf(" Folds K: %12.0f\n",K)
    printf("{hline 78}\n")
    printf(" High-Dimensional Regression Based on 2-Way Cluster-Robust DML \n")
}
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// IV REGRESSION FOR 2 WAY
// CLUSTERING //////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// High-dimensional IV regression based on 2-way cluster-robust DML.
//
// Estimates the low-dimensional coefficient theta on the endogenous
// regressors D in Y = D*theta + X*beta + e, using instruments Z, with
// lasso/elastic-net (glmnet) nuisance regressions cross-fitted over K x K
// fold pairs defined on the two cluster dimensions, repeated num_resample
// times and aggregated by mean or median (finite-sample adjustment).
//
// Arguments:
//   yv, dxv      : names of the outcome and the (D,X) regressor variables;
//                  the first dimtheta columns of dxv are D, the rest are X
//   touse        : estimation-sample marker variable
//   iv           : name(s) of the instrumental variable(s)
//   cluster1/2   : cluster-identifier variables for the two cluster ways
//   dimtheta     : number of endogenous regressors D
//   K            : number of folds per cluster way (reset to 3 if K <= 1)
//   alpha        : elastic-net mixing parameter passed to glmnet
//   num_resample : number of resampled cross-fitting repetitions
//   robust       : 0 = report mean aggregation, 1 = report median aggregation
//   TOLERANCE, MAXITER : glmnet convergence controls
//   bname..g2name      : names of Stata matrices/scalars receiving results
void ivreg2( string scalar yv,       string scalar dxv,
             string scalar touse,    string scalar iv,
             string scalar cluster1, string scalar cluster2,
             real scalar dimtheta,   real scalar K,
             real scalar alpha,      real scalar num_resample,
             real scalar robust,     real scalar TOLERANCE,
             real scalar MAXITER,    // FIX: was "real MAXITER" (matrix by
                                     // default); a scalar is intended, matching
                                     // TOLERANCE and the reg2 signature
             string scalar bname,    string scalar Vname,
             string scalar nname,    string scalar pname,
             string scalar wname,    string scalar fname,
             string scalar g1name,   string scalar g2name)
{
    ////////////////////////////////////////////////////////////////////////////
    // DATA ORGANIZATION
    real scalar p                       // dim(X); unused decls y,d,x,n removed

    Y  = st_data(., yv, touse)
    dx = st_data(., dxv, touse)
    g1 = st_data(., cluster1, touse)
    g2 = st_data(., cluster2, touse)
    N  = rows(dx)
    // Leading dimtheta columns of dx are the endogenous regressors D; the
    // remaining columns are the high-dimensional controls X.
    D = dx[.,1..dimtheta]
    Z = st_data(., iv, touse)
    // Only the first endogenous regressor is instrumented by iv; any further
    // D columns act as their own instruments.
    if( dimtheta > 1 ){
        Z = Z, D[,2..dimtheta]
    }
    X = dx[.,(dimtheta+1)..(cols(dx))]
    p = cols(X)
    if( K <= 1 ){
        K = 3                           // fallback number of folds
    }
    // Standardize variables; sdD is kept to rescale estimates back at the end.
    sdD = get_scale(D)
    sdZ = get_scale(Z)
    D = scale(D)
    Z = scale(Z)
    X = scale(X)
    uniqueg1 = uniqrows(g1)
    Ghat1 = rows(uniqueg1)              // number of clusters, way 1
    uniqueg2 = uniqrows(g2)
    Ghat2 = rows(uniqueg2)              // number of clusters, way 2
    list_thetahat = J(dimtheta,num_resample,0)
    list_Var = J(dimtheta,dimtheta*num_resample,0)

    for( iter = 1 ; iter <= num_resample ; iter++ ){
        printf("\nResampled Cross-Fitting: Iteration %3.0f/%f",iter,num_resample)

        // Randomly assign each cluster (per way) to one of K folds, then map
        // the cluster-level assignment down to observations.
        fold1 = ceil(uniform(Ghat1,1)*K)
        fold2 = ceil(uniform(Ghat2,1)*K)
        fold1i = J(N,1,0)
        fold2i = J(N,1,0)
        for( idx = 1 ; idx <= N ; idx++ ){
            fold1i[idx] = fold1[selectindex(g1[idx,1] :== uniqueg1),1]
            fold2i[idx] = fold2[selectindex(g2[idx,1] :== uniqueg2),1]
        }

        ////////////////////////////////////////////////////////////////////////
        // DML: penalty levels scaled by the effective size N*(K-1)^2/K^2 of
        // each leave-(k1,k2)-out training sample.
        lambda_Z = J(dimtheta,1,0)
        lambda_D = J(dimtheta,1,0)
        for( idx = 1 ; idx <= dimtheta ; idx++ ){
            lambda_Z[idx,1] = 0.01*variance(Z[,idx])*((N*(K-1)^2/K^2)*log((N*(K-1)^2/K^2)*p))^0.5/(N*(K-1)^2/K^2)
            lambda_D[idx,1] = 0.01*variance(D[,idx])*((N*(K-1)^2/K^2)*log((N*(K-1)^2/K^2)*p))^0.5/(N*(K-1)^2/K^2)
        }
        lambda_Y = 0.01*variance(Y[,1])*((N*(K-1)^2/K^2)*log((N*(K-1)^2/K^2)*p))^0.5/(N*(K-1)^2/K^2)

        pre_Z = Z
        res_Z = Z
        pre_D = D
        res_D = D
        pre_Y = Y
        res_Y = Y
        // Nuisance fits: for each (kdx1,kdx2) fold pair, fit on the complement
        // sample and store fitted values/residuals on those training indices.
        for( kdx1 = 1 ; kdx1 <= K ; kdx1++ ){
        for( kdx2 = 1 ; kdx2 <= K ; kdx2++ ){
            indices = selectindex(fold1i:!=kdx1 :& fold2i:!=kdx2)
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                xihat = glmnet(Z[indices,idx],(X,J(N,1,1))[indices,],lambda_Z[idx,1],alpha,TOLERANCE,MAXITER)
                pre_Z[indices,idx] = (X,J(N,1,1))[indices,] * xihat
                res_Z[indices,idx] = Z[indices,idx] - pre_Z[indices,idx]
            }
            for( idx = 1 ; idx <= dimtheta ; idx++ ){
                gammahat = glmnet(D[indices,idx],(X,J(N,1,1))[indices,],lambda_D[idx,1],alpha,TOLERANCE,MAXITER)
                pre_D[indices,idx] = (X,J(N,1,1))[indices,] * gammahat
                res_D[indices,idx] = D[indices,idx] - pre_D[indices,idx]
            }
            betahat = glmnet(Y[indices,1],(X,J(N,1,1))[indices,],lambda_Y,alpha,TOLERANCE,MAXITER)
            pre_Y[indices,1] = (X,J(N,1,1))[indices,] * betahat
            res_Y[indices,1] = Y[indices,1] - pre_Y[indices,1]
        }
        }
        // Residual-on-residual IV estimate, averaged over the K^2 fold pairs.
        thetahat = 0
        for( kdx1 = 1 ; kdx1 <= K ; kdx1++ ){
        for( kdx2 = 1 ; kdx2 <= K ; kdx2++ ){
            indices = selectindex(fold1i:==kdx1 :& fold2i:==kdx2)
            thetahat = thetahat :+ luinv( res_Z[indices,]' * res_D[indices,] ) * res_Z[indices,]' * res_Y[indices,1]
        }
        }
        thetahat = thetahat :/ K^2
        list_thetahat[,iter] = thetahat
        res = res_Y :- res_D * thetahat

        ////////////////////////////////////////////////////////////////////////
        // VARIANCE: 2-way cluster-robust sandwich estimator.
        // NOTE(review): the local matrix J shadows Mata's builtin J(); calls
        // with arguments, J(1,dimtheta,1), still resolve to the builtin.
        J = (res_Z' * res_D) :/ (Ghat1/K) :/ (Ghat2/K) :/ K^2
        sigma2 = 0
        // Accumulate score covariances within each way-1 cluster (pairs of
        // observations sharing g1) ...
        for( kdx1 = 1 ; kdx1 <= K ; kdx1++ ){
        for( kdx2 = 1 ; kdx2 <= K ; kdx2++ ){
        for( idx1 = 1 ; idx1 <= rows(uniqueg1) ; idx1++ ){
        for( jdx1 = 1 ; jdx1 <= rows(uniqueg2) ; jdx1++ ){
            indices1 = selectindex(fold1i:==kdx1 :& fold2i:==kdx2 :& g1:==uniqueg1[idx1,1] :& g2:==uniqueg2[jdx1,1])
            if( rows(indices1) > 0 ){
                for( jdx2 = 1 ; jdx2 <= rows(uniqueg2) ; jdx2++ ){
                    indices2 = selectindex(fold1i:==kdx1 :& fold2i:==kdx2 :& g1:==uniqueg1[idx1,1] :& g2:==uniqueg2[jdx2,1])
                    if( rows(indices2) > 0 ){
                        sigma2 = sigma2 :+ rowsum((((res*J(1,dimtheta,1)) :* res_Z)[indices1,])') * colsum((((res*J(1,dimtheta,1)) :* res_Z)[indices2,]))
                    }
                }
            }
        }
        }
        }
        }
        // ... and within each way-2 cluster (pairs sharing g2).
        for( kdx1 = 1 ; kdx1 <= K ; kdx1++ ){
        for( kdx2 = 1 ; kdx2 <= K ; kdx2++ ){
        for( idx1 = 1 ; idx1 <= rows(uniqueg1) ; idx1++ ){
        for( jdx1 = 1 ; jdx1 <= rows(uniqueg2) ; jdx1++ ){
            indices1 = selectindex(fold1i:==kdx1 :& fold2i:==kdx2 :& g1:==uniqueg1[idx1,1] :& g2:==uniqueg2[jdx1,1])
            if( rows(indices1) > 0 ){
                for( idx2 = 1 ; idx2 <= rows(uniqueg1) ; idx2++ ){
                    indices2 = selectindex(fold1i:==kdx1 :& fold2i:==kdx2 :& g1:==uniqueg1[idx2,1] :& g2:==uniqueg2[jdx1,1])
                    if( rows(indices2) > 0 ){
                        sigma2 = sigma2 :+ rowsum((((res*J(1,dimtheta,1)) :* res_Z)[indices1,])') * colsum((((res*J(1,dimtheta,1)) :* res_Z)[indices2,]))
                    }
                }
            }
        }
        }
        }
        }
        Var = luinv(J) * (sigma2:*min((Ghat1/K,Ghat2/K)'):/((Ghat1/K)^2*(Ghat2/K)^2):/K^2) * luinv(J)'
        list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = Var
    }

    ////////////////////////////////////////////////////////////////////////////
    // MEAN RESULTS: average over resamples, with across-resample dispersion
    // added to the variance.
    b = mean(list_thetahat')
    V = 0
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        V = V :+ list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-b')*(list_thetahat[,iter]:-b')'
    }
    V = V :/ num_resample :/ min((Ghat1,Ghat2)')

    ////////////////////////////////////////////////////////////////////////////
    // MEDIAN RESULTS: elementwise medians over resamples (robust aggregation).
    rb = b
    list_rVar = list_Var
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        rb[1,idx] = median(list_thetahat[idx,]')
    }
    for( iter = 1 ; iter <= num_resample ; iter++ ){
        list_rVar[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] = list_Var[1..dimtheta,(1..dimtheta):+(dimtheta*(iter-1))] + (list_thetahat[,iter]:-rb')*(list_thetahat[,iter]:-rb')'
    }
    rV = V
    for( idx = 1 ; idx <= dimtheta ; idx++ ){
        for( jdx = 1 ; jdx <= dimtheta ; jdx++ ){
            rV[idx,jdx] = median((list_rVar[idx,((1..num_resample):-1):*dimtheta:+jdx])')
        }
    }
    rV = rV :/ min((Ghat1,Ghat2)')

    ////////////////////////////////////////////////////////////////////////////
    // RETURN THE RESULTS: undo the scaling of D and post to Stata.
    if( robust == 0 ){
        st_matrix(bname, b*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*V*diag(1:/sdD))
    }
    if( robust == 1 ){
        st_matrix(bname, rb*diag(1:/sdD))
        st_matrix(Vname, diag(1:/sdD)*rV*diag(1:/sdD))
    }
    st_numscalar(nname, N)
    st_numscalar(pname, p)
    st_numscalar(wname, 2)              // number of cluster ways
    st_numscalar(fname, K)
    st_numscalar(g1name, Ghat1)
    st_numscalar(g2name, Ghat2)
    printf("\n{hline 78}\n")
    printf(" Observations: %12.0f\n",N)
    printf(" Cluster Ways: 2\n")
    printf(" Cluster Size 1: %12.0f\n",Ghat1)
    printf(" Cluster Size 2: %12.0f\n",Ghat2)
    printf(" dim(D): %12.0f\n",dimtheta)
    printf(" dim(X): %12.0f\n",p)
    printf(" Folds K: %12.0f\n",K)
    printf("{hline 78}\n")
    printf(" High-Dimensional IV Regression Based on 2-Way Cluster-Robust DML \n")
}
////////////////////////////////////////////////////////////////////////////////
end