*! version 1.1 1April2020 *! version 1.2 9September2020 - Simplifies syntax and changes how the estimation is done *! version 1.3 11September2020 - Fix some issues regarding collinearity *! version 1.4 22September2020 - Fix issue with the syntax in the selection eq. *! version 1.5 3March2021 - Added progress' dots, simplify the code and speed it up * Authors: Ercio Munoz & Mariel Siravegna /* Wrap file to implement copula-based sample selection model in quantile regression as suggested by Arellano and Bonhomme (2017). */ * qregsel wage educ age, quantile(.1 .5 .9) select(married children educ age) program define qregsel, eclass sortpreserve version 15.0 syntax varlist(numeric) [if] [in], SELect(string) quantile(string) /// [ copula(string) NOCONStant finergrid coarsergrid rescale nodots ] gettoken depvar indepvars : varlist _fv_check_depvar `depvar' fvexpand `indepvars' local cnames `r(varlist)' tokenize `select', parse("=") if "`2'" != "=" { local x_s `select' tempvar y_s qui gen byte `y_s' = (`depvar'!=.) `if' `in' } else { local y_s `1' local x_s `3' } capture unab x_s : `x_s' if ("`noconstant'"!="") local _constant , noconstant ******************************************************************************** ** Marking the samples to use ******************************************************************************** tempvar touse_all touse mark `touse_all' `if' `in' markout `touse_all' `indepvars' `x_s' qui gen byte `touse' = `touse_all' markout `touse' `depvar' `indepvars' qui replace `touse' = 0 if `y_s'==1 & `touse'!=1 qui count if `touse_all' local all = r(N) qui count if `touse' local N_selected = r(N) ******************************************************************************** ** Checking errors in the selection indicator ******************************************************************************** qui tab `y_s' if r(r) != 2 { dis as error "Dependent variable never censored because of selection or selection indicator is not binary." exit 198 } qui sum `y_s' if r(max) != 1 & r(min) != 0 { dis as error "Dependent variable never censored because of selection or selection indicator is not binary." exit 198 } ******************************************************************************** ** Checking collinearity ******************************************************************************** qui _rmdcoll `depvar' `indepvars' if `touse' `_constant' local result "`r(varlist)'" local coll_x: list indepvars - result if ~missing("`coll_x'") { noisily display as text "note: `coll_x' omitted from outcome equation because of collinearity" local `indepvars' `result' } cap qui _rmdcoll `y_s' `x_s' if (_rc!=0) display as error "Check the syntax in the selection equation" local result "`r(varlist)'" local coll_x: list x_s - result if ~missing("`coll_x'") { noisily display as text "note: `coll_x' omitted from selection equation because of colliearity" local x_s `result' } ******************************************************************************** ** Checking errors with copula specification (default is gaussian) ******************************************************************************** if "`copula'" == "" { local copula "gaussian" } if "`copula'" != "gaussian" & "`copula'" != "frank" { dis as error "`copula' is not an available copula." exit 198 } ******************************************************************************** ** Checking errors in the quantiles requested ******************************************************************************** tokenize `quantile' local orig `1' macro shift if "`orig'" == "" { di in red "option quantile() required" exit 198 } capture confirm number `orig' if _rc { di "`orig' not a number" exit 198 } if `orig' >= 1 { local orig = `orig'/100 } if `orig'<=0 | `orig' >=1 { local orig = 100*`orig' di "`orig' out of range" exit 198 } local quants = `orig' while "`1'" != "" { local orig `1' macro shift if "`orig'" == "" { di in red "option quantile() required" exit 198 } capture confirm number `orig' if _rc { di "`orig' not a number" exit 198 } if `orig' >= 1 { local orig = `orig'/100 } if `orig' >=1 { local orig = 100*`orig' di "`orig' out of range" exit 198 } if `orig'<=0 { di "`orig' out of range" exit 198 } local quants = "`quants' `orig'" } ******************************************************************************** ** Generate the propensity score and the instrument ******************************************************************************** tempname pZ grid object y X R M N k index copula_p b betas_taus coefs value gridsize obj qui: probit `y_s' `x_s' if `touse_all' qui: predict `pZ' if `touse_all' ******************************************************************************** ** Estimate rho looping over quantiles and the grid for rho ******************************************************************************** if "`finergrid'" == "finergrid" & "`coarsergrid'" != "coarsergrid" { if ("`copula'" == "frank") mata: `grid' = finergrid("frank") if ("`copula'" == "gaussian") mata: `grid' = finergrid("gaussian") } else if "`coarsergrid'" == "coarsergrid" { if ("`copula'" == "frank") mata: `grid' = coarsergrid("frank") if ("`copula'" == "gaussian") mata: `grid' = coarsergrid("gaussian") } else { if ("`copula'" == "frank") mata: `grid' = regulargrid("frank") if ("`copula'" == "gaussian") mata: `grid' = regulargrid("gaussian") } mata: st_matrix("`object'",`grid') ******************************************************************************** * Send data to mata ******************************************************************************** mata: `y' = st_data(., "`depvar'", "`touse'") mata: `X' = st_data(., "`cnames'", "`touse'") if ("`rescale'" != "") { mata: `R' = meanvariance(`X') mata: `X' = (`X':-`R'[1,.]):/sqrt(diagonal(`R'[2::rows(`R'),]))' } mata: `M' = rows(`X') mata: `N' = cols(`X') if ("`noconstant'"=="") mata: `X' = `X',J(`M',1,1) mata: `k' = cols(`X') mata: st_numscalar("`k'",`k') mata: `pZ' = st_data(.,"`pZ'", "`touse'") ******************************************************************************** * Grid search ******************************************************************************** mata: st_numscalar("`gridsize'",rows(`grid')) mat `value' = J(`gridsize',1,.) local gridsize = `gridsize' forvalues g = 1(1)`gridsize' { mata: `obj' = maxrho(`pZ',`y',`X',`M',`N',`grid',"`copula'",`g') mata: st_numscalar("`obj'",`obj') mat `value'[`g',1] = `obj' if "`nodots'"=="" { if `g'==1 { display "Grid for the copula parameter (`gridsize')" display "----+--- 1 ---+--- 2 ---+--- 3 ---+--- 4 ---+--- 5" } if !inlist(`g',50,100,150,200) di "." _cont if inlist(`g',50,100,150,200) di "." _cont _newline } } mata: `value' = st_matrix("`value'") mata: `index' = select((1::rows(`value')), (`value' :== min(`value''))) mata: st_numscalar("`index'",`index') ******************************************************************************** ** Estimate rotated quantile regression using the selected rho ******************************************************************************** local rho = `object'[`index',1] local spearman = `object'[`index',2] local kendall = `object'[`index',3] mat `object' = `object',`value' mat colnames `object' = rho spearman kendall value local count: word count `indepvars' if ("`noconstant'" == "") mat `coefs' = J(`count'+1,1,.) else if ("`noconstant'" != "") mat `coefs' = J(`count',1,.) foreach tau of local quants { mata: `copula_p' = copulafn(`pZ',`rho',`tau',"`copula'") mata: `b' = mywork(`y',`X',`pZ',`rho',`tau',`copula_p',`M',`N') local qtau = round(100*`tau') if ("`noconstant'" == "") local cnames2 `cnames' _cons else if ("`noconstant'" != "") local cnames2 `cnames' local names = "" foreach lname2 of local cnames2 { local names = "`names' q`qtau':`lname2'" } mata st_matrix("`b'", `b') mat rownames `b' = `names' mat `betas_taus' = ( nullmat(`betas_taus') \ `b') mat colnames `b' = "q`qtau'" mat `coefs' = `coefs',`b' } mat `coefs' = `coefs'[1...,2...] if ("`noconstant'" == "") local cnames `cnames' _cons matrix rownames `coefs' = `cnames' ******************************************************************************** ** Generating the output ******************************************************************************** if ("`rescale'"!="") local rescale = "rescaled" else if ("`rescale'"=="") local rescale = "non-rescaled" dis " " dis in green "Quantile selection model" /// _column (50) "Number of obs" _column(69) "=" _column(71) %8.0f in yellow `all' dis in green _column (50) "Selected" _column(69) "=" _column(71) %8.0f in yellow `N_selected' dis in green _column (50) "Nonselected" _column(69) "=" _column(71) %8.0f in yellow `all'-`N_selected' dis in green "Copula parameter (`copula'): " %8.2f in yellow `rho' tempname b matrix `b' = `betas_taus'' ereturn post `b', esample(`touse_all') buildfvinfo ereturn matrix coefs = `coefs' ereturn matrix grid = `object' ereturn scalar N = `all' ereturn scalar N_selected = `N_selected' ereturn scalar rho = `rho' ereturn scalar kendall = `kendall' ereturn scalar spearman = `spearman' ereturn local title "Quantile selection model" ereturn local rescale "`rescale'" ereturn local predict "qregsel_p" ereturn local cmd "qregsel" ereturn local select_eq "`select'" ereturn local outcome_eq "`depvar' `indepvars'" ereturn local cmdline "qregsel `depvar' `indepvars', select(`select')" ereturn local indepvars "`cnames'" ereturn local depvar "`depvar'" ereturn local copula "`copula'" ereturn display end ******************************************************************************** ** Auxiliary functions needed for the estimation ******************************************************************************** mata: real scalar maxrho(real matrix pZ1, real matrix y, real matrix X, real scalar M, real scalar N, real matrix grid, string scalar copula, real scalar j) { real vector copula_p, b real scalar rhoa, obj, obj2, gridsize gridsize = rows(grid) rhoa = grid[j,1] obj = 0 for (i=1; i<=9; i++) { copula_p = copulafn(pZ1,rhoa,i/10,copula) b = mywork(y,X,pZ1,rhoa,i/10,copula_p,M,N) obj2 = objective(pZ1,y,X,b,copula_p) obj = obj + obj2 } obj = obj*obj return(obj) } real matrix bound(numeric matrix x, numeric matrix dx) { real vector b, f b = 1e20 :+ 0* x f = selectindex(dx:<0) b[f] = -x[f] :/ dx[f] return(b) } real vector copulafn(real vector pZ1, numeric vector rho, numeric vector tau, string scalar name) { real vector G,vs,v1 if (name=="gaussian") { vs = J(rows(pZ1),1,invnormal(tau)) v1 = invnormal(pZ1) G = binormal(vs,v1,rho) :/ pZ1 } else { G = -ln(1:+(exp(-rho*tau):-1):*(exp(-rho:*pZ1):-1):/(exp(-rho)-1)):/(rho:*pZ1) } return(G) } real vector mywork(real vector y, real matrix X, real vector pZ1, numeric vector rho, real vector tau, real matrix p, real scalar M, real scalar N) { real vector yy, bb real matrix u, a, b, A, x, G real scalar m, n, k, it, beta, small, max_it k = cols(X) u = J(M, 1, 1) a = (1:-p):*u it=0 A = X' c = -y' b = X'*a x = a beta = 0.9995 small = 1e-5 max_it = 50 m = rows(A) n = cols(A) // Generate initial feasible point s = u - x yy = svsolve(A',c')' r = c - yy * A r = mm_cond(r:==0,r:+0.001,r) z = mm_cond(r:>0,r,0) w = z - r gap = c * x - yy * b + w * u // Start iterations it = 0 while (gap > small & it < max_it) { it++ // Compute affine step q = 1 :/ (z' :/ x + w' :/ s) r = z - w AQ = J(k,n,0) for (i=1;i<=n;i++) { for (j=1;j<=k;j++) { AQ[j,i] = q[i,1]*A[j,i] } } AQA = AQ * A' rhs = AQ * r' dy = (invsym(AQA) * rhs)' dx = q :* (dy * A - r)' ds = -dx dz = -z :* (1 :+ dx :/ x)' dw = -w :* (1 :+ ds :/ s)' // Compute maximum allowable step lengths fx = bound(x, dx) fs = bound(s, ds) fw = bound(w, dw) fz = bound(z, dz) fp = rowmin((fx,fs)) fd = colmin((fw \ fz)) fp = min((min(beta * fp), 1)) fd = min((min(beta * fd), 1)) if (mm_cond(fp: