*! version 2.01 07oct2011
* Estimates linear regression model with two high dimensional fixed effects
/*---------------------------------------------------------*/
/* Guimaraes & Portugal Algorithm */
/* Author: Paulo Guimaraes */
/*---------------------------------------------------------*/
/* Based on: */
/* Paulo Guimaraes and Pedro Portugal. "A Simple Feasible Alternative Procedure to Estimate Models with */
/* High-Dimensional Fixed Effects", Stata Journal, 10(4), 628-649, 2010. */

* reg2hdfe: command entry point.
* On replay it checks that the stored e(cmd) is reg2hdfe (error 301 otherwise)
* and hands the command line to Display; in every other case the command line
* is passed unchanged to Estimate.
program reg2hdfe, eclass
    version 9.1
    if replay() {
        if ("`e(cmd)'"!="reg2hdfe") error 301
        Display `0'
    }
    else Estimate `0'
end

* Estimate: parses the command line, validates option combinations and runs
* the estimation.
* Required: a varlist (first variable is the dependent variable, the rest are
* regressors) plus options id1() and id2().
* Optional arguments are listed in the syntax statement below; the checks that
* follow reject these combinations with error 198:
*   - only one of fe1() / fe2() given
*   - param1 together with param2
*   - improve() together with indata(), outdata(), regressors, fe1(), check
*     or cluster()
* NOTE(review): a span of this program is missing from the text as received
* (see the marker inside the improve branch), so the main estimation steps
* cannot be documented from here.
program define Estimate, eclass
    syntax varlist [if] [in], id1(str) id2(str) ///
    [TOL(real 0.000001) MAXiter(integer 0) ///
    CHECK NODOTS SIMPLE fe1(str) fe2(str) cluster(str) GROUPid(str) INdata(string) ///
    OUTdata(string) IMProve(str) VERBose NOREGress PARAM1 PARAM2 OP1(integer 3) ///
    OP2(integer 1) OP3(integer 10) OP4(integer 1000) OP5(real 0.001) AUTOFF]
*********************************************************************
* Checking syntax
*********************************************************************
    * Split the varlist: first token is the dependent variable, remainder the regressors
    tokenize `varlist'
    local lhs `1'
    mac shift
    local rhs `*'
    * fe1() and fe2() must be supplied together
    if ("`fe1'"!=""&"`fe2'"=="")|("`fe2'"!=""&"`fe1'"=="") {
        di in red "Error: You must specify both options fe1 and fe2"
        error 198
    }
    if "`param1'"=="param1"&"`param2'"=="param2" {
        di in red "Error: Choose either param1 or param2"
        error 198
    }
    * improve() is incompatible with each of the options tested below
    if "`indata'"!=""&"`improve'"!="" {
        di in red "Error: Indata option not valid with improve option"
        error 198
    }
    if "`outdata'"!=""&"`improve'"!="" {
        di in red "Error: Outdata option not valid with improve option"
        error 198
    }
    if "`improve'"!=""&"`rhs'"!="" {
        di in red "Error: Improve option can only be used with a single variable"
        error 198
    }
    if "`improve'"!=""&"`fe1'"!="" {
        di in red "Error: Can not estimate fixed effects with improve option"
        error 198
    }
    if "`improve'"!=""&"`check'"!="" {
        di in red "Error: Can not use check and improve option simultaneously"
        error 198
    }
    if "`improve'"!=""&"`cluster'"!="" {
        di in red "Error: Can not use cluster and improve option simultaneously"
        error 198
    }
    * Flag the run as standard when none of indata(), improve(), outdata() is given
    if "`indata'"==""&"`improve'"==""&"`outdata'"=="" {
        local standard "standard"
    }
    * Variables requested as output must not already exist in the data
    if `"`fe1'"'!=`""' confirm new var `fe1'
    if `"`fe2'"'!=`""' confirm new var `fe2'
    if `"`groupid'"'!=`""' confirm new var `groupid'
    * Remove any leftover __uid variable; capture ignores the error if it is absent
    capture drop __uid
**********************************************************************
* Define Initial Variables
**********************************************************************
    tempvar clustervar
    di in ye "=============================================================="
    * dots is 1 unless the nodots option was specified
    local dots `=cond("`nodots'"=="",1,0)'
***********************************************************************
* Do Main Loop
***********************************************************************
    if "`improve'"!="" {
        preserve
        tempvar varfe2
        tempfile tmp1 tmp2
        quietly {
            if "`verbose'"!="" {
                noisily di "Reading `improve'_ids"
            }
            * Load the ids file named by improve(), then merge on __uid with the
            * file named after the dependent variable
            use `improve'_ids, clear
            sort __uid
            qui save `tmp1', replace
            merge __uid using `improve'_`lhs'
            sum _merge, meanonly
* NOTE(review): the text is truncated at this point. The next line fuses the
* start of an if statement on r(min) with the tail of a later display
* statement; everything in between (presumably starting at a less-than sign
* and ending at an arrow inside a string) is absent from this copy and must be
* restored from the original ado-file. The two closing braces that follow the
* fragment belong to blocks opened inside the missing span.
if r(min) "_b[`fe1']
di in yellow "Coefficient for `id2' --> "_b[`fe2']
}
}
    * Optionally build the connected-group identifier and label it
    if `"`groupid'"'!=`""' {
        qui __makegps, id1(`id1') id2(`id2') groupid(`groupid')
        label var `groupid' "Unique identifier for mobility groups"
    }
    * When neither indata() nor improve() is used, return to the original data,
    * carrying over any newly created variables.
    * NOTE(review): origdata and __touse are not defined in the visible text;
    * presumably they are created in the missing span - confirm.
    if "`indata'"==""&"`improve'"=="" {
        tempfile addvars
        qui describe
        * More than one variable in memory: save the new variables keyed by
        * __uid and merge them back onto the original data
        if r(k)> 1 {
            keep __uid `fe1' `fe2' `groupid'
            sort __uid
            qui save `addvars', replace
            use `origdata', clear
            drop __touse
            sort __uid
            merge __uid using `addvars'
            drop _merge
        }
        else {
            use `origdata', clear
            drop __touse
        }
    }
    * NOTE(review): nodisp is not among the options in the syntax statement
    * above and name1 is not set in the visible text; both may be defined in
    * the missing span - confirm.
    if "`nodisp'"!="nodisp" {
        Display `name1'
    }
    capture drop __uid
    di
end

* Display: prints the results table for a stored set of estimates.
* Argument: name - the name of stored estimation results to restore.
* If no name is passed, nothing is displayed.
program Display
    args name
    if "`name'"!="" {
        qui estimates restore `name'
        _coef_table_header, title( ********** Linear Regression with 2 High-Dimensional Fixed Effects ********** )
        _coef_table
    }
end

* iteralg: iteratively removes the id1 and id2 group means from a variable.
* Positional arguments:
*   var      variable transformed in place (recast to double)
*   id1 id2  the two grouping variables
*   start2   starting value (expression) for the id2 effect
*   tol      the loop runs while abs(dif) exceeds this value
*   maxiter  the loop also stops when the iteration counter equals this value
*   simple   non-empty disables the acceleration steps
*   verbose  non-empty prints per-iteration diagnostics
*   dots     non-zero prints progress dots
*   jfe      if non-empty, name of a new variable that receives the final id2 effect
*   op1-op5  tuning values for the acceleration steps (see their use below)
*   autoff   non-empty disables the automatic adjustment of op2
* On exit var holds var - v2 + mean computed from the last pass, where v2 is
* the id2 effect and mean is the id1 group mean of the previous v2.
program define iteralg
    args var id1 id2 start2 tol maxiter simple verbose dots jfe op1 op2 op3 op4 op5 autoff
    * count1..count5 hold the five most recent values of dif (count5 is the newest)
    local count1=0
    local count2=0
    local count3=0
    local count4=0
    local count5=0
    quietly {
        recast double `var'
        tempvar temp v1 v2 mean
        gen double `v1'=0
        gen double `temp'=0
        gen double `v2'=`start2'
        * If the starting values are not constant, add them to var before iterating
        qui sum `v2', meanonly
        if r(min)!=r(max) {
            qui replace `var' = `var' + `v2', nopromote
        }
        * Extra work variables are only needed when acceleration is enabled
        if "`simple'"=="" {
            tempvar v0 ym1 ym2 dum
            gen double `v0'=0
            gen double `ym1'=0
            gen double `ym2'=0
        }
        local iter=1
        local dif=1
        * c1 / c2: the last three iterates v0, v1, v2 are strictly monotone
        * (increasing for c1, decreasing for c2) and the latest step is smaller
        * than the one before it
        local c1 "(`v2'>`v1'&`v1'>`v0'&((`v2'+`v0')<(2*`v1')))"
        local c2 "(`v2'<`v1'&`v1'<`v0'&((`v2'+`v0')>(2*`v1')))"
        * Demean var within id1: the running sum divided by _n is a running
        * mean, so its value in the last observation of each group is the group mean
        capture drop `mean'
        sort `id1'
        by `id1': g double `mean' = sum(`var')/_n
        qui by `id1': replace `var' = `var' - `mean'[_N], nopromote
        while abs(`dif')>`tol' & `iter'!=`maxiter'{
            *while `dif'<1 & `iter'!=`maxiter'{
            * Step 1: id1 group mean of the current id2 effect
            capture drop `mean'
            sort `id1'
            by `id1': g double `mean' = sum(`v2')/_n
            qui by `id1': replace `mean' = `mean'[_N], nopromote
            * Shift the history: v1 becomes v0 (kept only for acceleration), v2 becomes v1
            if "`simple'"=="" {
                capture drop `v0'
                rename `v1' `v0'
            }
            else {
                capture drop `v1'
            }
            rename `v2' `v1'
            * Step 2: new id2 effect is the id2 group mean of var + mean
            sort `id2'
            by `id2': g double `v2' = sum(`var'+`mean')/_n
            qui by `id2': replace `v2' = `v2'[_N], nopromote
            * Acceleration: after op1 iterations, extrapolate v2 from v0, v1, v2
            * where the sequence is monotone (c1 or c2) and the result is non-missing
            if `iter'>`op1'&"`simple'"=="" {
                capture drop `dum'
                * NOTE(review): dum is generated without an explicit storage type
                gen `dum'=`v1'+((`v2'-`v1')*(`v1'-`v0')/(2*`v1'-`v0'-`v2'))*(1-[(`v2'-`v1')/(`v1'-`v0')]^`op2')
                replace `v2'=`dum' if (`c1'|`c2')&(`dum'<.), nopromote
                * Every op3 iterations: a second extrapolation using exponent op4,
                * applied only where it moved less than op5 since it was last computed
                if mod(`iter',`op3')==0 {
                    replace `ym1'=`ym2', nopromote
                    replace `ym2'=`v1'+((`v2'-`v1')*(`v1'-`v0')/(2*`v1'-`v0'-`v2'))*(1-[(`v2'-`v1')/(`v1'-`v0')]^`op4')
                    replace `v2'=`ym2' if (`c1'|`c2')&(abs(`ym1'-`ym2')<`op5'), nopromote
                }
            }
            * Convergence measure: average relative difference between v2 and v1
            qui replace `temp'=sum(reldif(`v2',`v1')), nopromote
            local dif=`temp'[_N]/_N
            *count if abs(`v2'-`v1')<`tol'
            *local dif=r(N)/_N
            * Progress dots: header on the first iteration, then one entry per iteration
            if `dots' {
                if `iter'==1 {
                    _dots 0, title(Iterations) reps(`maxiter')
                }
                _dots `iter' 0
            }
            if "`verbose'"!="" {
                count if abs(`v2'-`v1')<`tol'
                noisily di " `iter' - Dif --> " %-12.7g `dif' " % fe below tolerance --> " %-07.5f 100*r(N)/_N " OP2 -> "`op2'
                *noisily di "`iter' --> % fe below tolerance --> " %-08.5f 100*`dif' " OP2 -> "`op2'
            }
            * Shift the history of dif values
            local count1=`count2'
            local count2=`count3'
            local count3=`count4'
            local count4=`count5'
            local count5=`dif'
            * dif rose on the last two iterations: reset op2 to 1
            * (only with automatic adjustment on, acceleration on, and op2 above 1)
            if (`count5'>`count4')&(`count4'>`count3')&("`autoff'"=="")&("`simple'"=="")&(`op2'>1) {
                local op2=1
                local count5=0
            }
            * dif fell on each of the last four iterations: raise op2 by one;
            * count5 is set to missing after the adjustment
            if (`count5'<`count4')&(`count4'<`count3')&(`count3'<`count2')&(`count2'<`count1')&("`autoff'"=="")&("`simple'"=="") {
                local op2=`op2'+1
                local count5=.
            }
            local iter=`iter'+1
        }
        * Final transformed variable
        qui replace `var' = `var' - `v2' + `mean', nopromote
    }
    * NOTE(review): iter was incremented after the last completed pass, so the
    * count reported below is one more than the number of passes executed
    if `iter'==`maxiter' {
        di
        di in red "Maximum number of iterations reached"
        di in red "Algorithm did not converge for variable `var'"
        di in red "Last improvement: `dif'"
    }
    else {
        di
        di in yellow "Variable `var' converged after `iter' Iterations"
    }
    * Optionally return the final id2 effect in a new variable
    if "`jfe'"!="" {
        gen double `jfe'=`v2'
    }
end

* outvars: saves one transformed variable to disk.
* Positional arguments: orig, var, fe2, outdata.
* Keeps __uid, orig, var and fe2; renames var to __t_<var> and fe2 to
* __fe2_<var>; saves the result as <outdata>_<var> (replacing any existing
* file); the data in memory are restored afterwards.
program define outvars
    args orig var fe2 outdata
    preserve
    keep __uid `orig' `var' `fe2'
    sort __uid
    rename `var' __t_`var'
    rename `fe2' __fe2_`var'
    qui save `outdata'_`var', replace
    di in yellow " `var' was saved "
    restore
end

* checkvars: convergence check.
* Positional arguments: orig, fe2, id1.
* Builds a temporary fe1 as the id1 group mean of orig - fe2, regresses orig
* on fe1 and fe2, and displays both coefficients.
program define checkvars
    args orig fe2 id1
    tempvar fe1 dum2
    gen double `dum2'=`orig'-`fe2'
    * Running mean within id1; the last observation of each group holds the group mean
    sort `id1'
    by `id1': g double `fe1' = sum(`dum2')/_n
    qui by `id1': replace `fe1' = `fe1'[_N], nopromote
    qui _regress `orig' `fe1' `fe2'
    di in yellow "Checking if model converged - Coefficients for fixed effects should equal 1"
    di in yellow "Coefficient for id1 --> "_b[`fe1']
    di in yellow "Coefficient for id2 --> "_b[`fe2']
end

/* This routine is from Amine Quazad's a2reg program */
/* It establishes the connected groups in the data */
*Find connected groups for normalization
* __makegps: creates the variable named in groupid().
* Options: id1(varname) id2(varname) groupid(name); if/in restrict the sample.
* groupid must not already exist. Observations outside the sample, or with
* missing id1 or id2, are left with a missing group id (asserted at the end).
capture program drop __makegps
program define __makegps
    version 9.2
    syntax [if] [in], id1(varname) id2(varname) groupid(name)
    marksample touse
    markout `touse' `id1' `id2'
    confirm new variable `groupid'
    sort `id1' `id2'
    preserve
    *Work with a subset of the data consisting of all id1-id2 combinations
    keep if `touse'
    * After the collapse, touse holds the number of observations in each id1-id2 cell
    collapse (sum) `touse', by(`id1' `id2')
    sort `id1' `id2'
    *Start by assigning the first id1 value to group 1, then iterate to fill this out
    tempvar group newgroup1 newgroup2
    gen double `group'=`id1'
    local finished=0
    local iter=1
    * Propagate the minimum group label across id2 and then id1 until no label changes
    while `finished'==0 {
        quietly {
            bysort `id2': egen double `newgroup1'=min(`group')
            bysort `id1': egen double `newgroup2'=min(`newgroup1')
            qui count if `newgroup2'~=`group'
            local nchange=r(N)
            local finished=(`nchange'==0)
            replace `group'=`newgroup2'
            drop `newgroup1' `newgroup2'
        }
        di in yellow "On iteration `iter', changed `nchange' assignments"
        local iter=`iter'+1
    }
    sort `group' `id1' `id2'
    * NOTE(review): complement is declared here but not used in this program
    tempvar nobs complement
    * Number the groups by descending observation count: nobs is negated so that
    * group() assigns the smallest ids to the groups with the most observations
    by `group': egen double `nobs'=sum(`touse')
    replace `nobs'= -1*`nobs'
    egen double `groupid'=group(`nobs' `group')
    keep `id1' `id2' `groupid'
    sort `id1' `id2'
    tempfile gps
    save `gps'
    restore
    * Attach the group ids to the original data and verify the merge:
    * no using-only rows, ids present for matched rows, missing for unmatched rows
    tempvar mrg2group
    merge `id1' `id2' using `gps', uniqusing _merge(`mrg2group')
    assert `mrg2group'~=2
    assert `groupid'<. if `mrg2group'==3
    assert `groupid'==. if `mrg2group'==1
    drop `mrg2group'
end