/* ado\mahaselectunique.ado, began 2012oct12 David Kantor; kantor.d@att.net */ prog def mahaselectunique version 8.2 /* for compatibility with other maha... programs. */ *! version 1.0.2 2012nov12 /* Given a file that is the -genfile- output of mahapick, select a set of unique matches for the treated set. This will -use- that file, so you need to start out empty or have non-altered content, or use the -clear- option. The results will be written to a file using post. */ #delimit ; syntax , usefile(string) writefile(string) idvar(name) [ prime_id(name) matchnum(name) scorevar(name) replace clear nmatch(integer 1) seed(string) nostrict ] ; #delimit cr /* seed is either an integer or a seed code ("X" + 36-digit hex value (possibly limited values)). */ if "`prime_id'" == "" { local prime_id "_prime_id" } if "`matchnum'" == "" { local matchnum "_matchnum" } capture des using `"`usefile'"' if _rc == 601{ disp as err "could not find file `usefile'" exit _rc } else if _rc == 610 { disp as err "file `usefile' not in Stata format" exit _rc } else if _rc { disp as err "unexpected error in seeking file `usefile'" exit _rc } local varstoseek "`prime_id' `idvar' `matchnum' `scorevar'" foreach v of local varstoseek { capture des `v' using `"`usefile'"' if _rc == 111 { disp as err "could not find var `v' in `usefile'" exit _rc } else if _rc{ disp as err "unexpected error in seeking var `v' in file `usefile'" exit _rc } } local scorevar2 "`scorevar'" if "`scorevar'" == "" { local scorevar2 "_score" capture des `scorevar2' using `"`usefile'"' if _rc { local scorevar2 } } /* scorevar may or may not be present in the file. `scorevar' is either user-specified or empty. `scorevar2' is, at this point,... a user-specified name -- if the specified var exists in usefile, else... "_score", if that variable exists in usefile, blank, otherwise. */ /* disp "scorevar2 = `scorevar2'" */ if "`replace'" == "" { if lower(substr("`writefile'", -4, .)) ~= ".dta" { local suffix ".dta" } confirm new file `writefile'`suffix' } if "`seed'" ~= "" { set seed `seed' } /* ~~~debug~~~ disp "---strict = `strict'" */ if "`strict'" == "" { local restriction "if `matchnum' ==0" } /* ~~~~debug~~~~ disp `"----use command: use `prime_id' `matchnum' using `"`usefile'"' `restriction' , `clear'"' */ use `prime_id' `matchnum' using `"`usefile'"' `restriction' , `clear' qui bysort `prime_id': keep if _n==1 /* no effect for properly formed files using -strict- */ drop `matchnum' forvalues jj = 1/`nmatch' { tempvar t1 gen double `t1' = uniform() sort `t1', stable tempname key`jj' gen long `key`jj'' = _n drop `t1' } sort `prime_id' by `prime_id': assert _N ==1 tempfile keys save `keys' use `"`usefile'"' if `matchnum' >0 sort `prime_id' qui merge `prime_id' using `keys', uniqus if "`strict'" == "" { qui count if _merge==1 if r(N) >0 { disp as err r(N) " non-matched records; " as text "will be dropped; alternatively use " as input "nostrict" as text " option." tempvar n1 bysort `prime_id': gen int `n1' = _n disp "list of prime_id values unmatched -- to be dropped" list `prime_id' if _merge==1 & `n1' ==1, noobs drop if _merge==1 } } else { assert _merge==2 | _merge==3 } assert mi(`matchnum') == (_merge==2) /* `prime_id' had no matches in the mahapick process. */ qui replace `matchnum' = -1 if mi(`matchnum') assert (`matchnum'<0) == (_merge==2) drop _merge /* adapting some code from mahapick Use the name genfile_handle, adapted from mahapick, though the current filename is stored in writefile. */ */ if "`scorevar2'" ~= "" { local scoretyp : type `scorevar2' local score_elt "`scoretyp' `scorevar2'" local score_elt2 "(`scorevar2'[\`kk'])" local score_elt_mis "(.)" } local typ1: type `idvar' if substr("`typ1'",1,3) == "str" { local idvarmisval `""""' } else { local idvarmisval "." } tempname genfile_handle postfile `genfile_handle' `typ1' (`prime_id' `idvar') int `matchnum' `score_elt' using `writefile', `replace' disp as text "file `writefile' opened for posting" tempname primeval end_group2 tempvar taken gen byte `taken' =0 forvalues jj = 1/`nmatch' { sort `key`jj'' `matchnum' by `key`jj'' `matchnum': assert _N==1 tempvar n end_of_group gen long `n' = _n by `key`jj'': gen long `end_of_group' = `n'[_N] local kk = 1 while `kk' <= _N { scalar `primeval' = `prime_id'[`kk'] scalar `end_group2' = `end_of_group'[`kk'] /* capture the value */ while `kk' <= scalar(`end_group2') & `taken'[`kk'] { local `kk++' } if `kk' <= scalar(`end_group2') { /* must be ~`taken'[`kk'] */ post `genfile_handle' (scalar(`primeval')) (`idvar'[`kk']) (`matchnum'[`kk']) `score_elt2' qui replace `taken' = 1 in `kk' if ~mi(`idvar'[`kk']) { qui replace `taken' = 1 if `idvar' == `idvar'[`kk'] } local kk = scalar(`end_group2') + 1 } else { /* `kk' > scalar(`end_group2') */ post `genfile_handle' (scalar(`primeval')) (`idvarmisval') (.) `score_elt_mis' } } drop `n' `end_of_group' } postclose `genfile_handle' disp as text "file `writefile' closed" end