*! v.1.3.74 iterative proportional fitting (raking) by Stas Kolenikov skolenik at gmail dot com program define ipfraking, rclass version 10 syntax [pw/] [if] [in] , [ CTOTal( namelist ) /// GENerate(name) quietly replace ITERate(int 2000) TOLerance(passthru) CTRLTOLerance(passthru) loglevel(int 0) meta double nograph /// trimhirel(passthru) trimhiabs(passthru) trimlorel(passthru) trimloabs(passthru) TRIMFREQuency(string) trace one(varlist min=1 max=1 numeric) /// selfcheck from(varname) noDIVergence alpha(passthru) LINear * ] // syntax: // [pw=original weight variable] // ctotal is the list of matrix names, each matrix is a e(b) of an appropriate -total y, over()- command // generate() is the name of the variable to be created // replace indicates that the weight variable is to be overwritten // from() is the variable to start from // iterate is the maximum number of iterations // tolerance is the difference in weight adjustment ratios over an iteration cycle // alpha is the relative adjustment (1 by default) // linear linear calibration // nodivergence usually makes sense; sometimes, especially with trimming, the objective function fails to go down // trim[hi|lo][abs|rel] trimming values: HI from above, LO from below; ABS in absolute values, REL in ratios if "`exp'"=="" & "`selfcheck'" == "" { display as error "pweight is required" exit 198 } if "`replace'" != "" { capture confirm numeric variable `exp' if _rc { di "{err}`exp' is not a numeric variable; cannot replace" exit 198 } } if "`selfcheck'" != "" { SelfCheck exit } else if "`ctotal'" == "" { display as error "ctotal() is required" exit 198 } // parse the trimming options // dirty tricks: pushes back // local trimopts `trimhiabs' `trimhirel' `trimloabs' `trimlorel' // updates trimfrequency with the default "sometimes" value as needed CheckTrimOptions , `trimhiabs' `trimhirel' `trimloabs' `trimlorel' trimfrequency(`trimfrequency') tempvar oldweight currweight prevweight marksample touse, zeroweight if "`one'" == "" { tempvar one quietly generate byte `one' = 1 } if ("`generate'"!="") + ("`replace'"!= "") != 1 { display as error "one and only one of generate() or replace must be specified" exit 198 } if "`generate'"!="" { capture confirm new variable `generate' // the following line will fail intentionally if _rc generate `double' `generate' = . } // finished checking input options display // parse and check the control totals ControlCheckParse `ctotal' , one( `one' ) loglevel(`loglevel') touse( `touse' ) generate double `oldweight' = `exp' if `touse' generate double `prevweight' = `oldweight' if "`from'" == "" generate double `currweight' = `oldweight' else generate double `currweight' = `from' local nvars : word count `ctotal' local prevobj . if "`trace'" != "" { local traceplot traceplot( forvalues k=1/`nvars' { tempname mreldif`k'var CheckResults , target(`mat`k'') `ctrltolerance' loglevel(`loglevel') quietly : /// total `var`k'' if `touse' [pw=`currweight'] , over(`over`k'', nolab) quietly generate double `mreldif`k'var' = r(mreldif) in 1 label variable `mreldif`k'var' "`: word `k' of `ctotal''" local traceplot `traceplot' `mreldif`k'var' } local traceplot `traceplot' ) } if !inlist("`mataoptevaltype'","","d0","d1","d1debug","d2","d2debug") { di "{err}invalid mataoptevaltype(`mataoptevaltype')" exit 198 } // choose the method if "`linear'" == "linear" { // linear calibration -- through Mata di "{txt}Linear calibration" if "`trimopts'" != "" { display as error "Trimming is not supported with linear weights" exit (198) } tempname prefix allctotals scale orig_scale GenerateCalibVars , ctotal(`ctotal') prefix(`prefix') local ctrlvarlist_u `r(varlist)' // vectorize the control totals; label the matrix rows for later use to create tempvars MergeCtotals `ctotal', noscale matrix `allctotals' = r(Merged) // touse mata : st_view( touse=., ., "`touse'" ) // input weights mata : st_view( currweight=., ., "`currweight'", "`touse'" ) // calibration variables mata : st_view( X=., ., "`ctrlvarlist_u'", "`touse'" ) // weighted total mata : wtotal = quadcross( X, currweight )' // targets mata : targets = st_matrix("`allctotals'") // X'X mata : XtX = quadcross( X, currweight, X ) mata : invXtX = makesymmetric( pinv( XtX ) ) // difference mata : lambda = (targets - wtotal) * invXtX // linear weights: overwrite mata : currweight[,] = currweight :* (1 :+ X * lambda') return local flavor Linear return scalar converged = 1 } else { // use iterative proportional fitting forvalues i=1/`iterate' { quietly replace `prevweight' = `currweight' if `loglevel' > 1 di forvalues k=1/`nvars' { PropAdjust `currweight' if `touse' , target(`mat`k'') control(`var`k'') over(`over`k'') loglevel(`loglevel') `alpha' if "`trimfrequency'" == "often" & "`trimopts'" != "" TrimWeights `oldweight' `currweight', `trimopts' one( `one' ) over( `over`k'' ) loglevel(`loglevel') } if "`trace'" != "" { forvalues k=1/`nvars' { CheckResults , target(`mat`k'') `ctrltolerance' loglevel(`loglevel') quietly : /// total `var`k'' if `touse' [pw=`currweight'] , over(`over`k'', nolab) quietly replace `mreldif`k'var' = r(mreldif) in `=`i'+1' } } if "`trimfrequency'" == "sometimes" & "`trimopts'"!="" TrimWeights `oldweight' `currweight', `trimopts' one( `one' ) over( `overlist' ) loglevel(`loglevel') CheckConvergence `prevweight' `currweight' if `touse', `tolerance' local currobj = r(maxreldif) if `loglevel' > 0 di _n display "{txt} Iteration `i', max rel difference of raked weights = {res}" `currobj' if r(converged) continue, break if `currobj' > `prevobj' & `i' > 2 { display "{err}Warning: raking procedure appears diverging" if "`divergence'" != "nodivergence" continue, break } local prevobj = `currobj' } return scalar converged = r(converged) return local flavor Raked } if !return(converged) { display "{err}Warning: raking procedure did not converge; check the `generate' variable" } if "`trimfrequency'" == "once" & "`trimopts'"!="" TrimWeights `oldweight' `currweight', `trimopts' one( `one' ) over( `overlist' ) return add // check if controls matched local badcontrols 0 tempname pass local mrdmax = 0 local worstvar local worstcat forvalues k=1/`nvars' { CheckResults , target(`mat`k'') `ctrltolerance' loglevel(`loglevel') `linear' : /// total `var`k'' if `touse' [pw=`currweight'] , over(`over`k'', nolab) matrix `pass' = r(target) matrix rowname `pass' = `over`k'' return matrix target`k' = `pass' matrix `pass' = r(result) return matrix result`k' = `pass' return scalar mreldif`k' = r(mreldif) matrix `pass' = r(reldif) return matrix reldif`k' = `pass' local mrdmax = max( `mrdmax', r(mreldif) ) local badcontrols = `badcontrols' + r(badcontrols) local whicharebad `whicharebad' `mat`k'' matrix `pass' = return(reldif`k') local thesecats : colnames `pass' forvalues j=1/`=colsof(`pass')' { if reldif(`pass'[1,`j'],`mrdmax') `ctrltolerance') return scalar badcontrols = `badcontrols' if `badcontrols' & "`quietly'" == "" { if `loglevel' > 0 di _n display as error "Warning: the controls `target' did not match" if `loglevel' > 0 { display "{txt}Target:" _c matrix list `target', noheader format(%12.0g) display "{txt}Realization:" _c matrix list `bb', noheader format(%12.0g) } } tempname mcopy rd matrix `rd' = J(1,`=colsof(`target')',.) forvalues k=1/`=colsof(`target')' { matrix `rd'[1,`k'] = reldif( `bb'[1,`k'], `target'[1,`k'] ) } matrix colnames `rd' = `: colfullnames `target'' matrix rownames `rd' = `: rownames `target'' matrix rownames `bb' = `: rownames `target'' matrix `mcopy' = `target' return scalar mreldif = mreldif(`bb',`target') return matrix target = `mcopy' return matrix result = `bb' return matrix reldif = `rd' end // of CheckResults program define PropAdjust syntax varname(numeric) [if] [in], target(namelist min=1 max=1) control(varname numeric) over(varname numeric) [alpha(real 1) loglevel(int 0)] local currweight `varlist' marksample touse /* capture total `control' [pw=`currweight'] if `touse', over( `over', nolab ) if _rc { display as error "cannot compute controls for `control' over `over' with the current weights" exit 301 } tempname bb matrix `bb' = e(b) */ qui levelsof `over' if `touse', local( allover ) forvalues k=1/`: word count `allover'' { sum `control' if `touse' & `over' == `: word `k' of `allover'' [aw=`currweight'], mean tempname ctrl`k' scalar `ctrl`k'' = r(sum) } if `loglevel' < 2 local quietly quietly if `: word count `allover'' == 1 { if scalar(`ctrl1') == 0 { display as error "Warning: division by zero weighted total encountered with `control' control" } if `loglevel' > 1 display "{txt}Control {res}`control'{txt}: {res}" %10.0g `=`target'[1,1]' "{txt}/{res}" %10.0g scalar(`ctrl1') " " _c `quietly' replace `currweight' = `currweight' * `target'[1,1] / scalar(`ctrl1') if `touse' & `currweight' != 0 & `control' != 0 } else { // cycle over categories forvalues k=1/`= colsof(`target')' { capture assert "`: word `k' of `allover''" == "`: word `k' of `: colnames `target' ''" if _rc { // we've done the diagnostic before, so this should not be happening display as error "categories mismatch in PropAdjust" di "{txt}categories of {res}`over'{txt}: {res}`allover'" matrix list `target' exit 111 } if scalar(`ctrl`k'') == 0 { qui count if `touse' & `over' == `: word `k' of `allover'' & `currweight'!=0 & `control' != 0 if r(N) display as error "Warning: division by zero weighted total encountered with `control' control with `over' == `: word `k' of `allover''" } if `loglevel' > 1 display "{txt}Control {res}`over'{txt}, category {res}`: word `k' of `allover''{txt}: {res}" /// %10.0g `=`target'[1,`k']' "{txt}/{res}" %10.0g scalar(`ctrl`k'') " " _c `quietly' replace `currweight' = `currweight' * (`target'[1,`k'] / scalar(`ctrl`k''))^`alpha' /// if `touse' & `over' == `: word `k' of `allover'' & `currweight'!=0 & `control' != 0 } } end // of PropAdjust program define ControlCheckParse syntax namelist , one( varname ) [ touse(varname) loglevel(int 0) ] tempname b local nvars : word count `namelist' forvalues k=1/`nvars' { local mat`k' : word `k' of `namelist' if colsof(`mat`k'') == 1 { // this is an overall total capture local var`k' : colnames `mat`k'' if _rc { display as error "cannot process matrix `mat`k''" exit 111 } capture confirm numeric variable `var`k'' if _rc { display as error "variable `var`k'' corresponding to the control matrix `mat`k'' not found" exit 111 } local over`k' `one' c_local over`k' `one' c_local var`k' `var`k'' c_local mat`k' `mat`k'' tempname sum`k' scalar `sum`k'' = el( matrix(`mat`k''), 1, 1) } else { // this is -over()- something, must be obtained capture local var`k' : word 1 of `: coleq `mat`k''' if _rc { display as error "cannot process matrix `mat`k''" exit 111 } capture confirm numeric variable `var`k'' if _rc { display as error "variable `var`k'' corresponding to the control matrix `mat`k'' not found" exit 111 } local over`k' : rownames `mat`k'' capture confirm numeric variable `over`k'' if _rc { display as error "variable `over`k'' tabulating the control matrix `mat`k'' not found" exit 111 } capture total `var`k'' if `touse', over( `over`k'', nolab ) if _rc { display as error "`var`k'' and `over`k'' variables are not compatible" exit 111 } matrix `b' = e(b) if "`: colfullnames `mat`k'''" != "`: colfullnames `b''" { display as error "categories of `over`k'' do not match in the control `mat`k'' and in the data (nolab option)" local matknames : colfullnames `mat`k'' local bnames : colfullnames `b' display as error "This is what `mat`k'' gives: " _n as res " `matknames'" display as error "This is what I found in data: " _n as res " `bnames'" display as error "This is what `mat`k'' has that data don't: " if "`: list matknames - bnames'" != "" { display as res " `: list matknames - bnames'" } else { display as text " " } display as error "This is what data have that `mat`k'' doesn't: " if "`: list bnames - matknames'" != "" { display as res " `: list bnames - matknames'" } else display as text " " exit 111 } quietly count if missing(`over`k'') & `touse' if r(N) { display as error "Warning: `=r(N)' missing values of `over`k'' encountered; convergence will be impaired" } tempname sum`k' mata : st_numscalar("`sum`k''", sum( st_matrix("`mat`k''") ) ) c_local var`k' `var`k'' c_local over`k' `over`k'' c_local mat`k' `mat`k'' } local overlist `overlist' `over`k'' } c_local overlist `overlist' * check the sums tempname sumdif avesum scalar `sumdif' = 0 scalar `avesum' = 0 forvalues k=1/`nvars' { forvalues l=1/`k' { scalar `sumdif' = `sumdif' + abs( `sum`k'' - `sum`l'' ) } scalar `avesum' = `avesum' + `sum`k''/`nvars' } if `sumdif' > 100*`avesum'*`nvars'*(`nvars'-1)*c(epsdouble) { display as err "Warning: the totals of the control matrices are different:" forvalues k=1/`nvars' { display _col(4) "{txt}Target {res}`k' {txt}({res}`mat`k''{txt}) total" _col(45) " = {res}" %20.10g `sum`k'' } display } end // of ControlCheckParse program define TrimWeights, rclass syntax varlist(numeric min=2 max=2) , [ one(varname numeric) over(varlist) /// trimhirel(real `=c(maxdouble)') trimhiabs(real `=c(maxdouble)') trimlorel(real 0) trimloabs(real 0) loglevel(int 0) ] local oldweight : word 1 of `varlist' local newweight : word 2 of `varlist' if `loglevel' < 2 local quietly quietly tempvar trimhi trimlo `quietly' generate byte `trimhi' = (`newweight' > `oldweight' * `trimhirel') | `newweight' > `trimhiabs' `quietly' generate byte `trimlo' = (`newweight' < `oldweight' * `trimlorel') | `newweight' < `trimloabs' `quietly' count if `trimhi' + `trimlo' > 0 & !missing( `trimhi' + `trimlo' ) if r(N) { // check ratios `quietly' replace `newweight' = `oldweight' * `trimhirel' if `newweight' > `oldweight' * `trimhirel' & !mi(`newweight') `quietly' replace `newweight' = `oldweight' * `trimlorel' if `newweight' < `oldweight' * `trimlorel' // check ranges `quietly' replace `newweight' = `trimhiabs' if `newweight' > `trimhiabs' & !mi(`newweight') `quietly' replace `newweight' = `trimloabs' if `newweight' < `trimloabs' // report the trimming categories if `loglevel' > 0 { local uniqover : list uniq over local uniqover : list uniqover - one tempvar groupover `quietly' egen `groupover' = group( `uniqover' ) `quietly' replace `groupover' = `groupover' * ( `trimhi' | `trimlo' ) quietly levelsof `groupover' if ( `trimhi' | `trimlo' ) , local( trimmedcells ) // display the header display "{txt}{dup 31:{c -}}{c TT}{dup 12:{c -}}{c TT}{dup 12:{c -}}" display _col(32) "{txt}{c |} Trimmed {c |} Trimmed" display _col(32) "{txt}{c |} from above {c |} from below" display "{txt}{dup 31:{c -}}{c +}{dup 12:{c -}}{c BT}{dup 12:{c -}}" _c foreach g in `trimmedcells' { foreach x of varlist `uniqover' { sum `x' if `groupover' == `g', mean display _n "{res}`x'" _col(20) "{txt} = {res}" r(mean) _col(32) "{txt}{c |}" _c } quietly count if `groupover' == `g' & `trimhi' display _col(36) "{res}" r(N) _col(45) "{txt}{c |}" _c quietly count if `groupover' == `g' & `trimlo' display _col(50) "{res}" r(N) //////// display the divider or the last line // sum `groupover' if ( `trimhi' | `trimlo' ), meanonly // if `g' == r(max) { display "{txt}{dup 31:{c -}}{c BT}{dup 12:{c -}}{c BT}{dup 12:{c -}}" _c // } // else { // display "{txt}{dup 31:{c -}}{c +}{dup 12:{c -}}{c +}{dup 12:{c -}}" _c // } } display _n } return scalar anytrim = 1 } else return scalar anytrim = 0 end // of TrimWeights program define CheckConvergence, rclass syntax varlist(numeric min=2 max=2) [if] [in], [ tolerance(real 1e-6) ] local oldweight : word 1 of `varlist' local newweight : word 2 of `varlist' marksample touse tempvar reldif quietly generate double `reldif' = abs(`newweight'-`oldweight')/`oldweight' if `touse' quietly count if `reldif' > `tolerance' & !missing(`reldif') if r(N) return scalar converged = 0 else return scalar converged = 1 sum `reldif', meanonly return scalar maxreldif = r(max) end // of CheckConvergence program define DiagDisplay, rclass syntax varlist(numeric min=2 max=2) [if] [in] , [ nograph traceplot(varlist) * ] marksample touse local oldweight : word 1 of `varlist' local newweight : word 2 of `varlist' tempvar wratio quietly generate double `wratio' = `newweight'/`oldweight' if `touse' // summary statistics quietly sum `oldweight' if `touse' local oldmean = r(mean) local oldsd = r(sd) local oldmin = r(min) local oldmax = r(max) quietly sum `newweight' if `touse' local newmean = r(mean) local newsd = r(sd) local newmin = r(min) local newmax = r(max) quietly sum `wratio' if `touse' local wrmean = r(mean) local wrmin = r(min) local wrmax = r(max) local wrsd = r(sd) local d = min(6-floor( log10(`newmean' ) ), 6) display _n "{txt} Summary of the weight changes" _n display _col(15) "{txt}{c |} Mean Std. dev. Min Max CV" display "{txt}{dup 14:{c -}}{c +}{dup 50:{c -}} " display "{txt}Orig weights {c |} " _c display _col(15) as res %8.`d'g `oldmean' _c display _col(28) as res %8.`d'g `oldsd' _c display _col(38) as res %8.`d'g `oldmin' _c display _col(49) as res %9.`d'g `oldmax' _c display _col(60) as res %6.4g `oldsd'/`oldmean' display "{txt}Raked weights {c |} " _c display _col(15) as res %8.`d'g `newmean' _c display _col(28) as res %8.`d'g `newsd' _c display _col(38) as res %8.`d'g `newmin' _c display _col(49) as res %9.`d'g `newmax' _c display _col(60) as res %6.4g `newsd'/`newmean' return scalar raked_mean = `newmean' return scalar raked_sd = `newsd' return scalar raked_min = `newmin' return scalar raked_max = `newmax' return scalar raked_cv = `newsd'/`newmean' display "{txt}Adjust factor {c |} " _c display _col(17) as res %8.4f `wrmean' _c display _col(38) as res %8.4f `wrmin' _c display _col(49) as res %9.4f `wrmax' return scalar factor_mean = `wrmean' return scalar factor_min = `wrmin' return scalar factor_max = `wrmax' return scalar factor_cv = `wrmean'/`wrsd' if "`graph'" == "" { // histograms tempname histnew histratio histboth label variable `newweight' "Calib weights" quietly histogram `newweight', freq nodraw name( `histnew' ) label variable `wratio' "Adjustment factor" quietly histogram `wratio', freq nodraw name( `histratio' ) * if "`traceplot'" != "" local nameopt name(`histboth') * else local nameopt `name' // traceplot if "`traceplot'" != "" { tempvar obsno mindif maxdif tempname traceline logtraceline sum `traceplot', mean qui gen int `obsno' = _n-1 in 1/`=r(N)' // split the legend local k=0 foreach x of varlist `traceplot' { local ++k if 2*`k' > `: word count `traceplot'' local order2 `order2' `k' else local order1 `order1' `k' } label variable `obsno' "Iteration" // the last raking margin should nearly always be zero local last : word `: word count `traceplot'' of `traceplot' local traceplotl : list traceplot - last // come up with neat labels on log scale qui egen float `mindif' = rowmin(`traceplotl') qui egen float `maxdif' = rowmax(`traceplot') sum `mindif', mean local logmin = floor( log10( r(min) ) ) sum `maxdif', mean local logmax = ceil( log10( r(max) ) ) local logstep = round( (`logmax'-`logmin')/5 ) if `logstep' == 0 local logstep 1 foreach d of numlist `logmin'(`logstep')`logmax' { local thelab `thelab' 1e`d' } quietly line `traceplot' `obsno' , nodraw name( `traceline' ) legend( cols(1) order( `order2' ) ) quietly line `traceplotl' `obsno' , nodraw name( `logtraceline' ) legend( cols(1) order( `order1' ) ) /// yscale( log ) ylab( `thelab', angle(horizontal) ) } graph combine `histnew' `histratio' `traceline' `logtraceline', `options' } end // of DiagDisplay program define SelfCheck preserve local cmore `c(more)' set more off tempname bb capture use nhanes2, clear if _rc webuse nhanes2, clear generate byte _one = 1 total _one [pw=finalwgt], over(sex, nolab) matrix total_sex = e(b) matrix total_sex[1,1] = total_sex[1,1] + 10000 matrix total_sex[1,2] = total_sex[1,2] + 10000 matrix rownames total_sex = sex di as inp _n ">> 1. raking with a single margin" ipfraking [pw=finalwgt] , ctotal( total_sex ) generate( rweight1 ) return list assert r(converged) == 1 assert r(badcontrols) == 0 total _one [pw=rweight1], over(sex, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_sex ) < c(epsfloat) di as inp _n ">> 2. raking with two margins" total _one [pw=finalwgt], over(race, nolab) matrix total_race = e(b) matrix total_race[1,1] = total_race[1,1] + 15000 matrix total_race[1,2] = total_race[1,2] + 4000 matrix total_race[1,3] = total_race[1,3] + 1000 matrix rownames total_race = race ipfraking [pw=finalwgt] , ctotal( total_sex total_race ) generate( rweight2 ) return list assert r(converged) == 1 assert r(badcontrols) == 0 total _one [pw=rweight2], over(sex, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_sex ) < c(epsfloat) total _one [pw=rweight2], over(race, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_race ) < c(epsfloat) // for later use with factors gen byte female_race = (sex==2) * race total female [pw=finalwgt], over(female_race, nolab) matrix total_female_race = e(b) matrix rownames total_female_race = female_race assert total_female_race[1,1] == 0 matrix total_female_race[1,2] = total_female_race[1,2] + 8000 matrix total_female_race[1,3] = total_female_race[1,3] + 2000 matrix total_female_race[1,4] = total_female_race[1,4] + 600 // somewhat unbalanced sample set seed 12345 sample 500, count by(region) di as inp _n ">> 3. regular raking without constraints" ipfraking [pw=finalwgt] , ctotal( total_sex total_race ) generate( rweight3 ) return list assert r(converged) == 1 assert r(badcontrols) == 0 total _one [pw=rweight3], over(sex, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_sex ) < c(epsfloat) total _one [pw=rweight3], over(race, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_race ) < c(epsfloat) di as inp _n ">> 4. raking with constraints on adjustment factors (control totals will be off)" ipfraking [pw=finalwgt] , ctotal( total_sex total_race ) generate( rweight4 ) trimhirel( 5.5 ) tol(1e-10) return list assert r(converged) == 1 assert r(badcontrols) > 0 total _one [pw=rweight4], over(sex, nolab) matrix `bb' = e(b) display mreldif( `bb',total_sex ) total _one [pw=rweight4], over(race, nolab) matrix `bb' = e(b) display mreldif( `bb',total_race ) di as inp _n ">> 5. raking with constraints on absolute values of weights" ipfraking [pw=finalwgt] , ctotal( total_sex total_race ) generate( rweight5 ) trimhirel( 5.5 ) trimhiabs(200000) return list assert r(converged) == 1 assert r(badcontrols) > 0 total _one [pw=rweight5], over(sex, nolab) matrix `bb' = e(b) capture noisily assert mreldif( `bb',total_sex ) < c(epsfloat) total _one [pw=rweight5], over(race, nolab) matrix `bb' = e(b) capture noisily assert mreldif( `bb',total_race ) < c(epsfloat) di as inp _n ">> 6. this will take longer to converge" capture noisily ipfraking [pw=finalwgt] , ctotal( total_sex total_race ) generate( rweight6 ) trimhiabs(100000) trimloabs(50000) return list assert r(converged) == 1 assert r(badcontrols) == 0 di as inp _n ">> 7. test -replace- option" gen rweight7 = finalwgt capture noisily ipfraking [pw=rweight7] , ctotal( total_sex total_race ) replace trimhiabs(100000) trimloabs(50000) return list assert r(converged) == 1 assert r(badcontrols) == 0 count if rweight7 != finalwgt assert r(N) > 0 assert reldif( rweight7, rweight6 ) < c(epsfloat) di as inp _n ">> 8. test limited # of iterations" capture noisily ipfraking [pw=finalwgt] , ctotal( total_sex total_race ) generate( rweight8 ) trimhiabs(100000) trimloabs(50000) iter(5) return list assert r(converged) == 0 assert r(badcontrols) > 0 di as inp _n ">> 9. test raking with non-trivial factors" capture noisily ipfraking [pw=finalwgt] , ctotal( total_sex total_race total_female_race ) generate( rweight9 ) trimhiabs(200000) trimloabs(10000) iter(50) return list assert r(converged) == 1 total _one [pw=rweight9], over(sex, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_sex ) < 100*c(epsfloat) total _one [pw=rweight9], over(race, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_race ) < 100*c(epsfloat) total female [pw=rweight9], over(female_race, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_female_race ) < 100*c(epsdouble) di as inp _n ">> 10. test linear weights" capture noisily ipfraking [pw=finalwgt] , ctotal( total_sex total_race ) generate( lweight10 ) linear return list assert r(converged) == 1 total _one [pw=lweight10], over(sex, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_sex ) < 100*c(epsfloat) total _one [pw=lweight10], over(race, nolab) matrix `bb' = e(b) assert mreldif( `bb',total_race ) < 100*c(epsfloat) compare lweight10 rweight3 reg lweight10 rweight3 di as inp _n ">> 99. intentional syntax errors" capture noisily ipfraking assert _rc == 198 capture noisily ipfraking [pw=finalwgt] assert _rc == 198 capture noisily ipfraking [pw=finalwgt], ctotal( total_sex total_race ) assert _rc == 198 capture noisily ipfraking [pw=finalwgt], ctotal( total_sex total_race ) generate( rweight99 ) replace assert _rc == 198 capture noisily ipfraking [pw=finalwgt], ctotal( total_sex total_race ) generate( rweight99 ) trimhiabs(0) assert _rc == 198 capture noisily ipfraking [pw=finalwgt], ctotal( total_sex total_race ) generate( rweight99 ) trimhiabs(200) trimloabs(10000) assert _rc == 198 capture noisily ipfraking [pw=finalwgt], ctotal( total_sex total_race ) generate( rweight99 ) trimhiabs(blah) assert _rc == 198 capture noisily ipfraking [pw=finalwgt], ctotal( total_sex total_race ) generate( rweight99 ) whatever(200) assert _rc == 198 capture noisily ipfraking [pw=finalwgt], ctotal( total_sex total_race ) generate( rweight99 ) trimhiabs(-5 5) assert _rc == 198 capture noisily ipfraking [pw=finalwgt], ctotal( total_sex total_race ) generate( rweight99 ) trimhiabs(200) linear assert _rc == 198 // end of checks set more `cmore' restore di _n _col(24) "{inp}ALL TESTS PASSED." end // of SelfCheck program define GenerateCalibVars , rclass syntax [if] [in], ctotal( string ) prefix( string ) marksample touse, novarlist * ctotal has been checked already, can be assumed good tokenize `ctotal' local k : word count `ctotal' forvalues i=1/`k' { local p = colsof(``i'') local thisvar : rownames ``i'' local thesecats : colnames ``i'' if `p' == 1 { * single continuous variable gen double `prefix'_`i' = `thisvar' if `touse' local calibvars `calibvars' `prefix'_`i' } else { forvalues j=1/`p' { local thisval : word `j' of `thesecats' gen byte `prefix'_`i'_`j' = (`thisvar'==`thisval') if `touse' local calibvars `calibvars' `prefix'_`i'_`j' } } } // quality check foreach x of varlist `calibvars' { qui count if `x' != 0 & !mi(`x') assert r(N)>0 } return local varlist `calibvars' end // of GenerateCalibVars program define ReportTrims, rclass syntax varlist(numeric min=2 max=2) [if] [in], [ one(varname numeric) over(varlist) /// trimhirel(real `=c(maxdouble)') trimhiabs(real `=c(maxdouble)') trimlorel(real 0) trimloabs(real 0) loglevel(int 0) ] local oldweight : word 1 of `varlist' local newweight : word 2 of `varlist' marksample touse qui { count if `touse' & `newweight'/`trimhiabs' > 1 - 1e-6 if r(N) noi di "{txt}Trimmed due to the upper absolute limit: {res}" r(N) "{txt} weights." return scalar n_trimhiabs = r(N) count if `touse' & `newweight'/`trimloabs' < 1 + 1e-6 if r(N) noi di "{txt}Trimmed due to the lower absolute limit: {res}" r(N) "{txt} weights." return scalar n_trimloabs = r(N) count if `touse' & `newweight'/`oldweight' > (1 - 1e-6)*(`trimhirel') if r(N) noi di "{txt}Trimmed due to the upper relative limit: {res}" r(N) "{txt} weights." return scalar n_trimhirel = r(N) count if `touse' & `newweight'/`oldweight' < (1 + 1e-6 ) * (`trimlorel' ) if r(N) noi di "{txt}Trimmed due to the lower relative limit: {res}" r(N) "{txt} weights." return scalar n_trimlorel = r(N) } end // of ReportTrims mata real rowvector my_total(string scalar yname, string scalar wname, string scalar tousename, string scalar overname, string scalar bname, string scalar lname) { st_view(X=.,.,(yname,wname),tousename) st_view(over=.,.,overname,tousename) info = panelsetup(over,1) bb = J(1,rows(info),.) ll = J(1,rows(info),.) for (i=1; i<=rows(info); i++) { XX = panelsubmatrix(X, i, info) bb[1,i] = quadcross( XX[,1] , XX[,2] ) ll[1,i] = over[info[i,1],1] } st_matrix(bname,bb) st_matrix(lname,ll) return(bb) } end // of mata within program exit /* History 1.1.8 added output of targets, controls and mismatches check that totals are the same 1.1.10 added support for maxentropy (which generally sucks) 1.1.11 -from()- option is added 1.1.12 all the reldifs are saved with -meta- option traceplot is added to the graphical output bugs with parameter transfer to -graph- are fixed 1.1.13 when a continuous variable over(1) is specified, only correct the weights for non-zero values of the control 1.1.14 nodivergence option is added; control convergence <- iter() ? alpha() is the speed of adjustment 1.1.15 Nothing is done with ipfraking, but mat2do utility program is added to the package 1.1.16 Nothing is done with ipfraking, but Stata Journal insert was initiated 1.1.17 Cosmetic changes in output (2013-01-02) Fixed bug in -replace- option 1.1.18 Chars always contain the objective function and the control accuracy on exit 1.1.19 Updated -selfcheck- 1.1.20 Cosmetic changes in output 1.1.21 -xls2row- is added to the package 1.1.22 xls2row.sthlp is written 1.1.23 utility programs -xls2row- and -mat2do- are documented 1.1.24 some additional information on convergence is being stored 1.1.25 raking through Mata optimization of the D-S objective function Case 2 1.1.26 trimming is added to fast implementation via trimming the converged Case 2 weights and cycling over 1.1.27 check for missing values in trimming high values; mstep added to slow down the algorithm 1.1.28 total computation rewritten in terms of a much faster -sum- than -total- 1.1.29 r(reldif#) matrix is returned 1.1.30 mat2do allows -append- option 1.1.31 -meta- better interacts with -replace- Stata Journal R&R is completed 1.1.33 -kink- options are being added 1.1.34 -kink- scheme is rewritten as kink[abs|rel][high|low], but it seems like only kinkrel* makes sense 1.1.34.22 debugging kink options (the numbering system reflects commits in 5878 project) 1.1.34.23 better meta data for ipfraking_report 1.2.35 -rapid- options are being tinkered with -touse- is passed to ControlCheckParse 1.2.36 -one()- variable is passed through to ControlCheckParse; otherewise report breaks down :( 1.2.37 the worst fitted target is explicitly reported 1.2.38 PropAdjust only adjusts the weights when the control/multiplication factor is not equal to zero This allows multiplicative factors together with -over()- specification Unit test is added on the functionality of nontrivial multiplicative factors Number of trimmed cases reported 1.2.41 Linear calibration with -linear- option 1.2.42 All -kink- and -rapid- options removed Unit test for -linear- added 1.3.44 Treatment of -replace- is changed: if -from- is specified, then the -from- variable is the one being replaced minor version bumped to reflect the important changes in functionality: kinks are gone, linear is added 1.3.46 Negative weights with linear calibration can still be used in CheckResults via my_total subroutine 1.3.49 Debugging the interaction of CheckResults with mytotal 1.3.61 Display if the list of mismatch categories is empty (intermediate commits are SJ edits) 1.3.62 Version numbers are aligned with -ipfraking-, -ipfraking_report-, -wgtcellcollapse- 1.3.67 Bugs in reporting mismatching categories 1.3.74 version numbers are aligned */