*! 1.2.0 NJC 2sep2015 * 1.1.0 NJC 27aug2015 * 1.0.0 NJC 20jul2015 * ipolate 1.3.3 21sep2004 program mipolate, byable(onecall) sort version 12 syntax varlist(min=2 max=2 numeric) [if] [in], /// GENerate(string) /// [ BY(varlist) /// Epolate /// Linear /// Cubic /// Spline /// Idw /// Idw2(numlist min=1 max=1 >=0) /// Pchip /// Forward /// Backward /// Nearest /// Groupwise /// ties(str) ] // syntax checks if _by() { if "`by'" != "" { di as err /* */ "option by() may not be combined with by prefix" exit 190 } local by "`_byvars'" } if "`ties'" != "" & "`nearest'" == "" { di as err "ties() applies only with nearest" exit 198 } // ties(): // indulge upper case: // any abbreviations of after, before, minimum, maximum if "`ties'" != "" { local ties = lower("`ties'") local nchar = length("`ties'") local OK = 0 if "`ties'" == substr("after", 1, `nchar') { local ties "after" local OK 1 } else if "`ties'" == substr("before", 1, `nchar') { local ties "before" local OK 1 } else if `nchar' == 1 & "`ties'" == "m" { di as err "m ambiguous for ties() option" exit 198 } else if "`ties'" == substr("minimum", 1, `nchar') { local ties "min" local OK 1 } else if "`ties'" == substr("maximum", 1, `nchar') { local ties "max" local OK 1 } if !`OK' { di as err "invalid ties() option: see help" exit 198 } } if "`idw2'" != "" local idw "idw" local nopts : word count /// `cubic' `spline' `pchip' `idw' `nearest' `linear' /// `forward' `backward' `groupwise' if `nopts' > 1 { di as err "must specify one interpolation method only" } else if `nopts' == 0 local linear "linear" confirm new var `generate' tokenize `varlist' args usery x local flag = 0 quietly { tempvar y marksample touse, novarlist replace `touse' = 0 if `x' >= . count if `touse' if r(N) == 0 error 2000 bysort `touse' `by' `x': /// gen double `y' = sum(`usery') / sum(`usery' < .) if `touse' by `touse' `by' `x': replace `y' = `y'[_N] if "`linear'" != "" { tempvar negy negx xhi yhi xlo ylo m b z gen double `negx' = -`x' gen double `negy' = -`y' sort `touse' `by' `negx' `negy' gen double `xhi' = `x' if `touse' gen double `yhi' = `y' if `touse' by `touse' `by': replace `yhi' = `yhi'[_n-1] if `y' >= . & `touse' by `touse' `by': replace `xhi' = `xhi'[_n-1] if `y' >= . & `touse' sort `touse' `by' `x' `y' gen double `xlo' = `x' if `touse' gen double `ylo' = `y' if `touse' by `touse' `by': replace `ylo' = `ylo'[_n-1] if `y' >= . & `touse' by `touse' `by': replace `xlo' = `xlo'[_n-1] if `y' >= . & `touse' gen double `m' = (`yhi'-`ylo')/(`xhi'-`xlo') drop `yhi' `xhi' gen double `b' = `ylo' - `m'*`xlo' drop `ylo' `xlo' `negx' `negy' gen double `z' = `y' if `touse' replace `z' = `m'*`x' + `b' if `touse' & `z' >= . } else if "`cubic'" != "" { tempvar negx negy ok ok2 x1 x2 x3 x4 y1 y2 y3 y4 m m1 m2 m3 m4 z * following values gen double `negx' = -`x' gen double `negy' = -`y' bysort `touse' `by' (`negx' `negy') : gen `ok' = _n * (`y' < .) by `touse' `by' : replace `ok' = `ok'[_n-1] if !`ok' by `touse' `by' : gen `ok2' = `ok'[_n-1] * (`ok' > `ok'[_n-1]) by `touse' `by' : replace `ok2' = `ok2'[_n-1] if !`ok2' by `touse' `by' : gen double `x4' = `x'[`ok2'] by `touse' `by' : gen double `y4' = `y'[`ok2'] by `touse' `by' : gen double `x3' = `x'[`ok'] by `touse' `by' : gen double `y3' = `y'[`ok'] * preceding values bysort `touse' `by' (`x' `y'): replace `ok' = _n * (`y' < .) by `touse' `by' : replace `ok' = `ok'[_n-1] if !`ok' by `touse' `by' : replace `ok2' = `ok'[_n-1] * (`ok' > `ok'[_n-1]) by `touse' `by' : replace `ok2' = `ok2'[_n-1] if !`ok2' by `touse' `by' : gen double `x1' = `x'[`ok2'] by `touse' `by' : gen double `y1' = `y'[`ok2'] by `touse' `by' : gen double `x2' = `x'[`ok'] by `touse' `by' : gen double `y2' = `y'[`ok'] gen double `m1' = (`x' - `x2') * (`x' - `x3') * (`x' - `x4') /* */ / ((`x1' - `x2') * (`x1' - `x3') * (`x1' - `x4')) gen double `m2' = (`x' - `x1') * (`x' - `x3') * (`x' - `x4') /* */ / ((`x2' - `x1') * (`x2' - `x3') * (`x2' - `x4')) gen double `m3' = (`x' - `x1') * (`x' - `x2') * (`x' - `x4') /* */ / ((`x3' - `x1') * (`x3' - `x2') * (`x3' - `x4')) gen double `m4' = (`x' - `x1') * (`x' - `x2') * (`x' - `x3') /* */ / ((`x4' - `x1') * (`x4' - `x2') * (`x4' - `x3')) gen double `m' = /* */ `m1' * `y1' + `m2' * `y2' + `m3' * `y3' + `m4' * `y4' gen double `z' = `y' if `touse' replace `z' = `m' if `touse' & `z' == . } else if "`nearest'" != "" { tempvar negx prevy prevx nexty nextx z * values before gen double `prevy' = `y' if `touse' & `y' < . gen double `prevx' = `x' if `touse' & `y' < . gen double `nexty' = `prevy' gen double `nextx' = `prevx' bysort `touse' `by' (`x'): replace `prevy' = `prevy'[_n - 1] if `prevy' == . by `touse' `by': replace `prevx' = `prevx'[_n - 1] if `prevx' == . * values after gen double `negx' = -`x' bysort `touse' `by' (`negx') : replace `nexty' = `nexty'[_n - 1] if `nexty' == . by `touse' `by' : replace `nextx' = `nextx'[_n - 1] if `nextx' == . * interpolation gen double `z' = `y' if `touse' replace `z' = `nexty' if (`nextx' - `x') < (`x' - `prevx') & `z' == . & `touse' replace `z' = `prevy' if (`x' - `prevx') < (`nextx' - `x') & `z' == . & `touse' if "`ties'" != "" { if "`ties'" == "after" { replace `z' = `nexty' if (`nextx' - `x') == (`x' - `prevx') & `z' == . & `touse' } else if "`ties'" == "before" { replace `z' = `prevy' if (`nextx' - `x') == (`x' - `prevx') & `z' == . & `touse' } else if "`ties'" == "min" { replace `z' = min(`nexty', `prevy') if (`nextx' - `x') == (`x' - `prevx') & `z' == . & `touse' } else if "`ties'" == "max" { replace `z' = max(`nexty', `prevy') if (`nextx' - `x') == (`x' - `prevx') & `z' == . & `touse' } } else replace `z' = (`nexty' + `prevy')/2 if (`nextx' - `x') == (`x' - `prevx') & `z' == . & `touse' local epolate } else if "`spline'" != "" { tempvar group z guse zmiss negy negx // only use one of any repeated x by `touse' `by' `x': gen byte `guse' = `touse' & (_n == 1) sort `guse' `by' `x' if "`by'" != "" { bysort `guse' `by': gen byte `group' = _n == 1 & `guse' by `guse': replace `group' = sum(`group') } else gen byte `group' = 1 su `group', meanonly local ng = r(max) gen double `z' = `y' if `touse' gen byte `zmiss' = 0 forval g = 1/`ng' { replace `guse' = `group' == `g' replace `zmiss' = `guse' & missing(`z') count if `zmiss' if r(N) > 1 { mata : csipolate("`y'", "`x'", "`guse'", "`z'", "`zmiss'") } } // copy interpolated y to repeated x bysort `touse' `by' `x' (`z') : /// replace `z' = `z'[1] if `touse' & `z' == . } else if "`pchip'" != "" { tempvar group z guse zmiss // only use one of any repeated x by `touse' `by' `x': gen byte `guse' = `touse' & (_n == 1) sort `guse' `by' `x' if "`by'" != "" { by `guse' `by': gen byte `group' = _n == 1 & `guse' by `guse': replace `group' = sum(`group') } else gen byte `group' = 1 su `group', meanonly local ng = r(max) gen double `z' = `y' if `touse' gen byte `zmiss' = 0 forval g = 1/`ng' { replace `guse' = `group' == `g' replace `zmiss' = `guse' & missing(`z') count if `guse' & !missing(`z') if r(N) > 2 { mata : /// pchipolate("`y'", "`x'", "`guse'", "`z'", "`zmiss'") } else local flag = 1 } // copy interpolated y to repeated x bysort `touse' `by' `x' (`z') : /// replace `z' = `z'[1] if `touse' & `z' == . local epolate } else if "`idw'`idw2'" != "" { tempvar group z guse zmiss // only use one of any repeated x by `touse' `by' `x': gen byte `guse' = `touse' & (_n == 1) sort `guse' `by' `x' if "`by'" != "" { bysort `guse' `by': gen byte `group' = _n == 1 & `guse' by `guse': replace `group' = sum(`group') } else gen byte `group' = 1 su `group', meanonly local ng = r(max) gen double `z' = `y' if `touse' gen byte `zmiss' = 0 if "`idw2'" == "" local idw2 = 2 forval g = 1/`ng' { replace `guse' = `group' == `g' replace `zmiss' = `guse' & missing(`z') count if `zmiss' mata : idwipolate("`y'", "`x'", "`guse'", "`z'", "`zmiss'", `idw2') } // copy interpolated y to repeated x bysort `touse' `by' `x' (`z') : /// replace `z' = `z'[1] if `touse' & `z' == . local epolate } else if "`forward'`backward'" != "" { tempvar z gen double `z' = `y' if "`forward'" != "" { bysort `touse' `by' (`x') : /// replace `z' = `z'[_n-1] if `touse' & `z' == . } else { tempvar negx gen double `negx' = -`x' bysort `touse' `by' (`negx'): /// replace `z' = `z'[_n-1] if `touse' & `z' == . } local epolate } else if "`groupwise'" != "" { tempvar z OK bysort `touse' `by' (`y') : /// gen byte `OK' = (`y' == `y'[1]) | missing(`y') count if `touse' & !`OK' if r(N) { if "`by'" != "" local text "within groups" di as err /// "different non-missing values of `usery' `text'" exit 498 } gen double `z' = `y' by `touse' `by': /// replace `z' = `z'[1] if missing(`z') & `touse' local epolate } if `"`epolate'"' != "" { sort `touse' `by' `x' `z' /* already sorted */ tempvar m b M B ismiss by `touse' `by': gen double /* */ `m' = (`z'[_n+1] - `z')/(`x'[_n+1] - `x') gen double `b' = `z' - `m'*`x' gen double `M' = `m' gen double `B' = `b' by `touse' `by': replace `m' = `m'[_n-1] if `m'[_n-1]< . by `touse' `by': replace `b' = `b'[_n-1] if `b'[_n-1]< . gen byte `ismiss' = `z' >= . by `touse' `by': replace `ismiss' = 0 /* */ if _n>1 & `ismiss'[_n-1]==0 by `touse' `by': replace `z' = `m'[_N]*`x' + `b'[_N] /* */ if `touse' & `ismiss' drop `ismiss' `m' `b' gen double `negx' = -`x' gen double `negy' = -`z' sort `touse' `by' `negx' `negy' by `touse' `by': replace `M' = `M'[_n-1] if `M'[_n-1] < . by `touse' `by': replace `B' = `B'[_n-1] if `B'[_n-1] < . gen byte `ismiss' = `z' >= . by `touse' `by': replace `ismiss' = 0 /* */ if _n>1 & `ismiss'[_n-1] == 0 by `touse' `by': replace `z' = `M'[_N]*`x' + `B'[_N] /* */ if `touse' & `ismiss' } rename `z' `generate' compress `generate' count if `generate' >= . } if `flag' { di as txt "note: at least 3 values needed in any interpolation" } if r(N) > 0 { if r(N) != 1 local pl "s" di as txt "(" r(N) `" missing value`pl' generated)"' } end mata: void idwipolate(string scalar yvarname, string scalar xvarname, string scalar tousename, string scalar zvarname, string scalar zmissname, real scalar power ) { real matrix xyvar real colvector x, y, where, newz, weight real scalar i st_view(xyvar, ., (xvarname, yvarname), tousename) x = select(xyvar[,1], (xyvar[,2] :< .)) y = select(xyvar[,2], (xyvar[,2] :< .)) newz = where = select(xyvar[,1], (xyvar[,2] :== .)) for(i = 1; i <= rows(where); i++) { weight = abs(x :- where[i]):^(-power) newz[i] = sum(y :* weight) / sum(weight) } st_store(., zvarname, zmissname, newz) } void csipolate(string scalar yvarname, string scalar xvarname, string scalar tousename, string scalar zvarname, string scalar zmissname ) { real matrix xyvar real colvector x, y, where st_view(xyvar, ., (xvarname, yvarname), tousename) x = select(xyvar[,1], (xyvar[,2] :< .)) where = select(xyvar[,1], (xyvar[,2] :== .)) y = select(xyvar[,2], (xyvar[,2] :< .)) st_store(., zvarname, zmissname, spline3eval(spline3(x, y), where)) } void pchipolate(string scalar yvarname, string scalar xvarname, string scalar tousename, string scalar zvarname, string scalar zmissname ) { real matrix xyvar real colvector x, y, where st_view(xyvar, ., (xvarname, yvarname), tousename) x = select(xyvar[,1], (xyvar[,2] :< .)) where = select(xyvar[,1], (xyvar[,2] :== .)) y = select(xyvar[,2], (xyvar[,2] :< .)) st_store(., zvarname, zmissname, pchip(x, y, where)) } real colvector pchip(real colvector x, real colvector y, real colvector u) { real scalar n, nu, k, j real colvector h, delta, d, c, b, which, s n = length(x) h = x[2::n] - x[1::n-1] delta = (y[2::n] - y[1::n-1]) :/ h d = pchipslopes(h, delta) c = (3*delta - 2*d[1::n-1] - d[2::n]) :/ h b = (d[1::n-1] - 2*delta + d[2::n]) :/ (h:^2) nu = length(u) k = J(nu, 1, 1) for (j = 2; j <= n-1; j++) { which = select((1::nu), x[j] :<= u) k[which] = J(length(which), 1, j) } s = u - x[k] return(y[k] + s :* (d[k] + s :* (c[k] + s :* b[k]))) } real colvector function pchipslopes(real colvector h, real colvector delta) { real scalar n real colvector d, k, w1, w2 n = length(h) + 1 d = J(n, 1, 0) k = 1 :+ select((1::n-2), sign(delta[1::n-2]) :* sign(delta[2::n-1]) :> 0) w1 = 2*h[k] + h[k:-1] w2 = h[k] + 2*h[k:-1] d[k] = (w1 + w2) :/ (w1 :/ delta[k:-1] + w2 :/ delta[k]) d[1] = pchipend(h[1], h[2], delta[1], delta[2]) d[n] = pchipend(h[n-1], h[n-2], delta[n-1], delta[n-2]) return(d) } real scalar function pchipend(h1, h2, del1, del2) { real scalar d d = ((2*h1 + h2)*del1 - h1*del2) / (h1 + h2) if (sign(d) != sign(del1)) d = 0 else { if (sign(del1) != sign(del2) & (abs(d) > abs(3*del1))) { d = 3*del1 } } return(d) } end