/* _diff - Evaluates the difference between the ECDFs of two interpoint distance distributions in two subsamples using equiprobable binning
Pietro Tebaldi , Harvard School of Public Health and Bocconi University
July 2010 */

* _diff , x(varname) y(varname) g(varname) [bins(#)]
*
* Compares the ECDFs of the interpoint distances within two subsamples.
*   x(), y() : coordinate variables
*   g()      : group indicator; g==0 selects sample 1, g==1 selects sample 2
*   bins()   : integer passed through to -dbins- (default 20)
*
* Returned in r():
*   r(d)      matrix r(d) produced by -dbins- on the pooled data
*   r(Fhat1)  matrix r(Fhat) produced by -Fhat- on sample 1
*   r(Fhat2)  matrix r(Fhat) produced by -Fhat- on sample 2
*   r(difF)   r(Fhat1) - r(Fhat2)
*   r(N1)     observations with g==0 and non-missing x
*   r(N2)     observations with g==1 and non-missing x
*   r(N)      all observations in the dataset (no filter on g, x or y)
*
* The dataset in memory is left unchanged on exit.
program define _diff , rclass
    version 10.1
    syntax  , x(varname) y(varname) g(varname) [bins(integer 20)]
    tempvar X1 Y1 X2 Y2
    tempname d F1 F2 difF
    quietly {
        // Separate the two samples. The copies are stored as double so that
        // double-precision coordinates are not rounded to float (the default
        // storage type of -generate-); otherwise the per-sample ECDFs would be
        // computed on different values than the pooled binning below.
        gen double `X1' = `x' if `g'==0
        gen double `Y1' = `y' if `g'==0
        gen double `X2' = `x' if `g'==1
        gen double `Y2' = `y' if `g'==1

        // Binning using the pooled sample. -dbins- runs on a preserved copy
        // so that any change it makes to the data is undone afterwards.
        preserve
        dbins `y' `x' , bins(`bins')
        mat `d' = r(d)

        // ECDF for the interpoint distances within sample 1 only.
        // missing() also catches extended missing values (.a-.z) that were
        // propagated from x, which a comparison with "." would let through.
        restore, preserve
        drop if missing(`X1')
        Fhat `X1' `Y1' , d(`d')
        mat `F1' = r(Fhat)

        // ECDF for the interpoint distances within sample 2 only.
        restore, preserve
        drop if missing(`X2')
        Fhat `X2' `Y2' , d(`d')
        mat `F2' = r(Fhat)

        // Difference between the two ECDFs; computed before the matrices are
        // handed over to r(), since -return matrix- moves its source matrix.
        mat `difF' = `F1'-`F2'

        // Return results and restore the dataset.
        return matrix d = `d'
        return matrix Fhat1 = `F1'
        return matrix Fhat2 = `F2'
        return matrix difF = `difF'
        restore

        // Sample sizes, counted on the full (restored) dataset.
        count if !missing(`X1')
        return scalar N1 = r(N)
        count if !missing(`X2')
        return scalar N2 = r(N)
        count
        return scalar N = r(N)
    }
end