*! version 1.0.2, Francisco Ceron, 23Sept2024
	*! ehutchens: extended Hutchens 'square root' segregation index (additively decomposable), with supergroup option, stored matrices, save-new-dataset and bootstrap options
	*! This is an auxiliary program: a slightly modified hutchens command used by the bootstrap option of the ehutchens program, version 1.0.2.

	

		// Auxiliary program (slightly modified original hutchens program command) to be executed within the ehutchens program command (line 230)
			program define hu_int, rclass
				// hu_int: Hutchens 'square root' segregation index (S) of a 0/1 group
				// variable across social units, with an optional additive decomposition
				// by subgroup.
				//
				// Syntax:
				//   hu_int unitvar groupvar [fw aw] [if] [in]
				//          [, Missing Format(passthru) BYgroup(varname)]
				//
				//   unitvar   (`1')  variable identifying the social units
				//   groupvar  (`2')  0/1 variable identifying the two social groups
				//   bygroup()        subgroup variable for the decomposition
				//   missing          keep observations with missing bygroup() values
				//                    (only allowed together with bygroup())
				//   format()         passed through unchanged to -tabdisp-
				//
				// Stored results:
				//   r(S)             aggregate index
				//   r(SW), r(SB)     within- and between-group components
				//                    (only when bygroup() is specified)
				//
				// Errors:
				//   198   missing specified without bygroup(); groupvar not 0/1
				//   2000  no valid observations
				//
				// Side effects: the data are re-sorted (the program is not declared
				// sortpreserve) and result tables are displayed.
				version 15.1
				syntax varlist(min=2 max=2) [fw aw/] [if] [in] [, Missing Format(passthru) BYgroup(varname) ]

				// variables: `1' = social unit var, `2' = 0/1 group var
				tokenize `varlist'

				// treatment of missing values on bygroup var
				if "`missing'" != "" {
					if "`bygroup'" == "" {
						di as err "cannot specify missing option without bygroup option"
						exit 198
					}
					marksample touse
					markout `touse', strok
				}
				else {
					marksample touse
					markout `touse' `bygroup', strok
				}

				// stop if no valid obs
				qui count if `touse'
				if r(N) == 0 {
					di as error "no valid observations"
					error 2000
				}

				// groupvar 0/1
				capt assert `2'==0 | `2'==1 if `touse'
				if _rc {
					di as err "groupvar not 0/1"
					exit 198
				}

				// take care of weights
				if "`exp'"=="" {
					// unweighted: the sample marker (1 for every used obs) acts as the weight
					local exp "`touse'"
				}
				else {
					// parenthesise the user's weight expression: it is multiplied below,
					// so e.g. [fw=w+1] must not expand to w+1*(1-groupvar)
					local exp "(`exp')"
				}

				********* Aggregate index **********************************************

				// sort
				sort `touse' `1'

				// compute cell totals and unweighted number of categories
				// (doubles throughout: float running sums lose precision on large totals)
				tempvar cell0 cell1 iid
				qui by `touse' `1': gen byte `iid' = _n==_N & `touse'
				qui by `touse' `1': gen double `cell0' = sum(`exp'*(1-`2')) if `touse'
				qui by `touse' `1': replace `cell0' = `cell0'[_N] if `touse'
				qui by `touse' `1': gen double `cell1' = sum(`exp'*`2') if `touse'
				qui by `touse' `1': replace `cell1' = `cell1'[_N] if `touse'

				// compute column totals and n of cases
				tempvar col0 col1 id Ncat Nobs
				qui by `touse' : gen byte `id' = _n==_N & `touse'
				qui by `touse' : gen double `col0' = sum(`exp'*(1-`2')) if `touse'
				qui by `touse' : replace `col0' = `col0'[_N] if `touse'
				qui by `touse' : gen double `col1' = sum(`exp'*`2') if `touse'
				qui by `touse' : replace `col1' = `col1'[_N] if `touse'
				qui by `touse' : gen double `Ncat' = sum(`iid') if `touse'
				qui by `touse' : replace `Ncat' = `Ncat'[_N] if `touse'
				if "`weight'"=="fweight" {
					// frequency weights: number of cases is the weighted total
					qui gen double `Nobs' = `col0' + `col1' if `touse'
				}
				else {
					qui by `touse' : gen double `Nobs' = _N if `touse'
				}

				// compute summands: one term sqrt(share0 * share1) per social unit
				tempvar sum2 s3 S
				qui gen double `sum2' = sqrt( (`cell0'/`col0') * (`cell1'/`col1') )
				qui gen double `s3' = .

				// compute S etc. (summing over one observation per unit, flagged by iid)
				qui by `touse' : gen double `S' = sum(`sum2') if `iid'
				qui by `touse' : replace `S' = 1 - `S'[_N] if `iid'
				qui by `touse' : replace `s3' = 100*( `col1' / (`col0' + `col1') ) if `iid'

				// display
				lab var `Ncat' "# units"
				lab var `Nobs' "# obs (raw)"
				lab var `S' "S"
				lab var `s3' "% `2'=1"

				// row variable for the one-row aggregate table: a string constant "0/1"
				// whose variable label is the group variable's label (or its name)
				local labl: var l `2'
				if `"`labl'"'=="" local labl "`2'"
				tempvar touse2
				qui ge byte `touse2' = `touse'
				lab var `touse2' `"`labl'"'
				qui tostring `touse2', replace
				qui replace `touse2' = "0/1"

				noi di " "
				noi di as txt "Hutchens 'square root' segregation index (S)"
				noi di as txt _dup(44) "_"
				noi di " "
				noi di as txt "Social unit var: " as res "`1'" as txt ". Segregation (social group) var: " as res "`2'" as txt"."
				noi di " "
				noi di as txt "Aggregate statistics"

				tabdisp `touse2' if `id', cell(`S' `s3' `Ncat' `Nobs') `format'  // only 5 vars allowed in cell()

				// S is constant over the flagged observations, so its mean is the index
				qui su `S', meanonly
				local SS =  r(mean)
				return scalar S = r(mean)

				******* Decomposition: (a) subgroup index values, by subgroup *********

				if "`bygroup'" != "" {

					// sort
					sort `touse' `bygroup' `1'

					// compute cell totals and number of categories
					tempvar gcell0 gcell1 giid
					qui by `touse' `bygroup' `1': gen byte `giid' = _n==_N & `touse'
					qui by `touse' `bygroup' `1': gen double `gcell0' = sum(`exp'*(1-`2')) if `touse'
					qui by `touse' `bygroup' `1': replace `gcell0' = `gcell0'[_N] if `touse'
					qui by `touse' `bygroup' `1': gen double `gcell1' = sum(`exp'*`2') if `touse'
					qui by `touse' `bygroup' `1': replace `gcell1' = `gcell1'[_N] if `touse'

					// compute column totals and n of cases
					tempvar gcol0 gcol1 gid gNcat gNobs gNpc
					qui by `touse' `bygroup': gen byte `gid' = _n==_N & `touse'
					qui by `touse' `bygroup': gen double `gcol0' = sum(`exp'*(1-`2')) if `touse'
					qui by `touse' `bygroup': replace `gcol0' = `gcol0'[_N] if `touse'
					qui by `touse' `bygroup': gen double `gcol1' = sum(`exp'*`2') if `touse'
					qui by `touse' `bygroup': replace `gcol1' = `gcol1'[_N] if `touse'
					qui by `touse' `bygroup': gen double `gNcat' = sum(`giid') if `touse'
					qui by `touse' `bygroup': replace `gNcat' = `gNcat'[_N] if `touse'
					if "`weight'"=="fweight" {
						qui gen double `gNobs' = `gcol0' + `gcol1' if `touse'
					}
					else {
						qui by `touse' `bygroup': gen double `gNobs' = _N if `touse'
					}
					qui by `touse' `bygroup': gen double `gNpc' = 100*(`gcol0' + `gcol1')/(`col0' + `col1') if `touse'

					// compute summands
					tempvar gsum2 gs3 gS gw scont
					qui gen double `gsum2' = sqrt( (`gcell0'/`gcol0') * (`gcell1'/`gcol1') )
					qui gen double `gs3' = .
					qui gen double `gw' = sqrt( (`gcol0'/`col0') * (`gcol1'/`col1') )
					qui gen double `scont' = (`gcell1'/`col1') - sqrt( (`gcell0'/`col0')*(`gcell1'/`col1')  )

					// compute S etc. within each subgroup
					qui by `touse' `bygroup' : gen double `gS' = sum(`gsum2') if `giid'
					qui by `touse' `bygroup' : replace `gS' = 1 - `gS'[_N] if `giid'
					qui by `touse' `bygroup' : replace `gs3' = 100*( `gcol1' / (`gcol0' + `gcol1') ) if `giid'
					qui by `touse' `bygroup' : replace `scont' = sum(`scont') if `giid'
					qui by `touse' `bygroup' : replace `scont' = `scont'[_N]  if `giid'

					// display
					lab var `gNcat' "# units"
					lab var `gNobs' "# obs (raw)"
					lab var `gS' "S"
					lab var `gw' "Weight"           // short labels required for -tabdisp-
					lab var `gs3' "% `2'=1"
					lab var `scont' "Contribution"
					lab var `gNpc' "% obs (wgted)"

					noi di " "
					noi di as txt "Statistics for each subgroup defined by " as res "`bygroup'"
					// 2 tables for display: -tabdisp- only allows 5 vbles per table
					tabdisp `bygroup' if `gid', cell(`gs3' `gNcat' `gNobs' `gNpc') `format'
					tabdisp `bygroup' if `gid', cell(`gS' `gw' `scont') `format'

					******* Decomposition: (b) within- and between-group breakdown  *********

					// within = sum over subgroups of weight * subgroup S;
					// between = 1 - sum of weights. The data end on a flagged (gid)
					// observation, so [_N] picks up the completed running sum.
					tempvar SW SB
					qui ge double `SW' = sum( `gw' * `gS') if `gid'
					qui replace `SW' = `SW'[_N]  if `gid'
					qui ge double `SB' = sum( `gw' ) if `gid'
					qui replace `SB' = 1 - `SB'[_N] if `gid'

					// return the components straight from r(mean) rather than via macro
					// text; the macros are kept only to build the display table
					su `SW', meanonly
					local swval = r(mean)
					return scalar SW = r(mean)
					su `SB', meanonly
					local sbval = r(mean)
					return scalar SB = r(mean)
					local SWpc = 100*`swval'/ `SS'
					local SBpc = 100*`sbval'/ `SS'

					noi di " "
					// two-row display table held in observations 1 and 2
					tempvar decomp
					tempname declab
					qui ge byte `decomp' = 0 in 1
					qui replace `decomp' = 1 in 2
					lab var `decomp' "Decomposition"
					// temporary label name: never overwrites a value label owned by the user
					lab def `declab'  0 "Within-group segregation" 1 "Between-group segregation"
					lab val `decomp' `declab'
					tempvar ds dspc
					qui ge double `ds' = `swval' if `decomp' == 0
					qui replace `ds' = `sbval' if `decomp' == 1
					qui ge double `dspc' = `SWpc' if `decomp' == 0
					qui replace `dspc' = `SBpc' if `decomp' == 1
					lab var `ds' "Value"
					lab var `dspc' "As percent"

					tabdisp `decomp' if `decomp' < 2, cell(`ds' `dspc') `format'
					noi di " "

					// value labels are not removed automatically with the tempvar
					capture lab drop `declab'

}

end