program umbrella //------------------------------------------------------
version 10.0
preserve
*
* Perform o'Brien's multivariate test (Biometrics 1984;40:1079-1087).
* This is an alternative to the Hotelling' T test.  It is more powerful than
* Hotellings test when there is a natural ordering to 
* the univariate response variables.
*
* Input
*	y1 = first response variable
*	...
*	yK = Kth response variable
*       factor_var = classification variable
*       highlow string
*       ranktable
*       id variable name
*
*	(K = number of response variables is calculated by counting
*        the number of response variables on the command line)
*
* Output
*	Lists P value from O'Brien's multivariate test of the null hypothesis
*	that the mean of the response vectors are equal in groups defined by
*	each value of factor_var.
*
* Program call
*	umbrella y1...yK [if] [in] [, BY(factor_variables) highlow(string) ranktable id(varlist)]
*
syntax varlist [if] [in] [, BY(varlist) HIghlow(string) RAnktable id(varlist)]
*
*  deal with [if] and [in]
*
if "`if'`in'"~="" {
	keep `if' `in'
}
*
*  The by option is required.  Make sure it is here.
*
if "`by'"=="" {
	display in red "by() option required"
	exit 100
}       
*
*  Extract the factor variables.
*
local factor_var="`by'"
*
*  The remaining variables on the command line are the
*  response variables.  Process these one at a time.
*  Keep a count (K) of the number of response variables.
*
tokenize "`varlist'"
local missing = 0
local K=0
while "`1'" ~= "" {
	local K=`K' + 1
        local resp`K' `1'
	local missing =`missing'+ (`1' == .)
	quietly drop if `1' ==.
	sort `1'
        egen r`K' = rank(`1')
        egen r_`1' = rank(`1')
	mac shift
}
display _newline(1) as text "Number of response variables: " as result "`K'"
*
*  Reverse the rank if small values are "better".
*  H is assumed if an "L" is not present.
*
tokenize "`highlow'"
local J=0
while "`1'" ~= "" {
	local J=`J' + 1
        local hilo`J' `1'
        quietly count
        if "`1'"=="L" {
            qui replace r`J'=(r(N)+1)-r`J'
            qui replace r_`resp`J''=r`J'
        }
	mac shift
}
*
*  Calculate the sum of the ranks.
*
gen S = r1
forvalues k = 2/`K' {
    quietly replace S = S + r`k'
}
*
*  Make a table showing H and L settings.
*
* ----------------------------
display _newline(1) as text _col(5) "Variable" _col(14) "{c |}" _col(22) "Outcome"

display             as text "{hline 13}{c +}{hline 53}"

forvalues k = 1/`K' {
        local hilo_label`k' "lower values are better"
        if "`hilo`k''" ~= "L" {
                local hilo_label`k' "higher values are better"
        }
        local abname = abbrev("`resp`k''",12)
        display as text          "{ralign 12:`abname'}" /*
*/              as text _col(14) "{c |}"               /*
*/              as resu _col(23) "`hilo_label`k''"
}
*-------------------------------------
*
*  Output a summary table of the input variables by the factor variable(s).
*
bysort `factor_var': summ `varlist'
*
*  Produce the table that lists the ranks
*  and sum of ranks.
*
gen sum_of_ranks=S
if "`ranktable'" ~= "" {
	sort `factor_var' sum_of_ranks
	display _newline(1) as text "List of ranks"
	by `factor_var': list `id' r_* sum_of_ranks, abbreviate(12)
}

display _newline(1) as text "Missing observations dropped from analysis = " as result "`missing'" 

display _newline(1) as text "O'Brien's Umbrella test is the following Kruskal-Wallis test on the
display as text "sum of the ranks across the dependent variables."

kwallis S, by(`factor_var')

 
end //---------------------------------------------------