/*
   kwallis2: Kruskal-Wallis Test for equality of populations
   by HervŽ CACI, Nice, FRANCE

   kwallis2 varname,BY(groupvar) Control(controlgroup)

   Improvements: - more detailed output
                 - correction for tied observation
                 - multiple comparisons, including vs. control
                 - reports of p values
*/

*! version 1.1 (may 27, 1999)

program define kwallis2
version 5.0
local varlist "required existing max(1)"
local options "BY(string) Control(integer -1)"
local if "optional"
local in "optional"
parse "`*'"

/* Checks the non-optional "by" */

if "`by'"=="" {
 	di in red "You must specify the grouping variable"
	 exit 198
}

/* "If" "in" implementation. Discard missing obs */

tempvar Touse theRank RankSum Obs Mean Grp Ties
mark `Touse' `if' `in'
markout `Touse' `varlist' `by'
preserve
quietly drop if `Touse'== 0

/* Generate ranks */

egen `theRank'=rank(`varlist') if `Touse'==1
gen long `Obs'=1 if `theRank' ~=.
gen long `Mean'=`Obs'
gen long `RankSum'=`Obs'

quietly {
  sort `by'
  by `by': replace `RankSum'=cond(_n==_N,sum(`theRank'),.)
  by `by': replace `Obs'=cond(_n==_N,sum(`Obs'),.)
  by `by': replace `Mean'=cond(_n==_N,`RankSum'/`Obs',.)

  /* Find the number of non-empty groups */

  by `by': gen long `Grp'=1 if _n==_N & `Obs'>0 & `Obs'!=.
  replace `Grp'=sum(`Grp') if `Grp'!=.
  local maxGrp=`Grp'[_N]
}

/* Print table */

sort `Grp'
di _n in gr "One-way analysis of variance by ranks (Kruskal-Wallis Test)"
di _n "`by'" _col(10) "Obs   RankSum  RankMean " _n _dup(32) "-"
local i=1
local TotObs=0
local KW=0
while `i'<=`maxGrp' {
  di "  " `by'[`i'] _col(8) %5.0f `Obs'[`i'] _col(14) /*
*/ %9.2f `RankSum'[`i'] _col(24) %9.2f `Mean'[`i']
  local TotObs=`TotObs'+`Obs'[`i']
  local KW=`KW'+(`Mean'[`i']*`Mean'[`i']*`Obs'[`i'])
  local i=`i'+1
}

/* Tied observations */

sort `theRank'
local i=1
local Tied=1
local gTied=0
while `i'< `TotObs' {
  if `theRank'[`i']==`theRank'[`i'+1] {
    local Tied=`Tied'+1
  }
  else if `Tied'~=1 {
    local gTied=`gTied'+`Tied'^3-`Tied'
    local Tied=1
  }
  local i= `i'+1
}
if `Tied'~=1 {		    /* Checks if the dataset ends if tied values... */
    local gTied=`gTied'+`Tied'^3-`Tied'
}

local gTied=1-(`gTied'/(`TotObs'^3-`TotObs'))

/* Print statistics */

local KW=12/(`TotObs'*(`TotObs'+1))*`KW'-3*(`TotObs'+1)
local pchi2= max(chiprob(`maxGrp'-1,`KW'+(1e-20)),.0001)
local tchi2= max(chiprob(`maxGrp'-1,(`KW'/`gTied')+1e-20),.0001)
#delimit ;
di _n in gr "Chi-squared (uncorrected for ties) = " in ye %9.3f `KW'
in gr " with " in ye %4.0f `maxGrp'-1 in gr " d.f. (p = " in ye %6.5f `pchi2' ")";
di in gr "Chi-squared (corrected for ties)   = " in ye %9.3f `KW'/`gTied'
in gr " with " in ye %4.0f `maxGrp'-1 in gr " d.f. (p = " in ye %6.5f `tchi2' ")";
#delimit cr

if `tchi2'>=(1-$S_level/100) & ("`control'"!="" | "`detail'"!="") {
  di _n in red "Group comparisons aborted: unsignificant chi-squared"
  exit
}

sort `Grp'
local i=1
local c=sqrt(`TotObs'*(`TotObs'+1)/12)
local critic=0
local z=0

if `control'>=0 {
  di _n in gr "Multiple comparisons versus control group"
  di "(`by'=`control')"
  di _dup(41) "-"
  local c1=(1-$S_level/100)/(2*(`maxGrp'-1))
  di in gr "(Adjusted p-value for significance is " %7.6f `c1' ")" _n
  local c1=abs(`c'*invnorm(`c1'))
  while `i'<=`maxGrp' {
    if `i'!= `control' {
      local delta=abs(`Mean'[`control']-`Mean'[`i'])
      local c2=sqrt((1/`Obs'[`i'])+(1/`Obs'[`control']))
      local critic=`c1'*`c2' 
      local z=`z'+(`delta' >= `critic')
      di in gr "Ho: `varlist'(`by'==`i') = `varlist'(`by'==`control')"
      di in gr "    RankMeans difference = " in ye %9.2f `delta' /*
*/      "  Critical value = " in ye %9.2f `critic'
      di in gr "    Prob = " %7.6f in ye 1-normprob(`delta'/(`c'*`c2')) _c
      if `delta'>=`critic' { di in ye " (S)" _n }
      else { di in ye " (NS)" _n }
    }
    local i= `i'+1
  }
  if `z'==0 {
    di in gr "All the groups are comparable to the control group."
  }
}
else {
  di _n in gr "Multiple comparisons between groups"
  di _dup(35) "-"
  local c1=(1-$S_level/100)/(`maxGrp'*(`maxGrp'-1))
  di in gr "(Adjusted p-value for significance is " %7.6f `c1' ")" _n
  local c1=abs(`c'*invnorm(`c1'))
  while `i'<`maxGrp' {
    local j=`i'+1
    while `j'<=`maxGrp' {
      if `i'!=`j' {
        local delta=abs(`Mean'[`i']-`Mean'[`j'])
        local c2=sqrt((1/`Obs'[`i'])+(1/`Obs'[`j']))
        local critic=`c1'*`c2' 
        local z=`z'+(`delta' >= `critic')
        di in gr "Ho: `varlist'(`by'==`i') = `varlist'(`by'==`j')"
        di in gr "    RankMeans difference = " in ye %9.2f `delta' /*
*/      "  Critical value = " in ye %9.2f `critic'
        di in gr "    Prob = " %7.6f in ye 1-normprob(`delta'/(`c'*`c2')) _c
        if `delta'>=`critic' { di in ye " (S)" _n }
        else { di in ye " (NS)" _n }
      }
    local j=`j'+1
    }
  local i=`i'+1
  }
  if `z'==0 {
    di in gr "All the groups are comparable from each other."
  }
}

mac def S_1 `KW'
mac def S_2 `maxGrp'-1
mac def S_3 `KW'/`gTied'

end