program define sumby
/*
Date: 15/9/95  
New version: 24/9/95 with better heading of Table.
New version: 23/11/97 with varlist instead of one var and nobytot.
New version: 25/11/98 width of column labels shortened if less than 
            width required for display of values.
3/4/2001: test() option added for P-values
Author: Wim van Putten, ErasmusMC, Rotterdam
Creates formatted table of summary statistics of one variable by another variable (optional).
*/

version 4.0
quietly {
  local varlist "req ex min(1) "
  # delimit ;
  local options "BY(string) STat(string) Dec(string) Label(string) 
        Mis Width(int 10) Head noBYTot START(int 12) TEst(string) " ;
  #delimit cr      
  local if "opt"
  local in "opt"
  parse `"`0'"' 
  preserve
  if `"`if'`in'"'~="" { keep `if' `in' }
  if _N==0 {
    noisily display `"`if' `in' : no observations"'
    exit
    }
  tempvar x  NNN colnr _blb
*tempvar _blb _vlab _val NN  NNN SNN CAT  rownr NR
  
if index(upper("`test'"),"KW")>0        { local test K-W }
else if index(upper("`test'"),"K-W")>0  { local test K-W }
else if index(upper("`test'"),"WIL")>0  { local test K-W }
else if index(upper("`test'"),"RANK")>0 { local test K-W }
else if index(upper("`test'"),"TR")>0   { local test trend }
else if index(upper("`test'"),"SP")>0   { local test Spear }
  
  parse "`by'",parse(" ,")
  local by "`1'"             /* Only first string counts as by-variable */
  keep `varlist' `by'
  local nby 1
  if "`by'"~="" {
    unabbrev `by'
    local byname : variable label `by'
        local bytype : type  `by'
    if index("`bytype'","str")>0 {
        replace `by'=trim(`by')
        compress `by'
    }
    if "`byname'"=="" { local byname "`by'" }
    if "`mis'"=="" {     /* Exclude observations missing on by-variable 
                                unless the missing are included as category */
       if index("`bytype'","str")>0 { drop if `by'=="" }
       else { drop if `by'==. }
       if _N==0 {
        noisily display "no observations with `by' non-missing"
            exit
       }
    }
        sort `by' 
        quietly by `by' : gen int `NNN'=1 if _n==1
        gen int `colnr'=sum(`NNN')

    local blb : value label `by'
    if "`blb'"~="" {
       capture label list `blb'
       if _rc>0 {   /* value label not defined */
        local blb 
       }
    }
    if index("`bytype'","str")>0 {
     local w=max(`width',2)
     gen str`w' `_blb' = `by' 
    }
        else {
     if "`blb'"~="" {  decode `by' ,gen(`_blb') }
     else           {  gen str8 `_blb'=string(`by') }
     replace `_blb'=string(`by') if `_blb'==""
    }
    local byby "by(`by')" 
 
       *Create labels for head of Table, if requested
        tab `by',mis
        local nby=_result(2)
        sort `NNN' `colnr'
        local j=1 
        while `j'<=`nby' {
            local bylab`j'= `_blb'[`j']
            local j=`j'+1 
        }
        drop  `NNN' 
  }

  parse "`stat'" ,parse(" ,")
  
  if "`stat'"~="" {
   local i=1
   while "``i''"~="" {
    while "``i''"=="," { macro shift }
    if "``i''"~="" {
     local ST= upper("``i''")
     if "`ST'"=="ALL"|"`ST'"=="_ALL" {
     local ST ="ALL" 
     local i=100 }
     else if "`ST'"=="MEAN"   {  local r`i' 3  }     /*  _result(3)= mean */
     else if "`ST'"=="M"      {  local r`i' 3  }     /*  _result(3)= mean */
     else if "`ST'"=="MED"    {  local r`i' 10  }    /*  _result(10)= median */
     else if "`ST'"=="MEDIAN" {  local r`i' 10  }    /*  _result(10)= median */
     else if "`ST'"=="Q2"     {  local r`i' 10  }    /*  _result(10)= median */
     else if "`ST'"=="N"      {  local r`i' 1  }     /*  _result(1)= number of observations */
     else if "`ST'"=="NR"     {  local r`i' 1  }     /*  _result(1)= number of observations */
     else if "`ST'"=="SD"     {  local r`i' 4  }     /*  _result(4)= variance !! */
     else if "`ST'"=="R"      {  local r`i' 56  }    /*  range to be derived from _results 5 and 6 */
     else if index("`ST'","RA")==1 { local r`i' 56  } /*  range to be derived from _results 5 and 6 */
     else if "`ST'"=="MSD" {     local r`i' 34  }     /*  mean+/-sd to be derived from from _results 3 and 4 */
     else if "`ST'"=="MIN" {     local r`i' 5  }      /*  _result(5)= min */
     else if "`ST'"=="MAX" {     local r`i' 6  }      /*  _result(6)= max */
     else if "`ST'"=="Q1"  {     local r`i' 9  }      /*  _result(9)= Q1 25th percentile */
     else if "`ST'"=="Q3"  {     local r`i' 11  }     /*  _result(11)= Q3 75th percentile */
     else if "`ST'"=="P5"  {     local r`i' 7  }      /*  _result(7)= 5th percentile */
     else if "`ST'"=="P05" {     local r`i' 7  }      /*  _result(7)= 5th percentile */
     else if "`ST'"=="P95" {     local r`i' 13  }     /*  _result(13)= 95th percentile */
     else if "`ST'"=="P1"  {     local r`i' 16  }     /*  _result(16)= 1th percentile */
     else if "`ST'"=="P01"  {    local r`i' 16  }     /*  _result(16)= 1th percentile */
     else if "`ST'"=="P99"  {    local r`i' 17  }     /*  _result(17)= 99th percentile */
     else if "`ST'"=="P10"  {    local r`i' 8  }      /*  _result(8)= 10th percentile */
     else if "`ST'"=="P90"  {    local r`i' 12  }     /*  _result(12)= 90th percentile */
     else  {
     noisily display "Program stopped. ``i'' is not a valid statistics abbreviation."
     exit  
     }
     local i=`i'+1 
    }
   }
   local ns= `i'-1     /* Number of different statistics */
  
   if "`ST'"=="ALL" {
     local r1 1
     local r2 34
     local r3 56
     local r4 7
     local r5 9
     local r6 10
     local r7 11
     local r8 13
     /* for date formatted variables: */
     local rr1d 1
     local rr2d 5
     local rr3d 7
     local rr4d 9
     local rr5d 10
     local rr6d 11
     local rr7d 13
     local rr8d 6
     global l1d "Number"
     global l2d "Minimum"
     global l3d " 5 %"
     global l4d "Q1"
     global l5d "Median"
     global l6d "Q3"
     global l7d "95 %"
     global l8d "Maximum"
     local ns = 8 
   }
  }
  else if "`stat'"=="" {
     local r1 34
     local r2 56
     local r3 10
     local r4 1
     /* for date formatted variables: */
     local rr1d 5
     local rr2d 10
     local rr3d 6
     local rr4d 1
     global l1d "Minimum"
     global l2d "Median"
     global l3d "Maximum"
     global l4d "Number"
     local ns = 4 
  }
  local i=1
  while `i'<=`ns' {
    local rr`i' `r`i''
    local i=`i'+1
  }

  parse "`dec'" ,parse(" ,")   /* Number of decimals per statistic */
  local i=1
  while `i'<=`ns' {
   while "``i''"=="," { macro shift }
   if `r`i''== 1 {  /* Number of observations always without decimals */
    local dec`i' = 0 
    local date`i'  
   }
   else if "``i''"=="."|"``i''"=="" { local dec`i' . } 
   else {   local dec`i' ``i'' }
   local i=`i'+1 
  }
  
  
  parse "`label'" ,parse(" ,")   /* Specifies labels */
  local i=1
  while `i'<=`ns' {
   if "``i''"=="," { macro shift }
   if "``i''"=="."|"``i''"=="" {
    if `r`i''==3       { global l`i' "Mean" }
    else if `r`i''==1  { global l`i' "Number" }
    else if `r`i''==4  { global l`i' "SD"      }
    else if `r`i''==5  { global l`i' "Minimum" }
    else if `r`i''==16 { global l`i' " 1 %"      }
    else if `r`i''==7  { global l`i' " 5 %"      }
    else if `r`i''==8  { global l`i' "10 %"      }
    else if `r`i''==9  { global l`i' "Q1"      }
    else if `r`i''==10 { global l`i' "Median" }
    else if `r`i''==11 { global l`i' "Q3"      }
    else if `r`i''==12 { global l`i' "90 %"      }
    else if `r`i''==13 { global l`i' "95 %"      }
    else if `r`i''==17 { global l`i' "99 %"      }
    else if `r`i''==6  { global l`i' "Maximum" }
    else if `r`i''==56 { global l`i' "Range"  }
    else if `r`i''==34 { global l`i' "Mean;SD" }
   }
   else                { global l`i'  "``i''" }
   local rtot `rtot' `r`i''
   local i=`i'+1 
  }
  *Increase width if specified width<=8 on the basis of the width that
  * would be required to display statistics for the max of the variables.
  * In case of variables with date-format, the width will be made sufficient 
  * to display the maximum date in the chosen date format
  parse "`varlist'",parse(" ")
  while "`1'"!=""&`width'<=8 {
    local format:format `1'
    local format =subinstr("`format'","td","d",1)  /* 14/1/2004 %td is equivalent to %d in Stata 8 */
    if substr("`format'",2,1)~="d" {
      su `1'
      if _result(1)>0 {
        local mean=length(string(int(_result(3))))
        if _result(3)<0&`mean'==1 {local mean=2 }
        local min=length(string(int(_result(5))))
        if _result(5)<0&`min'==1 {local min=2 }
        local max=length(string(int(_result(6))))
        local sd =length(string(int((_result(4))^.5)))
        if index("`rtot'","56")>0 {local width =max(`width',`min'+`max'+1)}
        if index("`rtot'","34")>0 {local width =max(`width',`mean'+`sd'+1)}
        local width =max(`width',`max')
        if index("`rtot'","1")>0  {local width =max(`width',length(string(_result(1))))}
      }
    }
    else {  /* In case of date-format */
      su `1'
      if _result(1)>0 {
        local max=_result(6)
        fns a `max' ,date(`format') width(`width')
        global a $a
        local width =max(`width',length("$a"))
      }
    }
    macro shift
  }

}/* end quietly */  

local Ppos=`start'+`nby'*`width' /* Pos for P value */

local start1=`start'-1
*Display header if requested
if "`head'"~="" &"`by'"~="" {
        local maxl=`width'
        display _col(`start') "|  `byname' " 
        display _col(`start') "|"   _continu
              local j=1 
              while `j'<=`nby' {
            local bylab`j'=substr("`bylab`j''",1,`width')
            local skip=`maxl'-length("`bylab`j''")
            display _skip(`skip') "`bylab`j'' " _contin
            local j= `j'+1 }
    if "`bytot'"==""|"`by'"=="" {
            display "|" _cont  
            local skip=max(0,`maxl'-length("Total"))
            display  _skip(`skip') "Total " 
    }      
    else {display " " }
    display _dup(`start1') "-" _dup(1) "+" _continue 
        local j=`nby'*(`maxl'+1)
    if "`bytot'"==""|"`by'"=="" { display _dup(`j') "-" "+" _dup(`maxl') "-" }
    else  {
        local j=`j'-1
        display _dup(`j') "-" 
    }
}          

local w8=`width'
local w4=int((`w8'-2)/2)

*Loop over varlist
parse "`varlist'" ,parse(" ,")
while "`1'"~="" {
  quietly {
   local type: type `1'
   if index("`type'","str")==0 { /* no statistics for string variables */
    if "`type'"=="int"|"`type'"=="long"|"`type'"=="byte" { local type int }
    global varlab : variable label `1'
    if "$varlab"=="" {  global varlab `1'}
    local format:format `1'
    local format =subinstr("`format'","td","d",1)  /* 14/1/2004 %td is equivalent to %d in Stata 8 */
    if substr("`format'",2,1)=="d" {
        local date `format' 
        local format = 0 
    }
    else {
     local date  
     local p= index("`format'",".")
     if `p'>0 { local format=int(real(substr("`format'",`p'+1,1))) }
     /*Default number of decimals*/
     else { local format =0 }
    }

    *Modify the requested statistics in case of date format
    local d
    if "`date'"~=""&"`rr1d'"~="" { local d d }
    local i=1
    while `i'<=`ns' {
        local r`i' `rr`i'`d''
    local i=`i'+1
    }
    /* Number of decimals per statistic adapted to variable*/
    local i=1
    while `i'<=`ns' {
      while "``i''"=="," {  macro shift }
      if `dec`i''== . { local d`i' = `format'} 
      else            { local  d`i' `dec`i'' }
      if "`date'"~="" {
    if `r`i''~=4&`r`i''~=1  {local date`i' date(`date') }
    else          {
        local date`i' 
    }
      }
      else {
    local date`i'
    if "`type'"=="int"&index("34","`r`i''")==0 {local d`i' 0 }
    /* All percentiles of integer variables are rounded and
       given without decimals */
      }
      local i=`i'+1 
    }
        

    *Statistics for total-column if requested or no by-groups
    su `1',det
    global nobs=_result(1) /* if no observations calculations not required!!*/

    if $nobs>0&("`bytot'"==""|"`by'"=="") {
        local i=1
        while `i'<=`ns' {
            if `r`i''~=34&`r`i''~=4&`r`i''~=56 {
              local x1 =_result(`r`i'')
              fns  a `x1' ,w(`w8') d(`d`i'') `date`i''
            }
            else if `r`i''==4 {
              local x1 =(_result(4))^.5
              fns  a `x1' ,w(`w8') d(`d`i'')  
            }
            else if `r`i''==34 {
              local x1 =(_result(3))
              local x2 =(_result(4))^.5
              fns  a `x1' ,w(`w4') d(`d`i'') `date`i''  
              fns  b `x2' ,w(`w4') d(`d`i'')   
              global a "${a};${b}" 
                }
            else if `r`i''==56 {
              local x1 =(_result(5))
              local x2 =(_result(6))
              fns  a `x1' ,w(`w4') d(`d`i'') `date`i''  
              fns  b `x2' ,w(`w4') d(`d`i'') `date`i''  
              global a "${a}-${b}" 
                }
                global s`i'="$a"
                local i=`i'+1 
        }
    }   /* end Statistics for total-column */
    
    *Statistics within by-groups (if by defined)
    if "`by'"~=""&$nobs>0 {
        local pval 
        if "`test'"=="K-W" {
            kwallis `1',by(`by') 
            local chi2=r(chi2_adj)            /* 20/8/2004 adapted WvP analoog aan tabby */
            if `chi2'==. { local chi2=r(chi2)}
            local pval=chi2tail(r(df),`chi2')
            *   local pval=chi2tail(r(df),r(chi2))  voorheen unadjusted
        }
        else if "`test'"=="trend" {
                nptrend `1',by(`by')
                local pval =r(p)
        }
        else if "`test'"=="Spear" {
                spearman `1' `by'
                local pval = r(p)
        }
            
         local j=1 
         while `j'<=`nby'{
           su `1' if `colnr'==`j',det
           local i=1
           while `i'<=`ns' {
             if _result(1)==0&`r`i''~=1     {  global a  - }
             else if _result(1)==0&`r`i''==1{  global a  0 }
             else if `r`i''~=34&`r`i''~=4&`r`i''~=56 {
              local x1 =_result(`r`i'')
              fns  a `x1' ,w(`w8') d(`d`i'') `date`i''
             }
             else if `r`i''==4 {
              if _result(1)>1 {
                local x1 =(_result(4))^.5
                fns  a `x1' ,w(`w8') d(`d`i'')  
          }
              else { global a "--" }    /* SD only if at least 2 observations */
             }
             else if `r`i''==34 {
              local x1 =(_result(3))
              fns  a `x1' ,w(`w4') d(`d`i'')   `date`i''
              local x2 =(_result(4))^.5
              if _result(1)>1 {
                local x2 =(_result(4))^.5
                fns  b `x2' ,w(`w4') d(`d`i'')   
          }
              else { global b "--" }     /* SD only if at least 2 observations */
              global a "$a;$b"
             }
             else if `r`i''==56 {
              local x1 =(_result(5))
              local x2 =(_result(6))
              fns  a `x1' ,w(`w4') d(`d`i'')   `date`i''
              fns  b `x2' ,w(`w4') d(`d`i'')   `date`i''
              global a "$a-$b"
             }
             global s`i'_`j'="$a"
             local i=`i'+1
           }  /* end while i */
           local j= `j'+1
         } /* end while j */
    } /* end if by:  Statistics within by-groups */
    
    *Adapt max column width of all numeric strings to be displayed.
    local maxl=`width'
    local i=1
    while `i'<=`ns' {
    if "`bytot'"==""|"`by'"=="" {local maxl =max(`maxl',length("${s`i'}")) }
        if "`by'"~="" {
          local j = 1
          while `j'<=`nby' {
            local maxl =max(`maxl',length("${s`i'_`j'}"))
            local j=`j'+1 
          }
        }
        local i= `i'+1 
    }
   }
  } /* end quietly */

  if index("`type'","str")==0 { /* no statistics for string variables */
   if $nobs==0 { display "$varlab : no observations "}
   else {
    if "`pval'"!="" {
      display "$varlab" _col(`Ppos') "(P=" %5.3f `pval' ", `test')"
    }
    else { display "$varlab" }
    
    local i=1
    while `i'<=`ns' {
        if "`date'"~=""&"`rr1d'"~="" { 
            display _col(3) "${l`i'd}" _col(`start')   _continu
    }
    else {  display _col(3) "${l`i'}" _col(`start')   _continu }
        if "`by'"~="" {
          display     "|"      _continu
          local j=1 
          while `j'<=`nby' {
            local skip=`maxl'-length("${s`i'_`j'}")
            display _skip(`skip') "${s`i'_`j'} " _contin
            local j= `j'+1 
      }
      if "`bytot'"=="" {
              display "|" _cont  
              local skip=`maxl'-length("${s`i'}")
              display  _skip(`skip') "${s`i'} " 
      }
      else {display " "}
        }
        else {
          display  _skip(1) "|" _cont  
          local skip=1+`maxl'-length("${s`i'}")
          display  _skip(`skip') "${s`i'} " 
        }
        local i= `i'+1 
    }
   }
   display " "
  }
  macro shift
} /* end while loop over varlist */
end