#delim ;
prog def multproc,rclass byable(onecall) sortpreserve;
version 10.0;
/*
  Multiple test procedures.
  Take, as input, a data set with 1 obs per null hypothesis tested
  and a variable containing the correspoding P-values,
  and a specified multiple test procedure method
  and a specified uncorrected P-value threshold.
  Create, as output, new variables containing P-value ranks,
  corresponding critical P-values, and null hypothesis credibility indicators,
  and also an overall corrected P-value r(pcor),
  such that a null hypothesis with p-value p is incredible
  if and only if p<=r(pcor).
*! Author: Roger Newson
*! Date: 08 October 2012
*/

syntax [if] [in] [,
 RAnk(string) GPUncor(string) CRitical(string) GPCor(string)
 NHcred(string) REJect(string)
 FLOAT FAST * ];
/*
 -rank- is a new variable generated to contain the ranks of the P-values
  (from lowest to highest, with ties sorted in original order).
 -gpuncor- is a new variable generated to contain the uncorrected P-value threshold used.
 -critical- is a new variable generated to contain critical P-value thresholds
  corresponding to the P-values in -pvalue-
  (for use in a one-step, step-up or step-down procedure).
 -gpcor- is a new variable generated to contain, for all observations in each by-group,
  the overall corrected P-value threshold calculated for that by-group.
 -nhcred- is a new variable generated to contain an indicator
  that the null hypothesis for that observation is credible,
  given the choice of uncorrected P-threshold and method.
 -reject- is a new variable generated to contain an indicator
  that the null hypothesis for that observation is rejected,
  given the choice of uncorrected P-threshold and method.
 -float- specifies that the critical P-values
  in the variables -gpuncor-, -critical- and -gpcor-
  will be saved as -float- variables (instead of -double- variables).
 -fast- is an option for programmers,
  specifying that -multproc- will take no action to restore the pre-existing data set
  if the user presses -break-.
 All other options are passed to -_multproc- to process each by-group.
*/

if "`fast'"=="" {;preserve;};

*
 Define temporary output variables if necessary,
 otherwise confirm validity of user-defined output variable names,
 and initialise output variables to missing
*;
foreach X of any rank gpuncor critical gpcor nhcred reject {;
  if "``X''"=="" {;tempvar `X';};
  else {;confirm new variable ``X'';};
};
qui {;
  gene long `rank'=.;
  gene double `gpuncor'=.;
  gene double `critical'=.;
  gene double `gpcor'=.;
  gene byte `nhcred'=.;
  gene byte `reject'=.;
};

*
 Call _multproc to do multiple test procedure for each by-group
*;
if !_by() {;local bypref="";};
else {;
  local bypref "by `_byvars' `_byrc0':";
};
`bypref' _multproc `if' `in' ,
  rank(`rank') gpuncor(`gpuncor') critical(`critical') gpcor(`gpcor')
  nhcred(`nhcred') reject(`reject')
  `options';
return add;

*
 Complete calculation of output variables if processing last by-group
*;
* Convert -double- P-values to -float- if specified *;
if ("`float'"!="") {;
  foreach X of var `gpuncor' `critical' `gpcor' {;
    qui recast float `X',force;
  };
};
* Compress to save space if possible *;
qui compress `rank' `gpuncor' `critical' `gpcor' `nhcred' `reject';

if "`fast'"=="" {;restore,not;};

end;

prog def _multproc,rclass byable(recall);
/*
  Do multiple test procedure for each by-group,
  assuming that the output variables are already -generate-d
  and now only need to be replaced in each by-group.
*/

syntax [if] [in] ,
  RAnk(varname) GPUncor(varname) CRitical(varname) GPCor(varname)
  NHcred(varname) REJect(varname)
  [ PValue(varname numeric) PUncor(string) PCor(string)
  MEthod(string)
  ];
/*
 -rank- is a variable generated to contain the ranks of the P-values
  (from lowest to highest, with ties sorted in original order).
 -gpuncor- is a variable generated to contain the uncorrected P-value threshold used.
 -critical- is a variable generated to contain critical P-value thresholds
  corresponding to the P-values in -pvalue-
  (for use in a one-step, step-up or step-down procedure).
 -gpcor- is a variable generated to contain, for all observations in each by-group,
  the overall corrected P-value threshold calculated for that by-group.
 -nhcred- is a variable generated to contain an indicator
  that the null hypothesis for that observation is credible,
  given the choice of uncorrected P-threshold and method.
 -reject- is a variable generated to contain an indicator
  that the null hypothesis for that observation is rejected,
  given the choice of uncorrected P-threshold and method.
 -pvalue- is the variable containing the P-values.
 -puncor- is the uncorrected P-value threshold, possibly specified in a variable,
  set to $S_level if absent or out of range [0,1].
 -pcor- is the corrected P-value threshold, possibly specified in a variable,
  set according to -method- option if absent or out of range [0,1]
 -method- specifies the method used to calculate corrected P-values
  and is overridden and set to userspecified if -pcor- is in range [0,1]
*/

*
 Default variable name for P-value
 (assumed to be from a -parmest- or -parmby- output data set)
*;
if "`pvalue'"=="" {;
  confirm numeric variable p;
  local pvalue "p";
};

*
 Select marked sample of P-values
 and count the P-values in -npvalue-
*;
local varlist `pvalue';
marksample touse;
qui count if `touse';
local npvalue=r(N);
if `npvalue'==0 {;error 2000;};

* Screen out invalid P-values in -pvalue- *;
cap assert (`pvalue'>=0)&(`pvalue'<=1) if `touse';
if _rc!=0 {;
  disp as error "Invalid P-values outside range 0<=P<=1 in P-value variable: `pvalue'";
  error 498;
};

*
 Calculate -puncor- and -pcor-
 (resetting if out of range [0,1])
 and store in scalars,
 overriding -method- if -pcor- is provided and in range [0,1]
*;
tempname puscal pcscal;
* Uncorrected P-value *;
cap confirm numeric variable `puncor';
local isvar=_rc==0;
cap confirm number `puncor';
local isnum=_rc==0;
cap confirm name `puncor';
local isname=_rc==0;
if "`puncor'"=="" {;
  scal `puscal'=.;
};
else if `isvar' {;
 qui summ `puncor' if `touse';
 scal `puscal'=r(min);
 if r(min)!=r(max) {;
   disp as error "puncor(`puncor') has multiple values";
   error 498;
 };
};
else if `isnum'|`isname' {;
  scal `puscal'=`puncor';
};
if (`puscal'<0)|(`puscal'>1) {;
  if !missing(`puscal') {;
    disp as error "Invalid puncor() value: " `puscal';
    error 498;
  };
  scal `puscal'=1-$S_level/100;
};
* Corrected P-value *;
cap confirm numeric variable `pcor';
local isvar=_rc==0;
cap confirm number `pcor';
local isnum=_rc==0;
cap confirm name `pcor';
local isname=_rc==0;
if "`pcor'"=="" {;
  scal `pcscal'=.;
};
else if `isvar' {;
 qui summ `pcor' if `touse';
 scal `pcscal'=r(min);
 if r(min)!=r(max) {;
   disp as error "pcor(`pcor') has multiple values";
   error 498;
 };
 local method "userspecified";
};
else if `isnum'|`isname' {;
  scal `pcscal'=`pcor';
 local method "userspecified";
};
if (`pcscal'<0)|(`pcscal'>1) {;
  if !missing(`pcscal') {;
    disp as error "Invalid pcor() value: " `pcscal';
    error 498;
  };
  scal `pcscal'=.;
  if lower("`method'")=="userspecified" {;
    local method="";
    disp "Note: pcor() not specified. Default method assumed.";
  };
};
local puncor=`puscal';local pcor=`pcscal';

*
 Correct case of -method- (substituting default if necessary)
*;
if "`method'"=="" {;local method "bonferroni";};
local method=lower("`method'");

*
 Carry out multiple testing procedure
*;

*
 Sort by ascending P-value (preserving pre-existing sort order in -seqnum-)
 and define P-value ranks, with ties in pre-existing order
*;
tempvar seqnum;
qui {;
  gene long `seqnum'=_n;
  compress `seqnum';
  sort `touse' `pvalue' `seqnum';
  by `touse':replace `rank'=_n if `touse';
};

*
 Generate critical values, null hypothesis credibility,
 and overall corrected P-value threshold
 by the multiple test procedure method specified in -method()-
*;
if inlist("`method'","userspecified","bonferroni","sidak") {;
  *
   One-step procedure is specified
  *;
  * Procedure-specific section for one-step procedures *;
  if "`method'"=="userspecified" {;
    qui replace `critical'=`pcscal' if `touse';
  };
  else if "`method'"=="bonferroni" {;
    qui replace `critical'=`puscal'/`npvalue' if `touse';
  };
  else if "`method'"=="sidak" {;
    tempvar inflevel;
    qui gene double `inflevel'=(1-`puscal')^(1/`npvalue') if `touse';
    qui replace `critical'=1-`inflevel' if `touse' & `inflevel'<1;
    qui replace `critical'=`puscal'/`npvalue' if `touse' & `inflevel'>=1;
    drop `inflevel';
  };
  * Common section for all one-step procedures *;
  qui {;
    replace `nhcred'=`pvalue'>`critical' if `touse';
    * Calculate corrected P-value threshold *;
    summ `critical' if `touse';
    scal `pcscal'=r(min);
  };
};
else if inlist("`method'","holm","holland","liu1","liu2") {;
  *
   Step-down procedure is specified
  *;
  * Procedure-specific section for step-down procedures *;
  if "`method'"=="holm" {;
    qui replace `critical'=`puscal'/(`npvalue'-`rank'+1) if `touse';
  };
  else if "`method'"=="holland" {;
    tempvar inflevel;
    qui gene double `inflevel'=(1-`puscal')^(1/(`npvalue'-`rank'+1)) if `touse';
    qui replace `critical'=1-`inflevel' if `touse' & `inflevel'<1;
    qui replace `critical'=`puscal'/(`npvalue'-`rank'+1) if `touse' & `inflevel'>=1;
    drop `inflevel';
  };
  else if "`method'"=="liu1" {;
    qui {;
      tempvar revrank;
      gene long `revrank'=`npvalue'-`rank'+1 if `touse';
      compress `revrank';
      replace `critical' = 1 - ( 1 - min( 1 , (`npvalue'/`revrank')*`puscal') )^(1/`revrank')
       if `touse';
      drop `revrank';
    };
  };
  else if "`method'"=="liu2" {;
    qui {;
      tempvar revrank;
      gene long `revrank'=`npvalue'-`rank'+1 if `touse';
      compress `revrank';
      replace `critical' = min( 1 , (`npvalue'/(`revrank'*`revrank'))*`puscal' )
       if `touse';
      drop `revrank';
    };
  };
  * Common section for all step-down procedures *;
  qui {;
    replace `nhcred'=sum(`pvalue'<=`critical')<`rank' if `touse';
    * Calculate corrected P-value threshold *;
    count if `touse'&`nhcred';
    if r(N)==0 {;
      summ `critical' if `touse';
      scal `pcscal'=r(max);
    };
    else {;
      summ `critical' if `touse'&`nhcred';
      scal `pcscal'=r(min);
    };
  };
};
else if inlist("`method'","hochberg","rom")|inlist("`method'","simes","yekutieli","krieger") {;
  *
   Step-up procedure is specified
  *;
  * Procedure-specific section for step-up procedures *;
  if "`method'"=="hochberg" {;
    qui replace `critical'=`puscal'/(`npvalue'-`rank'+1) if `touse';
  };
  else if "`method'"=="rom" {;
    qui{;
      tempvar tmptuse tmprank tmpterm;
      *
       -tmptuse- is temporary to-use indicator
       -tmprank- is temporary rank
       -tmpterm- is temporary variable containing terms to be added
        when calculating c_{1:n} from c_{2:n} to c_{n:n}
      *;
      gene byte `tmptuse'=.;
      gene long `tmprank'=.;
      gene double `tmpterm'=.;
      replace `critical'=`puscal' if `touse'&(`rank'==`npvalue');
      local npvm1=`npvalue'-1;
      forv rankcur=`npvm1'(-1)1 {;
        local tmpnpv=`npvalue'-`rankcur'+1;
        replace `tmptuse'=`touse'&(`rank'>=`rankcur');
        replace `tmprank'=`rank'-`rankcur'+1 if `tmptuse';
        replace `tmpterm'=0 if `tmptuse';
        replace `tmpterm'=`tmpterm'+`puscal'^(`tmprank'-1) if `tmptuse'&(`tmprank'>=2)&(`tmprank'<=`tmpnpv');
        replace `tmpterm'=`tmpterm'
          - exp(lnfactorial(`tmpnpv')-lnfactorial(`tmprank')-lnfactorial(`tmpnpv'-`tmprank'))*`critical'^`tmprank'
          if `tmptuse'&(`tmprank'>=2)&(`tmprank'<=`tmpnpv'-1);
        summ `tmpterm' if `tmptuse';
        replace `critical'=r(mean) if `tmptuse'&(`tmprank'==1);
      };
    };
  };
  else if "`method'"=="simes" {;
    qui replace `critical'=`puscal'*`rank'/`npvalue' if `touse';
  };
  else if "`method'"=="yekutieli" {;
    qui {;
      tempvar invrank;tempname suminvr;
      gene double `invrank'=1/`rank' if `touse';
      summ `invrank' if `touse';
      scal `suminvr'=r(sum);
      replace `critical'=`puscal'*`rank'/`npvalue' if `touse';
      replace `critical'=`critical'/`suminvr' if `touse';
      drop `invrank';
      scal drop `suminvr';
    };
  };
  else if "`method'"=="krieger" {;
    qui {;
      tempname pprime;
      scal `pprime'=`puscal'/(1+`puscal');
      replace `critical'=`pprime'*`rank'/`npvalue' if `touse';
      gsort +`touse' -`pvalue' -`seqnum';
      replace `nhcred'=sum(`pvalue'<=`critical')==0 if `touse';
      sort `touse' `pvalue' `seqnum';
      count if `touse'&`nhcred';
      local mzero=r(N);
      if (`mzero'>0)&(`mzero'<`npvalue') {;
        replace `critical'=`pprime'*`rank'/`mzero' if `touse';
      };
    };
  };
  * Common section for all step-up procedures *;
  qui {;
    gsort +`touse' -`pvalue' -`seqnum';
    replace `nhcred'=sum(`pvalue'<=`critical')==0 if `touse';
    sort `touse' `pvalue' `seqnum';
    * Calculate corrected P-value threshold *;
    count if `touse'&(!`nhcred');
    if r(N)==0 {;
      summ `critical' if `touse';
      scal `pcscal'=r(min);
    };
    else {;
      summ `critical' if `touse'&(!`nhcred');
      scal `pcscal'=r(max);
    };
  };
};
else {;
  *
   Unrecognised procedure is specified
  *;
  disp as error `"Unrecognised method(`method')"';
  error 498;
};

*
 Assign uncorrected and corrected overall critical P-values
 to variables -gpuncor- and -gpcor- respectively
 and assign rejection status to variable -reject-
*;
qui{;
  replace `gpuncor'=`puscal' if `touse';
  replace `gpcor'=`pcscal' if `touse';
  replace `reject'=!`nhcred' if `touse';
};

* Label output variables (specifying method) *;
lab var `rank' "P-value rank";
lab var `gpuncor' "Uncorrected overall critical P by method(`method')";
lab var `critical' "Critical P by method(`method')";
lab var `gpcor' "Corrected overall critical P by method(`method')";
lab var `nhcred' "H0 credible by method(`method')";
lab var `reject' "H0 rejected by method(`method')";

* Sort back to original order *;
sort `seqnum';

* Test that -pcscal- has the required properties *;
cap assert `nhcred'==(`pvalue'>`pcscal') if `touse';
if _rc!=0 {;
  disp as text "Corrected overall critical P-value (" `pcscal'
   ") does not separate credible and incredible null hypotheses";
};

* Count rejected P-values *;
qui count if `touse'&`reject';
tempname nreject;
scal `nreject'=r(N);

* Output uncorrected and corrected overall critical P-value thresholds *;
disp _n as text "Method: " as result "`method'"
 _n as text "Uncorrected overall critical P-value: " as result `puscal'
 _n as text "Number of P-values: " as result `npvalue'
 _n as text "Corrected overall critical P-value: " as result `pcscal'
 _n as text "Number of rejected P-values: " as result `nreject';

* Save results to be returned *;
return scalar puncor=`puscal';
return scalar npvalue=`npvalue';
return scalar pcor=`pcscal';
return scalar nreject=`nreject';
return local method "`method'";

end;