*! Date        : 6 Dec 2006
*! Version     : 1.04
*! Authors     : Adrian Mander
*! Email       : adrian.mander@mrc-hnr.cam.ac.uk
*!
*! Description : clogit allowing weights to vary within group()
*! The weights are within strata weights.. NOT frequency weights!!!

/*
6Dec06 v1.04 - Solves one bug about dropping sets of non-varying exposure or outcome

NOTE use the union pre-load dataset to see problem with multiple events per paired set.. this is
still a problem

*/

/* Need to implement replay() */

program define wclogit, eclass
version 7.0
syntax [varlist] [if] [in] [aweight/] , Group(varname) [Level(int $S_level) OR ]
preserve

marksample touse

/* Check for explanatory variables that do not change within risksets */

tokenize `varlist'
local depvar "`1'"

local newvlist ""
_rmcoll `varlist'
local list "`r(varlist)'"
foreach var of local list {
   qui sort `group' `var'
   qui by `group': gen grpn = _N if `touse'
   qui by `group' `var': gen subgrpn = _N if `touse'
   qui count if subgrpn==grpn & `touse'
   if `r(N)'==_N {
     di as txt "note: `var' omitted due to no within-group variance."
   }
   if `r(N)'<_N {
     local newvlist "`newvlist' `var'"
   }
   drop grpn subgrpn
}

/* Disease must be binary!!! */

local i 1
foreach var of local newvlist {
  if `i'~=1 {
    local sublist "`sublist' `var'"
  }
  if `i'==1 {
    local case "`var'"
    local i=`i'+1
  }
  qui drop if `var'==.
}

tempname wght Xb sum theta grp ade

qui clogit `newvlist', group(`group') `or' nolog
mat beta = get(_b) 

/* The algorithm breaks if there is some perfect matched sets.. and the
weights shouldn't affect this... 
*/
qui gen `ade'=e(sample)
qui su `ade'
if r(min)==0 {
  di as error "Warning : Dropping lines of data due to all positive or all negative outcomes"
  drop if `ade'==0
  drop `ade'
}


cap estimates clear

if "`exp'"=="" {
	gen `wght' =1
}
else gen `wght'=`exp'

sort `group'

local names: colnames(beta)

local oldlik  0
local likhood 1
local i 1
di
while (abs(`likhood'-`oldlik')>0.000001) {

  /*
    1) Create the X by beta variable beta should contain all the variables in model 
    2) Then sum all WiQi per risk set for `sum'
    3) Then calc the likelihood of risk set w

matrix score creates Xb  given the current beta  and Xb is just a tempname
Theta therefore consists of the exponential of the previous data.. this is where a large beta messes up!!!!

  */

  matrix score double `Xb' = beta
  qui gen double `theta' = exp(`Xb')
  qui by `group': gen double `sum' = sum(`theta'*`wght')
  qui by `group': gen double w = `theta'*`wght'/`sum'[_N]

/*
A debug
su `Xb' w `sum' `theta'
mat list beta
*/

/* MY test on what is happening with score function... this is probably
only going to work when weights sum to 1 within group... i.e. probability weights.

  qui by `group': gen double l=sum(`theta'*`wght')
  qui by `group': gen double k=sum(`theta'*`wght'*e)
  qui by `group': gen double score = cond(d==1,e,.)-k/l
  qui by `group': gen kl = k/l if _n==_N
*/

/* I think this is another way of calculating the matrices 
NO need it to get `w'
*/

  parse "`sublist'", parse(" ")
  local sw ""
  local w  ""
  local no 1
  while "`1'"~="" {
/* square rooting w here because of mat accum later */
 /* OLDWAY   qui gen double w`no' = `1'*sqrt(w) */
/*Not sure this correct */
    qui gen double w`no' = `1'*sqrt(w) 

    qui by `group': gen double s`no' = sum(`1'*w)
    local sw = "`sw'"+" s`no'"
    local w = "`w'"+" w`no'"
    mac shift
    local no = `no'+1
  }

/*
   A contains the x-value for the case
   AA contains  W*x summed
   B = also W*x summed
   C = W^2*x^2
*/
  qui by `group': gen `grp'= _n==_N
  qui matrix accum A = `case' `sublist' , nocons
  qui matrix accum AA = w `sublist', nocons
  qui matrix accum B = `w', nocons
  qui matrix accum C = `sw' if `grp'==1, nocons

/*
di "theta `theta' weight `wght' sum `sum' Xb `Xb' "
blist
*/


/*
  Solution is when the score U = 0 
  change in gradient is U/V.... add this to previous estimate

U2 wrong

*/

  mat U1 = A[1,2...]
  mat U2 = AA[1,2...]
  matrix U = U1-U2
  matrix V = B-C
  mat Vinv = syminv(V)
  mat grad = U*Vinv
  mat nbeta = beta + grad
  mat beta = nbeta

  matrix colnames beta = `names'
  matrix colnames Vinv = `names'
  matrix rownames Vinv = `names'

  drop `Xb'
  drop `theta'
  drop `sum'
  drop w
  drop `sw'
  drop `w'
  drop `grp'

/* Calculate loglikelihood */

  qui matrix score `Xb' = beta
  qui gen double `theta' = exp(`Xb')
  qui by `group':gen temp=sum(`theta')
  qui by `group': gen double temp1 = cond(`case'==1, ln(`theta'[_n]/temp[_N]),0)
  qui replace temp1= sum(temp1)
  local oldlik = `likhood'
  local likhood = temp1[_N]
  di as txt "Iteration `i':   log likelihood =", as res round(`likhood',0.00000001)
  drop temp*
  drop `theta'
  drop `Xb'
  local i= `i'+1
}

/* The _N is your obs because weights are within strata weights and this should be individual data*/
local obs = _N


estimates post beta Vinv, depname("`case'") obs(`obs')
qui testparm `newvlist'
di
local len=79-length("`obs'")
di as txt "Strata-varying weighted clogit ", _col(51) "Number of obs", _col(67) "=", _col(`len') as res `obs'
di as txt _col(51) "Wald chi2(",as res `r(df)',as txt ")", _col(67) "=", _col(73) as res %6.3f `r(chi2)'
di as txt _col(51) "Prob > chi2", _col(67) "=", _col(74) as res %4.3f `r(p)'
di as txt "Log likelihood =", as res round(`likhood',0.00000001)
di
if "`or'"~="" { estimates display, eform(Odds Ratio) }
else { estimates display }

est scalar ll= `likhood'
est scalar df_m=colsof(nbeta)
/*est scalar r2_p = `r(p)'
*/
est local cmd = "wclogit"
est local group= "`group'"
est local predict "wclogit_p"
est local depvar "`depvar'"
est local chi2type "Wald"


restore
end