* ===========================================================================
* Detect separation through iterative least squares with equalities (LSE) applied on ReLU outcome
* ===========================================================================

mata:

`Integer' relu_fix_separation(`FixedEffects'	HDFE,
							  `Variables'		y,
							  `Variables'		x,
							  `Integer'			k,
							  `RowVector'		stdev_x,
							  `Variable'		true_w,
							  `String'			weight_type,
							  `String'			weight_var,
							  `Real'			target_inner_tol,
							  `Real'			tol,
							  `Integer'			maxiter,
							  `Varname'			sepname,
							  `Varname'			zname,
							  `Boolean'			debug,
							  `Boolean'			report_r2,
							  `Vector'			non_separated_obs,
							  `Boolean'			strict,
							  `Boolean'			accelerate,
							  `Boolean'			verbose)
{
	// Note: currently ignoring weights (weight_type, weight_var) as the algorithm does not care about them

	 // , num_drop, num_collinear
	`Integer'				n, num_boundary, iter, num_sep, v, col, backup_verbose, total_iter, ic
	`Real'					delta, uu, backup_tol, regtol, epsilon
	`Real'					ee, ee_cumulative, ee_boundary
	`Variable'				is_boundary, u, w, resid, xbd
	`Vector'				boundary_sample, interior_sample, idx, separated_obs, b
	`String'				end_of_table, table_row

	`Boolean'				convergence_is_stuck
	`Integer'				num_candidates, num_candidates1, num_candidates2
	`Real'					progress_ratio, progress_ratio1, progress_ratio2
	`Variable'				xbd_prev1, xbd_prev2
	`Vector'				accelerated_sample
	`Real'					acceleration_value
	`Real'					threshold

	`Vector'				utilde, u_last
	`Matrix'				xtilde

	// 1. Validate and initialize parameters
	v = sepname != "" ? max((verbose, 1)) : verbose // tagsep.ado mode will always have verbose>0
	assert(non_separated_obs==.) // We'll return info here
	regtol = tol ^ 2
	assert_msg(0 < tol & tol < 1, "tol outside limits")
	if (regtol < 1e-13) regtol = 1e-13 // We can't get too close to machine epsilon; 1e-13 or 1e-12 is about the max reghdfe can do

	// 2. Initialize variables
	n = rows(y)
	u = is_boundary = !y
	boundary_sample = selectindex(is_boundary)
	interior_sample = selectindex(!is_boundary)
	num_boundary = rows(boundary_sample)
	separated_obs = J(0, 1, .)
	accelerated_sample = J(0, 1, .)
	acceleration_value = 1 // Weight given to accelerated obs (obs where y==0 and xbd<0 for a few iterations)

	convergence_is_stuck = 0
	xbd = xbd_prev1 = xbd_prev2 = .

	if (v > 0) printf("{txt}\n $$ Starting -relu- separation test on {res}%g{txt} suspect obs.\n", num_boundary)
	if (v > 0) printf("{txt} $$ Parameters: regtol ={res}%2.0e{txt}  tol ={res}%2.0e{txt}  maxiter ={res}%3.0fc{txt}\n", regtol, tol, maxiter)
	
	if (!num_boundary) {
		if (v > 0) printf("{txt} $$ No boundary observations; exiting\n")
		relu_post_results(HDFE, sepname, zname, debug, report_r2, separated_obs, J(n, 1, 0), v)
		return(0)
	}
	
	backup_verbose = HDFE.verbose
	backup_tol = HDFE.tolerance
	HDFE.verbose = verbose - 1
	HDFE.tolerance = regtol

	uu = quadcross(u, u)
	num_sep = 0
	total_iter = 0 // Total iteration count (within HDFE)

	u_last = .

	if (v > 0) printf(" {txt}{c TLC}{hline 5}{c TT}{hline 11}{c TT}{hline 11}{c TT}{hline 7}{c TT}{hline 11}{c TT}{hline 15}{c TT}{hline 14}{c TRC}\n")
	if (v > 0) printf(" {txt}{c |}  i  {c |}    u'u    {c |}    e'e    {c |}  LB %% {c |}   min(e)  {c |}    Epsilon    {c |} # Candidates {c |}\n")
	if (v > 0) printf(" {txt}{c LT}{hline 5}{c +}{hline 11}{c +}{hline 11}{c +}{hline 7}{c +}{hline 11}{c +}{hline 15}{c +}{hline 14}{c RT}\n")
	end_of_table = " {txt}{c BLC}{hline 5}{c BT}{hline 11}{c BT}{hline 11}{c BT}{hline 7}{c BT}{hline 11}{c BT}{hline 15}{c BT}{hline 14}{c BRC}\n"

	for (iter=1; iter<=maxiter; iter++) {

		swap(xbd_prev2, xbd_prev1) 	// xbd_prev2 = xbd_prev1
		swap(xbd_prev1, xbd)		// xbd_prev1 = xbd

		if (iter == 1) {
			utilde = u
			xtilde = x
		}
		else {
			utilde = u + utilde - u_last
			// xtilde = xtilde
		}
		u_last = u

		// Weights
		b = solve_lse(HDFE, u, x, utilde, xtilde, boundary_sample, interior_sample,
		              accelerated_sample, acceleration_value,
		              resid=., epsilon=., ic=., verbose)
		total_iter = total_iter + ic
		delta = epsilon + tol
		xbd = u - resid

		uu = quadcross(u, u)
		ee = quadcross(resid, resid)

		if (v > 0 & iter == 1) {
			ee_cumulative = 0
			ee_boundary = uu
		}
		else {
			ee_cumulative = ee_cumulative + ee
		}


		// Output iteration output
		num_candidates = sum(xbd[boundary_sample]:>delta)
		ee_cumulative = ee_cumulative + ee
		progress_ratio = 100 * ee_cumulative / ee_boundary

		// Update criteria for stuck convergence
		// if (!convergence_is_stuck) convergence_is_stuck = accelerate & (iter> 3) & (progress_ratio < progress_ratio1 + 0.5) & (progress_ratio1 < progress_ratio2 + 0.5) & (num_candidates == num_candidates2)
		if (!convergence_is_stuck) convergence_is_stuck = accelerate & (iter> 3) & (progress_ratio - progress_ratio2 < 1.0) & (num_candidates == num_candidates2)
		accelerated_sample = convergence_is_stuck ? selectindex(!y :& (xbd_prev2 :< 1.01 * xbd_prev1) :& (xbd_prev1 :< 1.01 * xbd) :& (xbd :< (-0.1 * delta)  )) : J(0, 1, .)
		acceleration_value = convergence_is_stuck & rows(accelerated_sample) ? min((256, 4 * acceleration_value)) : 1
		// Question: once we turn acceleration on for a given obs, should we leave it on forever?

		if (v > 0) {
			col = 0
			table_row = sprintf("{txt} {c |} %3.0f {c |}", iter)
			//table_row = table_row + sprintf(" {col %2.0f}{c |}", subiter ? sprintf("%3.0f", subiter) : "", col = col + 14)
			table_row = table_row + sprintf("%10.6f {col %2.0f}{c |}", uu, col = col + 12)
			table_row = table_row + sprintf("%10.6f {col %2.0f}{c |}", ee, col = col + 12)
			table_row = table_row + sprintf(" %5.1f {col %2.0f}{c |}", progress_ratio, col = col + 8)
			table_row = table_row + sprintf("%10.6f {col %2.0f}{c |}", min(resid), col = col + 12)
			table_row = table_row + sprintf("%14.8f {col %2.0f}{c |}", delta, col = col + 16)
			table_row = table_row + sprintf("%12.0fc {col %2.0f}{c |}", num_candidates, col = col + 23)
			if (convergence_is_stuck & rows(accelerated_sample)) table_row = table_row + sprintf(" Accelerating %f obs (w=%f)", rows(accelerated_sample), acceleration_value)
			printf(table_row + "\n")
			//mm_matlist(xbd[1..10]', "%6.4f", 0, "", "")
			//accelerated_sample'
		}

		//mm_matlist((y, J(rows(y), 1, .), u, xbd, resid, is_ignored))
		// Update stuck-related variables
		progress_ratio2 = progress_ratio1
		progress_ratio1 = progress_ratio
		num_candidates2 = num_candidates1
		num_candidates1 = num_candidates
		progress_ratio = num_candidates = .

		// Update xbd -> 0 when y>0
		update_mask(xbd, interior_sample, 0)

		// Update xbd -> 0 when y=0 and xbd is within tolerance of zero
		// (the "0.1" is because I need to be more strict below zero, to avoid false positives)
		idx = select(boundary_sample, inrange(xbd[boundary_sample], -0.1 * delta, delta)) // Do I need to change the 0.1 to e.g. 0.5?
		//idx = select(boundary_sample, abs(xbd[boundary_sample]) :<= delta) // Do I need to change the 0.1 to e.g. 0.5?
		if (length(idx)) update_mask(xbd, idx, 0)

		// Declare separation if xbd>=0 when y==0 (of course, at this point xbd=0 when y>0)
		if (all(xbd[boundary_sample] :>= 0)) {
			num_sep = sum(xbd[boundary_sample] :> 0)
			if (v > 0) printf(end_of_table)
			if (v > 0) printf("{txt} $$ Stopping (no negative predicted values); separation found in {res}%f{txt} observations (%g iterations and %g subiterations)\n", num_sep, iter, total_iter)
			break
		}
		
		// Declare separation if there are no negative residuals
		// - This is due to the normal equations: X'e=0
		// - Since z=xγ>0, then z'e=0. When e_i>0 this is only possible if z_i=0 i.e. there is no more separated obs. left to uncover
		//   (there might be separated obs. when e_i=0; see the proof for more details on why this works)
		// - This case is particularly useful in accelerating convergence when there is *no separation*, see "example_negative_residuals.do"
		
		_edittozerotol(resid, 1e-8)
		threshold = 0 // delta might be better, due to numerical errors
		if (min(resid[boundary_sample]) >= threshold) {
			idx = select(boundary_sample, resid[boundary_sample] :> delta)
			if (length(idx)) update_mask(xbd, idx, 0)
			num_sep = sum(xbd[boundary_sample] :> 0)
			if (v > 0) printf(end_of_table)
			printf("{txt} $$ Stopping (no negative residuals); separation found in {res}%f{txt} observations (%g iterations and %g subiterations)\n", num_sep, iter, total_iter)
			break
		}

		if (iter == maxiter) {
			if (v > 0) printf(end_of_table)
			if (strict) {
				assert_msg(0, "separation via ReLU algorithm: maximum number of iterations reached; aborting", 9010, 0)
			}
			else {
				printf("{err}(ReLU separation check: maximum number of iterations reached; aborting)\n")
				return(0)
			}
		}

		// Apply ReLU function
		u[boundary_sample] = rowmax((xbd[boundary_sample], J(num_boundary, 1, 0) ))

	} // End for-loop

	HDFE.verbose = backup_verbose
	HDFE.tolerance = backup_tol

	if (!num_sep) {
		if (v > 0) printf("{txt} $$ - No separation found; stopping (%g iterations and %g subiterations)\n", iter, total_iter)
		relu_post_results(HDFE, sepname, zname, debug, report_r2, separated_obs, J(n, 1, 0), v)
		return(num_sep)
	}

	if (verbose > -1) printf(`"{txt}(ReLU method dropped %g {browse "http://scorreia.com/research/separation.pdf":separated observation%s} in %g iterations)\n"', num_sep, num_sep > 1 ? "s" : "", iter)
	separated_obs = select(boundary_sample, xbd[boundary_sample] :> 0)
	relu_post_results(HDFE, sepname, zname, debug, report_r2, separated_obs, xbd, v) // Do this BEFORE trimming the data!
	non_separated_obs = trim_separated_obs(HDFE, y, x, weight_type, weight_var, true_w, separated_obs, verbose)
	remove_collinears(HDFE, target_inner_tol, x, k, stdev_x, weight_type, weight_var, true_w, verbose)
	return(num_sep) // Also returns ok_obs_mask and ok_var_mask
}


`Void' relu_post_results(`FixedEffects'	HDFE,
						 `Varname'		sepname,
						 `Varname'		zname,
						 `Boolean'		debug,
						 `Boolean'		report_r2,
						 `Vector'		separated_obs,
						 `Variable'		z,
						 `Boolean'		verbose)
{
	`Variable'				is_separated
	`Boolean'				var_exists
	`Integer'				idx
	`String'				cmd

	if (sepname == "") return
	is_separated = create_mask(HDFE.N, 0, separated_obs, 1)
	var_exists = !missing(_st_varindex(sepname))
	if (var_exists) st_dropvar(sepname) // Drop the variable if already exists; dangerous but this is a secret feature anyways
	idx = st_addvar("byte", sepname)
	st_store(HDFE.sample, idx, is_separated)
	st_varlabel(idx, "[ppmlhdfe: ReLU] Obs. is separated")
	if (verbose > 0) printf("{txt} $$ - Saved separated observations in {res}%s\n", sepname)

	if (zname == "") return
	var_exists = !missing(_st_varindex(zname))
	if (var_exists) st_dropvar(zname)
	idx = st_addvar("double", zname)
	prettify_z(z)
	st_store(HDFE.sample, idx, z)
	st_varlabel(idx, "[ppmlhdfe: ReLU] Certificate of separation")
	if (verbose > 0) printf("{txt} $$ - Saved certificate of separation in {res}%s\n", zname)

	if (!report_r2 | !rows(separated_obs)) return
	cmd = HDFE.absvars == "" ? "noabsorb" : sprintf("absorb(%s)", invtokens(HDFE.absvars))
	cmd = sprintf("reghdfe %s %s, %s", zname, invtokens(HDFE.indepvars), cmd)
	printf("\n{inp}{hline 32} o {hline 32}\n")
	printf("{bf:Verifying certificate of separation:}\n")
	printf("{inp}{bf:. %s}\n", cmd)
	stata(cmd)
	printf("{inp}{hline 32} o {hline 32}\n\n")
}


`Void' prettify_z(`Vector' z)
{
	`Factor'				F
	`Vector'				zz
	`Integer'				i
	`Real'					rescale_factor

	// Find out the -mode- of z[z>0] and rescale by that
	zz = round(1e6 * select(z, z :> 0))
	if (!rows(zz)) return
	
	F = _factor(zz, 1, 0, "", 0, 1)
	maxindex(F.counts, 1, i=., .)
	rescale_factor = (1e6 / zz[i[1]])
	if (0.01 <= rescale_factor & rescale_factor <= 100) {
		z = z * rescale_factor
	}
}

end


/* Algorithm:

1) Define a working depvar u = (y == 0)
2) Solve LSE problem through method of weighting
	(i.e. run a weighted regression on u, with very high weights when y>0)
3) Update -u- with ReLU function
	(i.e. "predict uhat", and then "replace u = max(uhat, 0)")
4) Stop if there is no solution to LSE or if uhat>=0

References:

Method of weighting and least-squares with equality constraints:
- https://link.springer.com/article/10.1007/BF02510363
- https://www.jstor.org/stable/2157402

 */