capture mata mata drop data_views()
capture mata mata drop get_data_views()
mata
struct data_views {
    real matrix panelids, timeids, touse, y_data, X_data
    real scalar nrows
}

struct data_views scalar get_data_views(real matrix A) {
    struct data_views scalar long_data

    // Matrix A should contain the following in each column:
    // 1. panelid
    // 2. timeid
    // 3. touse (1 if the observation is valid for on any date, 0 otherwise)
    // 4. y (the independent variable, can be missing)
    // 5-end.  X variables  (optional)

    st_subview(long_data.panelids, A, .,1)
    st_subview(long_data.timeids, A, .,2)
    st_subview(long_data.touse, A, .,3)
    st_subview(long_data.y_data, A, .,4)
    st_subview(long_data.X_data, A, .,5\.)
    long_data.nrows = rows(A)
    return(long_data)
}
end


capture mata mata drop data_indexes()
capture mata mata drop get_data_indexes()
mata:
    struct data_indexes {
    real scalar index_date, gls_flag
    real matrix data_row_index, valid_touse, valid_y, rect_y
    }

    struct data_indexes scalar get_data_indexes(struct data_views scalar long_data, 
        string scalar gls) {
        struct data_indexes scalar full
        real scalar min_time, max_time, n_time, n_panels
        real colvector a, sequential_panelids
        real matrix data_row_index, valid_touse, valid_y, rect_y

        if (gls == "gls") {
            full.gls_flag = 1
        }
        else {
            full.gls_flag = 0
        }        

        // Step 1: Basic setup
        min_time = min(long_data.timeids)
        max_time = max(long_data.timeids)
        n_time = max_time - min_time + 1

        a = 1\ (long_data.panelids[2::long_data.nrows] :!= long_data.panelids[1::long_data.nrows-1])
        sequential_panelids = runningsum(a)

        n_panels = sequential_panelids[rows(sequential_panelids)]

        data_row_index = J(n_panels, n_time, .)
        valid_touse = J(n_panels, n_time, 0)
        if (full.gls_flag == 1) {
            // If GLS is used, we need to track valid y values
            // for the pre-event period
            valid_y = J(n_panels, n_time, 0)
            rect_y = J(n_panels, n_time, .)
        }
        else {
            valid_y = J(0,0,.)
            rect_y =  J(0,0,.)
        }


        // Step 2: Fill data_row_index, valid_touse, valid_y, rect_y in a loop
        for (i = 1; i <= long_data.nrows; i++) {
            row = sequential_panelids[i]    // since panelid is 1-based sequential
            col = long_data.timeids[i] - min_time + 1
            data_row_index[row, col] = i
            if (long_data.touse[i] == 1) valid_touse[row, col] = 1
            // If GLS is used, we need to track valid y values
            if (full.gls_flag == 1) {
                rect_y[row, col] = long_data.y_data[i]
                if (long_data.y_data[i] !=.) valid_y[row, col] = 1
            }
        }
        // Step 3: Fill in the full rectangular lookups

        full.index_date = min_time
        full.data_row_index = data_row_index
        full.valid_touse = valid_touse
        full.valid_y = valid_y
        full.rect_y = rect_y

        return(full)
    }
end


capture mata mata drop current_data_indexes()
capture mata mata drop get_current_indexes()
mata:
    struct current_data_indexes {
    real matrix touse_index, pre_event_touse_index
    real scalar pe_start_date, pe_end_date, pre_event_window_length
    }

    struct current_data_indexes scalar get_current_indexes( 
        struct data_indexes scalar full, real scalar current_date,
           | real scalar pe_end_date, real scalar pe_start_date ) {
        
        struct current_data_indexes scalar current
        real colvector current_valid_y, current_touse, nonzero_ys
        real rowvector col_selection
        real matrix current_rect_y
        real scalar current_col_number, pe_start_col, pe_end_col, total_cols
        
        current_col_number = current_date - full.index_date + 1

        if (full.gls_flag == 1) {
            pe_start_col = pe_start_date - full.index_date + 1
            pe_end_col = pe_end_date - full.index_date + 1
            col_selection = (pe_start_col..pe_end_col, current_col_number)
            total_cols = cols(col_selection)

            // Check if y is valid for all pre-event days
            current_valid_y = rowsum(full.valid_y[., col_selection]) :== 
                total_cols
            // check if y is 0 or close to 0 for all pre-event days
            current_rect_y = abs(full.rect_y[.,pe_start_col..pe_end_col])
            nonzero_ys = rowsum(current_rect_y) :>= .01
            current_valid_y = nonzero_ys :& current_valid_y
            
            
            current_touse = full.valid_touse[., current_col_number] :& current_valid_y
            current.touse_index = select(full.data_row_index[. , current_col_number],
                current_touse)
            current.pre_event_touse_index = vec((select(
                full.data_row_index[. , (pe_start_col..pe_end_col)],current_touse))')
        }
        else {
            current_touse = full.valid_touse[., current_col_number]
            current.touse_index = select(full.data_row_index[. , current_col_number],
                current_touse)
            
            current.pre_event_touse_index = J(0,0,.)
        }
        current.pe_start_date = pe_start_date
        current.pe_end_date = pe_end_date
        current.pre_event_window_length = pe_end_date - pe_start_date + 1
        return(current)
    }
end


capture mata mata drop _get_coefficients()
mata:
    void _get_coefficients(struct data_views scalar long_data, ///
        struct current_data_indexes scalar current, ///
        string scalar b_macro, ///
        string scalar nobs_macro, | ///
        string scalar gls_flag, ///
        real scalar num_principal_components, ///
        string scalar woodbury_flag
        ) {

        real matrix X, pre_event_y_rect, gls_outputs
        real colvector y, pre_event_y
        real scalar pre_event_window_length, nobs

        st_subview(y, long_data.y_data, current.touse_index, .)
        st_subview(X, long_data.X_data, current.touse_index, .)
        X = X, J(rows(X), 1, 1)

        nobs = rows(X)

        if (gls_flag == "gls") {
            pre_event_y = long_data.y_data[current.pre_event_touse_index]
            pre_event_y_rect = (colshape(pre_event_y, current.pre_event_window_length))'
            if (woodbury_flag == "woodbury") {
                b = gls_beta_woodbury(y, X, pre_event_y_rect, num_principal_components)
            }
            else {
                gls_outputs = gls_mat(y, X, pre_event_y_rect, num_principal_components)
                y = gls_outputs[.,1]
                X = gls_outputs[., (2..cols(gls_outputs))]
                b = beta_coefficients(y, X)
            }
        }
        else {
            b = beta_coefficients(y, X)
        }

        // Post the coefficients
        st_matrix(b_macro, b')
        st_numscalar(nobs_macro, nobs)
    }
end


capture mata mata drop _get_significance_stats()
mata:
    void _get_significance_stats(string scalar betas_mat, ///
        string scalar pcdf_mat, ///
        string scalar ts_zmat) {


        betas = st_matrix(betas_mat)
        event_coefs = betas[1,.]        
        pre_event_coefs = betas[2..rows(betas),.]
        sd = sqrt(diagonal(quadvariance(pre_event_coefs)))'
        mean_coefs = mean(pre_event_coefs)
    
        event_pctile = (colsum(abs(betas:-mean_coefs):>= abs(event_coefs:-mean_coefs))):/(rows(betas))


        ts_z =  abs(event_coefs - mean_coefs):/ (sd :* sqrt((rows(betas))/(rows(betas)-1)))
        ts_z = 2:*ttail(rows(betas)-2, ts_z)

        st_matrix(ts_zmat, ts_z)
        st_matrix(pcdf_mat, event_pctile)
    }
end


capture mata mata drop beta_coefficients()
mata:
    real colvector beta_coefficients(real colvector y, real matrix X) {
        // allocate matrices
        real matrix XX, Xy
        real colvector beta

        XX = quadcross(X,X)
        Xy = quadcross(X,y)
        beta = cholsolve(XX,Xy)
        return(beta)
    }
end


capture mata mata drop CholOmega()
mata:
    real matrix CholOmega(real matrix pre_event_y_rect, real scalar npc) {
        real matrix A, U, Vt, pca_coeff, pca_score
        real matrix sig2_e, Omega
        real vector s
        A = pre_event_y_rect :- mean(pre_event_y_rect)
        
        fullsvd(A,U,s,Vt)

        pca_coeff = Vt'[,1..npc]
        pca_score = A*pca_coeff
        sig2_e = variance(A - pca_score*pca_coeff')
        // Omega should be symmetric, but Mata doesn't recognize that it is
        // so we coerce it to be symmetric 
        Omega =  makesymmetric(pca_coeff * diag(variance(pca_score)) * // 
            pca_coeff' + diag(sig2_e))

        return (cholesky(Omega))
    }
end


capture mata mata drop gls_mat()
mata:
    real matrix gls_mat (
        real colvector y, ///
        real matrix X, ///
        real matrix pre_event_y_rect, ///
        real scalar npc
        ) {

        L = CholOmega(pre_event_y_rect, npc)
        y = solvelower_wrapper(L,y)
        X = solvelower_wrapper(L,X)

        return(y,X)
    }
end


capture mata mata drop gls_beta_woodbury()
mata:
    real colvector gls_beta_woodbury (
        real colvector y, ///
        real matrix X, ///
        real matrix pre_event_y_rect, ///
        real scalar npc
        ) {
        // Woodbury identity approach to GLS.
        // Computes beta = (X' Omega^{-1} X)^{-1} X' Omega^{-1} y directly
        // without forming or factoring the full N x N Omega matrix.
        //
        // Omega = V Lambda V' + D  where D = diag(sig2_e), V is N x k PCA loadings
        // Omega^{-1} = D^{-1} - D^{-1} V M V' D^{-1}
        //   where M = (Lambda^{-1} + V' D^{-1} V)^{-1}   (k x k)
        //
        // The only matrix inversion is k x k (k = npc), vs N x N for Cholesky.
        // Speedup is ~50-66% but with slightly less numerical precision due to
        // the wide range of idiosyncratic variances entering D^{-1}.

        real matrix A, U, Vt, V, pca_score
        real matrix Lambda, VtDinv, M, Oinv_X
        real colvector d, D_inv, Oinv_y, beta
        real vector s

        A = pre_event_y_rect :- mean(pre_event_y_rect)

        fullsvd(A, U, s, Vt)

        V = Vt'[, 1..npc]
        pca_score = A * V
        Lambda = diag(variance(pca_score))

        // Idiosyncratic variance (diagonal only)
        d = diagonal(variance(A - pca_score * V'))
        D_inv = 1 :/ d

        // Core Woodbury:  M = (Lambda^{-1} + V' D^{-1} V)^{-1}
        VtDinv = V' :* D_inv'
        M = invsym(invsym(Lambda) + VtDinv * V)

        // Apply Omega^{-1} to y and X:
        //   Omega^{-1} z = D^{-1} z - (D^{-1} V) M (V' D^{-1}) z
        Oinv_y = D_inv :* y - (D_inv :* V) * M * (VtDinv * y)
        Oinv_X = D_inv :* X - (D_inv :* V) * M * (VtDinv * X)

        // GLS beta = (X' Omega^{-1} X)^{-1} X' Omega^{-1} y
        beta = cholsolve(quadcross(X, Oinv_X), quadcross(X, Oinv_y))

        return(beta)
    }
end


if c(stata_version) >=17 {
    capture mata mata drop solvelower_wrapper()
    mata numeric matrix solvelower_wrapper(numeric matrix A, numeric matrix B) return(solvelowerlapacke(A,B))
}
else {
    capture mata mata drop solvelower_wrapper()
    mata numeric matrix solvelower_wrapper(numeric matrix A, numeric matrix B) return (solvelower(A,B))
}