#' Fixed effect jackknife IV ('FEJIV') estimation
#'
#' @description
#' fejiv implements the fixed effect jackknife IV (FEJIV) estimator of Chao, Swanson, and Woutersen (2023),
#' which enables consistent IV estimation with many (possibly weak) instruments, cluster fixed effects,
#' heteroskedastic errors, and possibly many exogenous explanatory variables.
#'
#' @param Y The dependent variable (numeric vector).
#' @param D The endogenous explanatory variable (numeric vector or matrix with one column).
#' @param Z The instrumental variables (numeric vector, matrix, or data frame).
#' @param X Optional exogenous explanatory variables (numeric vector, matrix, or data frame; Default \code{NULL}.).
#' @param absorb Optional categorical variable to be absorbed (vector or factor; Default \code{NULL}.).
#'               All categories should have a frequency of three or more.
#'
#' @details
#' Consistency of the FEJIV estimator requires that instrument strength satisfies a key growth condition: the concentration
#' parameter must grow faster than the square root of the number of instruments.  Mikusheva and Sun (2022) show that this
#' condition is necessary for the existence of a consistent test and also propose a test of this condition, implemented in
#' the Stata command manyweakivpretest, available at Liyang Sun's GitHub.
#'
#' Słoczyński (2024) recommends the FEJIV estimator as an alternative to two-stage least squares (2SLS) when estimating the
#' fully interacted specification of Angrist and Imbens (1995).  Within the local average treatment effect (LATE) framework,
#' when strong monotonicity is doubtful but weak monotonicity is plausible, the fully interacted specification eliminates the
#' problem of "negative weights."
#'
#' This is a companion software package for Słoczyński (2024).  If you use it,
#' please cite both Słoczyński (2024) and Chao, Swanson, and Woutersen (2023).
#'
#' @return A list of class \code{"fejiv"} with elements:
#' \item{coefficient}{Coefficient on the endogenous regressor.}
#' \item{vcov}{Estimated variance.}
#' \item{se}{Standard error.}
#' \item{N}{Number of observations.}
#' \item{treat}{Name of the endogenous variable.}
#' \item{call}{Matched function call.}
#' \item{title}{Character string for printing.}
#'
#' @references
#' Angrist, Joshua D., and Guido W. Imbens (1995).
#'   Two-Stage Least Squares Estimation of Average Causal Effects in Models
#'   with Variable Treatment Intensity.
#'   *Journal of the American Statistical Association*, 90(430), 431–442.
#'
#' Chao, John C., Norman R. Swanson, and Tiemen Woutersen (2023).
#'   Jackknife Estimation of a Cluster-Sample IV Regression Model with Many Weak Instruments.
#'   *Journal of Econometrics*, 235(2), 1747–1769.
#'
#' Mikusheva, Anna, and Liyang Sun (2022).
#'   Inference with Many Weak Instruments.
#'   *Review of Economic Studies*, 89(5), 2663–2686.
#'
#' Słoczyński, Tymon (2024).
#'   When Should We (Not) Interpret Linear IV Estimands as LATE?
#'   arXiv:2011.06695. <https://arxiv.org/abs/2011.06695>.
#'
#' @examples
#' # one fast example for demonstration
#' set.seed(2025)
#' n <- 100
#' Y <- rnorm(n)
#' D <- rnorm(n)
#' Z <- matrix(rnorm(n * 5), n, 5)
#'
#' # Basic usage - no fixed effects
#' result <- fejiv(Y, D, Z)
#' print(result)
#'
#' # Example with fixed effects
#' absorb_var <- rep(1:10, each = 10)
#' result_fe <- fejiv(Y, D, Z, absorb = absorb_var)
#' print(result_fe)
#'
#' \donttest{
#' # --------------------------------------------------------------------
#' # Example: Revisiting Card (1995) using fejiv
#' # --------------------------------------------------------------------
#' # Realistic example with a larger sample and fixed effects
#' # This takes longer due to the computational complexity of fejiv
#'
#' if (requireNamespace("haven", quietly = TRUE)) {
#'   library(haven)
#'
#'   # Load data directly from Tymon Słoczyński's GitHub
#'   data <- read_dta("https://tslocz.github.io/card.dta")
#'
#'   # Create a college dummy
#'   data$college <- as.numeric(data$educ > 12)
#'
#'   # Construct cluster groups following Słoczyński (2024)
#'   data$group <- interaction(
#'     data$black, data$smsa, data$smsa66, data$south, data$south66
#'   )
#'
#'   # Drop clusters with fewer than 3 observations
#'   data$gsize <- ave(rep(1, nrow(data)), data$group, FUN = length)
#'   data <- data[data$gsize >= 3, ]
#'
#'   # Run Fixed Effect Jackknife IV (FEJIV) regression
#'   # Instruments: nearc4 interacted with cluster group (no main effects)
#'   model <- fejiv(
#'     Y      = data$lwage,
#'     D      = data$college,
#'     Z      = model.matrix(~ nearc4:factor(group) - 1, data = data),
#'     absorb = data$group
#'   )
#'
#'   print(model)
#'  }
#' }
#'
#' @section Acknowledgments:
#' This command is based on MATLAB code for the estimators of Chao, Swanson, and Woutersen (2023), generously shared by Tiemen Woutersen.
#'
#' @section License:
#' This package is licensed under the MIT License.  See the LICENSE file included with the distribution.
#'
#' @author
#' Qihui Lei, University of Wisconsin, Email: \email{qlei9@@wisc.edu}
#' Tymon Słoczyński, Brandeis University, Email: \email{tslocz@@brandeis.edu}
#'
#' @importFrom MASS ginv
#' @importFrom Matrix crossprod
#' @importFrom stats na.omit pt model.matrix printCoefmat
#' @export
fejiv <- function(Y, D, Z, X = NULL, absorb = NULL) {

  # --- Input Validation and Preprocessing ---
  if (missing(Y) || missing(D) || missing(Z)) {
    stop("Y, D, and Z are required inputs!")
  }

  # Error checking Y
  if (!is.numeric(Y) || (is.matrix(Y) && ncol(Y) != 1) || (is.data.frame(Y) && ncol(Y) != 1)) {
    stop("Y is not a numeric vector, a 1-column matrix, or a 1-column data.frame.")
  }
  Y <- as.matrix(Y)
  Y_length <- nrow(Y)

  # Error checking D
  if (!is.numeric(D) || (is.matrix(D) && ncol(D) != 1) || (is.data.frame(D) && ncol(D) != 1)) {
    stop("D is not a numeric vector, a 1-column matrix, or a 1-column data.frame.")
  }
  D_mat <- as.matrix(D)
  if (nrow(D_mat) != Y_length) stop("Row Dimension of Y and D are not the same!")

  # Capture the variable name
  D_call_name <- deparse(substitute(D))
  if (grepl("\\$", D_call_name)) {
    endo_name <- sub(".*\\$", "", D_call_name)
  } else {
    endo_name <- D_call_name
  }

  # Error checking Z
  Z <- as.data.frame(Z)
  Z[] <- lapply(Z, function(x) if (is.character(x)) as.factor(x) else if (is.logical(x)) as.numeric(x) else x)
  if(!nrow(Z) == Y_length) stop("Row Dimension of Y and Z are not the same!")

  # Error checking X
  if (!is.null(X)) {
    X <- as.data.frame(X)
    X[] <- lapply(X, function(x) if (is.character(x)) as.factor(x) else if (is.logical(x)) as.numeric(x) else x)
    if(!nrow(X) == Y_length) stop("Row Dimension of Y and X are not the same!")
  }

  # Error checking absorb
  if (!is.null(absorb)) {
    absorb <- as.data.frame(absorb)
    if (ncol(absorb) != 1) stop("absorb must be a single-column variable.")
    if(!nrow(absorb) == Y_length) stop("Row Dimension of Y and absorb are not the same!")
  }

  # --- Combine and Omit NAs ---
  data_list <- list(Y = Y, D = D_mat, Z = Z)
  if (!is.null(X)) data_list$X <- X
  if (!is.null(absorb)) data_list$absorb <- absorb
  all_data <- na.omit(do.call(cbind, data_list))

  # --- Variable Matrices Setup and Core FEJIV Logic ---
  y <- as.matrix(all_data[,1])
  x <- as.matrix(all_data[,2])
  m <- nrow(y)

  # Instruments
  Z <- as.matrix(all_data[,3:(ncol(Z)+2)])

  # Covariates
  if (is.null(X)) {
    W <- matrix(1, m, 1)
  } else {
    W <- as.matrix(all_data[,(ncol(Z)+3):(ncol(Z)+ncol(X)+2)])
  }

  # Fixed Effects (if requested)
  if (!is.null(absorb)) {
    absorb <- as.matrix(all_data[,ncol(all_data)])
    absorb <- as.factor(absorb)
    if (any(table(absorb) < 3)) warning("All categories should have a frequency of three or more.")
    Q <- model.matrix(~ absorb - 1)
  } else {
    Q <- matrix(1, m, 1)
  }

  # --- FEJIV Calculation ---
  MQ <- diag(m) - Q %*% solve(crossprod(Q)) %*% t(Q)
  Zbar <- cbind(Z, W)
  MWQ <- MQ - ((MQ %*% W) %*% ginv(crossprod(W, MQ %*% W)) %*% t(W) %*% MQ)
  Portho <- (MWQ %*% Z) %*% ginv(crossprod(Z, MWQ %*% Z)) %*% t(Z) %*% MWQ
  dgP <- diag(Portho)
  MXV <- MQ - ((MQ %*% Zbar) %*% ginv(crossprod(Zbar, MQ %*% Zbar)) %*% t(Zbar) %*% MQ)

  # FEJIV Coefficient
  HPMXV <- MXV * MXV
  vtheta <- ginv(HPMXV) %*% dgP
  Dtheta <- diag(as.vector(vtheta))
  R <- Portho - (MXV %*% Dtheta %*% MXV)
  Denom <- crossprod(x, R %*% x)
  Num <- crossprod(x, R %*% y)
  b <- as.numeric(Num / Denom)

  # Standard Error
  ehat <- MXV %*% (y - x * b)
  inv_MQ_sq <- solve(MQ * MQ)
  vsig <- inv_MQ_sq %*% (ehat * ehat)
  Dvsig <- diag(as.vector(vsig))
  MZWQx <- MXV %*% x
  ue <- inv_MQ_sq %*% (ehat * MZWQx)
  Sig1 <- crossprod(x, R %*% Dvsig %*% R %*% x)
  Sig2 <- crossprod(ue, (R * R) %*% ue)
  Sig <- Sig1 + Sig2
  Vhat <- Sig / (Denom^2)
  se <- sqrt(as.numeric(Vhat))

  # Output
  names(b) <- names(se) <- rownames(Vhat) <- colnames(Vhat) <- endo_name
  out <- list(
    coefficients = b,
    vcov = Vhat,
    se = se,
    N = m,
    treat = endo_name,
    call = match.call(),
    title = "FEJIV estimation"
  )
  class(out) <- "fejiv"
  return(out)
}

#' @export
print.fejiv <- function(x, digits = max(3L, getOption("digits") - 3L), ...) {
  cat("\n", x$title, "\n")
  cat("\nCall:\n")
  cat(paste(deparse(x$call), collapse = "\n"), "\n\n")
  cat("Number of observations:", x$N, "\n\n")
  cat("Coefficients:\n")
  coef_tab <- cbind(
    Estimate = x$coefficients,
    `Std. Error` = x$se,
    `t value` = x$coefficients / x$se,
    `Pr(>|t|)` = 2 * pt(-abs(x$coefficients / x$se), df = x$N - 1)
  )
  printCoefmat(coef_tab, digits = digits)
  invisible(x)
}

#' @export
summary.fejiv <- function(object, ...) {
  print(object, ...)
}
