#' @title Time-Varying Mean (TVM)
#'
#' @description
#' Detects influential subjects in longitudinal data from their response
#' values at each time point. The mean and standard deviation of the response
#' are computed per time point, and a subject is flagged when any of its
#' responses lies more than \code{k} standard deviations from that time
#' point's mean. The function also builds plots that highlight the flagged
#' observations and computes the Influence Score (IS) and Partial Influence
#' Score (PIS) of every observation.
#'
#' @details
#' The function follows these steps:
#' \itemize{
#'   \item Computes the mean and standard deviation of the response at each
#'     time point (missing responses are ignored).
#'   \item Flags observations with
#'     \code{abs(response - mean) > k * sd} and collects their subject IDs.
#'   \item Splits the data into influential and non-influential subjects,
#'     each sorted by subject and time.
#'   \item Builds one scatter plot per time point (with the upper and lower
#'     thresholds drawn as dashed lines) and one longitudinal plot in which
#'     influential subjects are drawn in red.
#'   \item Computes \code{IS = abs((response - mean) / sd)} and
#'     \code{PIS = (response - mean)^2 / (n * sd^2)}, where \code{n} is the
#'     number of distinct subjects in \code{data}.
#' }
#'
#' An observation whose deviation cannot be evaluated (missing response, or
#' a time point with a single observation so that the standard deviation is
#' \code{NA}) is treated as not influential; its IS and PIS are \code{NA}.
#' Time points that are \code{NA} do not get a per-time plot.
#'
#' This method is useful for identifying outliers and understanding
#' variability in longitudinal studies.
#'
#' @param data A data frame containing the longitudinal data.
#' @param subject_id A string giving the name of the subject ID column.
#' @param time A string giving the name of the column holding the time
#'   points at which observations are measured.
#' @param response A string giving the name of the response column.
#' @param k A single numeric value: the number of standard deviations used
#'   as the threshold (default = 2).
#' @param verbose Logical; if TRUE, prints the detected influential subject
#'   IDs as a message.
#'
#' @return A list containing:
#' \item{influential_subjects}{A vector of subject IDs identified as influential.}
#' \item{influential_data}{A data frame containing all rows of the influential subjects.}
#' \item{non_influential_data}{A data frame containing all rows of the non-influential subjects.}
#' \item{influential_time_data}{A data frame containing only the rows (subject and time point) that were flagged.}
#' \item{IS_table}{A data frame with the subject, time, Influence Score (\code{IS}) and Partial Influence Score (\code{PIS}) of every observation.}
#' \item{mean_response_plots}{A named list of plots (\code{Time_<t>}), one per time point.}
#' \item{longitudinal_plot}{A plot of all trajectories with influential subjects highlighted.}
#'
#' If no observation exceeds the threshold, a warning is raised and the list
#' is returned with empty \code{influential_*} entries, the full input as
#' \code{non_influential_data}, and \code{NULL} for \code{IS_table},
#' \code{mean_response_plots} and \code{longitudinal_plot}.
#'
#' @examples
#' data(infsdata)
#' infsdata <- infsdata[1:5,]
#' result <- tvm(infsdata, "subject_id", "time", "response", 2)
#' print(result$influential_subjects)
#' head(result$influential_data)
#' head(result$non_influential_data)
#' head(result$influential_time_data)
#' head(result$IS_table)
#' result$mean_response_plots
#' result$longitudinal_plot
#'
#' @export
#'
#' @seealso slm, wlm, sld, rld


tvm <- function(data, subject_id, time, response, k = 2, verbose = FALSE) {

  # Validate inputs up front so a typo in a column name produces a clear
  # message instead of an obscure error from a later subsetting step.
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  col_args <- list(subject_id = subject_id, time = time, response = response)
  for (arg in names(col_args)) {
    value <- col_args[[arg]]
    if (!is.character(value) || length(value) != 1L || is.na(value)) {
      stop("`", arg, "` must be a single column name (a string).",
           call. = FALSE)
    }
    if (!value %in% names(data)) {
      stop("Column '", value, "' (argument `", arg,
           "`) was not found in `data`.", call. = FALSE)
    }
  }
  if (!is.numeric(k) || length(k) != 1L || is.na(k)) {
    stop("`k` must be a single non-missing numeric value.", call. = FALSE)
  }

  # Mean and SD of the response at each time point
  data_summary <- data |>
    dplyr::group_by(.data[[time]]) |>
    dplyr::summarise(
      mean_response = mean(.data[[response]], na.rm = TRUE),
      sd_response   = stats::sd(.data[[response]], na.rm = TRUE),
      .groups = "drop"
    )

  # Attach the per-time summary to every observation
  data_merged <- merge(data, data_summary, by = time, all.x = TRUE)

  deviation <- data_merged[[response]] - data_merged$mean_response
  half_width <- k * data_merged$sd_response

  # Thresholds (drawn as dashed lines in the per-time plots)
  data_merged$threshold_upper <- data_merged$mean_response + half_width
  data_merged$threshold_lower <- data_merged$mean_response - half_width

  # Influential observations. The comparison is NA when the response is
  # missing or the SD is undefined (single observation at a time point).
  # An NA used as a logical row index would inject all-NA rows and NA
  # subject IDs into the results, so such observations count as FALSE.
  exceeds <- abs(deviation) > half_width
  data_merged$influential <- !is.na(exceeds) & exceeds

  influential_subjects <-
    unique(data_merged[[subject_id]][data_merged$influential])

  if (length(influential_subjects) == 0) {
    warning(
      "No influential subjects detected based on the given threshold.",
      call. = FALSE
    )
    return(list(
      influential_subjects = integer(0),
      influential_data = data[0, ],
      non_influential_data = data,
      influential_time_data = data[0, ],
      IS_table = NULL,
      mean_response_plots = NULL,
      longitudinal_plot = NULL
    ))
  }

  # Split subjects; keep only the caller's original columns, in their
  # original order (merge() moves the key column to the front).
  is_influential_subject <- data_merged[[subject_id]] %in% influential_subjects

  influential_data <-
    data_merged[is_influential_subject, names(data), drop = FALSE] |>
    dplyr::arrange(.data[[subject_id]], .data[[time]])

  non_influential_data <-
    data_merged[!is_influential_subject, names(data), drop = FALSE] |>
    dplyr::arrange(.data[[subject_id]], .data[[time]])

  influential_time_data <-
    data_merged[data_merged$influential, names(data), drop = FALSE] |>
    dplyr::arrange(.data[[subject_id]], .data[[time]])

  if (verbose) {
    message(
      "Influential subjects detected: ",
      paste(influential_subjects, collapse = ", ")
    )
  }

  ## Mean response plots by time
  time_points <- unique(data_merged[[time]])
  time_points <- time_points[!is.na(time_points)]
  plots <- vector("list", length(time_points))

  # Iterate by index: `for (t in time_points)` would strip attributes, so
  # Date/POSIXct time points would show up as raw numbers in names/titles.
  for (i in seq_along(time_points)) {
    t <- time_points[i]

    # which() drops rows whose time is NA instead of returning NA rows
    subset_data <-
      data_merged[which(data_merged[[time]] == t), , drop = FALSE]
    influential_subset <-
      subset_data[subset_data$influential, , drop = FALSE]

    plots[[i]] <-
      ggplot2::ggplot(
        subset_data,
        ggplot2::aes(x = .data[[subject_id]], y = .data[[response]])
      ) +
      ggplot2::geom_point(size = 3) +
      ggplot2::geom_point(
        data = influential_subset,
        color = "red",
        size = 4
      ) +
      ggplot2::geom_hline(
        yintercept = unique(subset_data$threshold_upper),
        linetype = "dashed",
        color = "red"
      ) +
      ggplot2::geom_hline(
        yintercept = unique(subset_data$threshold_lower),
        linetype = "dashed",
        color = "red"
      ) +
      ggplot2::labs(
        title = paste("Mean Responses at Time =", t),
        x = subject_id,
        y = "Response"
      ) +
      ggplot2::theme_minimal()
  }
  names(plots) <- paste0("Time_", time_points)

  ## Longitudinal plot: all trajectories, influential subjects in red
  longitudinal_plot <-
    ggplot2::ggplot(
      data,
      ggplot2::aes(
        x = .data[[time]],
        y = .data[[response]],
        group = .data[[subject_id]]
      )
    ) +
    ggplot2::geom_line(alpha = 0.8) +
    ggplot2::geom_line(
      data = influential_data,
      color = "red",
      linewidth = 1.2
    ) +
    ggplot2::labs(
      title = "Longitudinal Data with Influential Subjects (TVM Method)",
      x = time,
      y = response
    ) +
    ggplot2::theme_minimal()

  # Influence scores: IS is the absolute standardised deviation; PIS is the
  # squared standardised deviation divided by the number of subjects.
  n_subjects <- length(unique(data[[subject_id]]))

  data_merged$IS <- abs(deviation / data_merged$sd_response)
  data_merged$PIS <- deviation^2 / (n_subjects * data_merged$sd_response^2)

  IS_table <- data_merged[, c(subject_id, time, "IS", "PIS"), drop = FALSE]

  list(
    influential_subjects = influential_subjects,
    influential_data = influential_data,
    non_influential_data = non_influential_data,
    influential_time_data = influential_time_data,
    IS_table = IS_table,
    mean_response_plots = plots,
    longitudinal_plot = longitudinal_plot
  )
}

# Register the names below as known global variables for this package so
# that code-analysis checks do not report them as undefined.
utils::globalVariables(
  c(
    ".data",
    "mean_response",
    "sd_response",
    "threshold_upper",
    "threshold_lower"
  )
)
