# Add metainformation derived from known_mf ####

#' @title Add metainformation derived from ume::known_mf
#' @name add_known_mf
#' @family Formula assignment
#' @description Join molecular formula data and metadata about known formulas
#' (e.g. annotate carboxylic-rich alicyclic molecules (CRAM)).
#' The name of the molecular formula column will be set to "mf".
#' @details
#' `mfd` is modified by reference before the join: the column named by
#' `mf_col` is renamed to "mf", and columns that carry the name of a
#' category in `known_mf` (as well as a previous `categories` column) are
#' removed so that repeated calls do not produce duplicated columns.
#' Only categories whose label is flagged with `use_in_ume == 1` in
#' `ume::tab_ume_labels` are considered.
#' @inheritParams main_docu
#' @param mf_col Name of the column in mfd that has the molecular formula information (default: "mf").
#' Formulas have upper case element symbols and elements in the formula are ordered according to the Hill system.
#' @keywords misc
#' @import data.table
#' @author Boris P. Koch
#' @references
#' **CRAM**
#' Hertkorn N., Benner R., Frommberger M., Schmitt-Kopplin P., Witt M.,
#' Kaiser K., Kettrup A., Hedges J.I. (2006). Characterization of a major
#' refractory component of marine dissolved organic matter.
#' *Geochimica et Cosmochimica Acta*, **70**, 2990-3010.
#' \doi{10.1016/j.gca.2006.03.021}
#'
#' **Surfactants**
#' Lechtenfeld O.J., Koch B.P., Gasparovic B., Frka S., Witt M.,
#' Kattner G. (2013). The influence of salinity on the molecular and
#' optical properties of surface microlayers in a karstic estuary.
#' *Marine Chemistry*, **150**, 25-38.
#' \doi{10.1016/j.marchem.2013.01.006}
#'
#' **Ideg**
#' Flerus R., Lechtenfeld O.J., Koch B.P., McCallister S.L., Schmitt-Kopplin P.,
#' Benner R., Kaiser K., Kattner G. (2012). A molecular perspective on
#' the ageing of marine dissolved organic matter. *Biogeosciences*, **9**,
#' 1935-1955.
#' \doi{10.5194/bg-9-1935-2012}
#'
#' **iTerr**
#' Medeiros P.M., Seidel M., Niggemann J., Spencer R.G.M., Hernes P.J.,
#' Yager P.L., Miller W.L., Dittmar T., Hansell D.A. (2016).
#' A novel molecular approach for tracing terrigenous dissolved organic matter
#' into the deep ocean. *Global Biogeochemical Cycles*, **30**, 689-699.
#' \doi{10.1002/2015gb005320}
#'
#' @return A data.table containing additional columns having information on
#' formula categories: a column `categories` (comma-separated category labels
#' per formula) and one count column per matched category. Column
#' `surfactant` is always filled with 0 where no match exists. Columns
#' regarded as empty by `remove_empty_columns()` are dropped.
#' @examples add_known_mf(mfd = mf_data_demo)
#' @export

add_known_mf <- function(mfd, mf_col = "mf", known_mf = ume::known_mf, ...) {

  if (!mf_col %in% names(mfd)) {
    stop("There is no column '", mf_col, "' in table 'mfd'")
  }

  # Standardise the name of the formula column to "mf" (renames by reference).
  # Renaming onto an already existing "mf" column would create duplicated
  # column names and make every later lookup ambiguous, so refuse explicitly.
  if (mf_col != "mf") {
    if ("mf" %in% names(mfd)) {
      stop("Cannot rename column '", mf_col,
           "' to 'mf': table 'mfd' already has a column 'mf'")
    }
    setnames(mfd, old = mf_col, new = "mf")
  }

  # Check if columns for known formulas are already existing in mfd.
  # If true - remove existing columns (including a previous 'categories'
  # column); otherwise the joins below would add duplicated 'i.*' columns.
  # as.character() guards against 'category' being stored as a factor.
  stale_cols <- intersect(
    c(as.character(unique(known_mf[, category])), "categories"),
    names(mfd)
  )
  if (length(stale_cols) > 0L) mfd[, (stale_cols) := NULL]

  # Known formulas that occur in mfd and belong to a category used in ume
  unique_mfs <- unique(mfd$mf)
  categ <- ume::tab_ume_labels[use_in_ume == 1, unique(label)]
  known_sub <- known_mf[mf %in% unique_mfs & category %in% categ]

  if (nrow(known_sub) > 0L) {
    # One row per formula with all of its categories, e.g. "CRAM, surfactant"
    tmp_categ <- known_sub[, .N, by = .(mf, category)][
      , .(categories = paste(category, collapse = ", ")), by = mf]

    # Restructure table containing known molecular formulas:
    # one row per formula, one count column per category
    known_wide <-
      data.table::dcast(
        known_sub,
        mf ~ category,
        fun.aggregate = length,
        fill = 0,
        value.var = "mf_kf_id"
      )

    # Add information of known molecular formulas to formula table (mfd).
    # Formulas of mfd without a match in known_wide get NA in these columns.
    mfd <- known_wide[mfd, on = "mf"]
  } else {
    # No formula of mfd is known: an empty table cannot be cast to wide
    # format, so only an (all NA) 'categories' column is joined below.
    tmp_categ <- data.table::data.table(mf = character(),
                                        categories = character())
  }

  mfd <- tmp_categ[mfd, on = "mf"]

  # Surfactant filter is part of the main filter function f_um_filter
  # and requires a default of 0. The column is missing entirely if none of
  # the formulas in mfd is a known surfactant, so create it in that case.
  if ("surfactant" %in% names(mfd)) {
    mfd[is.na(surfactant), surfactant := 0]
  } else {
    mfd[, surfactant := 0L]
  }

  mfd <- remove_empty_columns(mfd)

  # if(any(names(mfd) %in% categ)){
  #   warning("The assignment of molecular formula classes (`add_known_mf()`) as additional columns will be deprecated.\n",
  #           "In future ume versions only one column 'categories' will be added to 'mfd'.")
  # }

  mfd[]
}
