% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot_probabilities_ecdf.R
\name{plot_probabilities_ecdf}
\alias{plot_probabilities_ecdf}
\title{Plot ECDF for the predicted probabilities}
\usage{
plot_probabilities_ecdf(
  data,
  target_col,
  probability_cols,
  predicted_class_col = NULL,
  obs_id_col = NULL,
  group_col = NULL,
  probability_of = "target",
  positive = 2,
  theme_fn = ggplot2::theme_minimal,
  color_scale = ggplot2::scale_colour_brewer(palette = "Dark2"),
  apply_facet = length(probability_cols) > 1,
  add_caption = TRUE,
  ecdf_settings = list(),
  facet_settings = list(),
  xlim = c(0, 1)
)
}
\arguments{
\item{data}{\code{data.frame} with probabilities, target classes and (optional) predicted classes.
 Can also include observation identifiers and a grouping variable.

 Example for binary classification:

 \tabular{rrrrr}{
  \strong{Classifier} \tab \strong{Observation} \tab \strong{Probability} \tab \strong{Target} \tab \strong{Prediction}  \cr
  SVM \tab 1 \tab 0.3 \tab cl_1 \tab cl_1 \cr
  SVM \tab 2 \tab 0.7 \tab cl_1 \tab cl_2 \cr
  NB \tab 1 \tab 0.2 \tab cl_2 \tab cl_1 \cr
  NB \tab 2 \tab 0.8 \tab cl_2 \tab cl_2 \cr
  ... \tab ... \tab ... \tab ... \tab ... \cr
 }

 Example for multiclass classification:

 \tabular{rrrrrrr}{
  \strong{Classifier} \tab \strong{Observation} \tab \strong{cl_1} \tab \strong{cl_2} \tab \strong{cl_3} \tab \strong{Target} \tab \strong{Prediction}  \cr
  SVM \tab 1 \tab 0.2 \tab 0.1 \tab 0.7 \tab cl_1 \tab cl_3 \cr
  SVM \tab 2 \tab 0.3 \tab 0.5 \tab 0.2 \tab cl_1 \tab cl_2 \cr
  NB \tab 1 \tab 0.8 \tab 0.1 \tab 0.1 \tab cl_2 \tab cl_1 \cr
  NB \tab 2 \tab 0.1 \tab 0.6 \tab 0.3 \tab cl_3 \tab cl_2 \cr
  ... \tab ... \tab ... \tab ... \tab ... \tab ... \tab ... \cr
 }

 As created with the various validation functions in \code{cvms}, like
 \code{\link[cvms:cross_validate_fn]{cross_validate_fn()}}.}

\item{target_col}{Name of column with target levels.}

\item{probability_cols}{Name(s) of column(s) with predicted probabilities.

 For \strong{binary} classification, this should be \strong{one column} with the probability of the
 \strong{second class} (alphabetically).

 For \strong{multiclass} classification, this should be \strong{one column per class}.
 These probabilities must sum to \code{1} row-wise.}

\item{predicted_class_col}{Name of column with predicted classes.

 This is required when \code{probability_of = "prediction"}.}

\item{obs_id_col}{Name of column with observation identifiers for averaging the
predicted probabilities per observation before computing the ECDF (\emph{when deemed meaningful}).
When \code{NULL}, each row is an observation.}

\item{group_col}{Name of column with groups. The plot elements
 are split by these groups and can be identified by their color.

 E.g. the \emph{classifier} responsible for the prediction.

 \strong{N.B.} With more than \strong{\code{8}} groups,
 the default \code{`color_scale`} might run out of colors.}

\item{probability_of}{Whether to plot the ECDF for the probabilities of the
 target classes (\code{"target"}) or the predicted classes (\code{"prediction"}).

 For each row, we extract the probability of either the
 \emph{target class} or the \emph{predicted class}. Both are useful
 to plot, as they show the behavior of the classifier in a way a confusion matrix doesn't.
 One classifier might be very certain in its predictions (whether wrong or right), whereas
 another might be less certain.}

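\item{positive}{The positive class in \strong{binary} classification, i.e. the class
 whose probability is given in the probability column.

 The default, \code{2}, is the \strong{second class} (alphabetically).}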

\item{theme_fn}{The \code{ggplot2} theme function to apply.}

\item{color_scale}{\code{ggplot2} color scale object for adding discrete colors to the plot.

 E.g. the output of
 \code{\link[ggplot2:scale_colour_brewer]{ggplot2::scale_colour_brewer()}} or
 \code{\link[ggplot2:scale_colour_viridis_d]{ggplot2::scale_colour_viridis_d()}}.

 \strong{N.B.} The number of colors in the object's palette should be at least as large as
 the number of groups in the \code{`group_col`} column.}

\item{apply_facet}{Whether to use
 \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}}. (Logical)

 By default, faceting is applied when there is more than one probability column (multiclass).}

\item{add_caption}{Whether to add a caption explaining the plot. This is dynamically generated
 and intended as a starting point. (Logical)

 You can overwrite the text with \code{ggplot2::labs(caption = "...")}.}

\item{ecdf_settings}{Named list of arguments for \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}}.

  The \code{mapping} argument is set separately.

  Any argument not in the list will use the default value set by \code{cvms}.

  Defaults: \code{list(geom = "smooth", pad = FALSE)}.

  Common changes are to set \code{`geom = "step"`} and/or \code{`pad = TRUE`}.}

\item{facet_settings}{Named list of arguments for \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}}.

  The \code{facets} argument is set separately.

  Any argument not in the list will use its default value.

  Commonly set arguments are \code{nrow} and \code{ncol}.}

\item{xlim}{Limits for the x-scale.}
}
\value{
A \code{ggplot2} object with a line plot of the ECDF(s), faceted when \code{`apply_facet`} is \code{TRUE}.
}
\description{
\Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}

 Plots the empirical cumulative distribution function (ECDF) for the
 probabilities of either the target classes or the predicted classes.

 Creates a \code{\link[ggplot2:ggplot]{ggplot2}} object using the \code{\link[ggplot2:stat_ecdf]{stat_ecdf()}} stat.
}
\details{
TODO
}
\examples{
\donttest{
# Attach packages
library(cvms)
library(ggplot2)
library(dplyr)

#
# Multiclass
#

# TODO: Go through and rewrite comments and code!

# Plot probabilities of target classes
# From repeated cross-validation of three classifiers

# plot_probabilities_ecdf(
#   data = predicted.musicians,
#   target_col = "Target",
#   probability_cols = c("A", "B", "C", "D"),
#   predicted_class_col = "Predicted Class",
#   group_col = "Classifier",
#   probability_of = "target"
# )

# Plot probabilities of predicted classes
# From repeated cross-validation of three classifiers

# plot_probabilities_ecdf(
#   data = predicted.musicians,
#   target_col = "Target",
#   probability_cols = c("A", "B", "C", "D"),
#   predicted_class_col = "Predicted Class",
#   group_col = "Classifier",
#   probability_of = "prediction"
# )

#
# Binary
#

# Filter the predicted.musicians dataset
# binom_data <- predicted.musicians \%>\%
#   dplyr::filter(
#     Target \%in\% c("A", "B")
#   ) \%>\%
#   # "B" is the second class alphabetically
#   dplyr::rename(Probability = B) \%>\%
#   dplyr::mutate(`Predicted Class` = ifelse(
#     Probability > 0.5, "B", "A")) \%>\%
#   dplyr::select(-dplyr::all_of(c("A","C","D")))

# Plot probabilities of target classes
# From repeated cross-validation of three classifiers

# plot_probabilities_ecdf(
#   data = binom_data,
#   target_col = "Target",
#   probability_cols = "Probability",
#   predicted_class_col = "Predicted Class",
#   group_col = "Classifier",
#   probability_of = "target"
# )

# plot_probabilities_ecdf(
#   data = binom_data,
#   target_col = "Target",
#   probability_cols = "Probability",
#   predicted_class_col = "Predicted Class",
#   group_col = "Classifier",
#   probability_of = "prediction",
#   xlim = c(0.5, 1)
# )

}
}
\seealso{
Other plotting functions: 
\code{\link{dynamic_font_color_settings}()},
\code{\link{font}()},
\code{\link{plot_confusion_matrix}()},
\code{\link{plot_metric_density}()},
\code{\link{plot_probabilities}()},
\code{\link{sum_tile_settings}()}
}
\author{
Ludvig Renbo Olsen, \email{r-pkgs@ludvigolsen.dk}
}
\concept{plotting functions}
\keyword{internal}
