% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/descript.R
\name{descript}
\alias{descript}
\alias{get_descriptFuns}
\title{Compute univariate descriptive statistics}
\usage{
descript(df, funs = get_descriptFuns(), discrete = FALSE)

get_descriptFuns()
}
\arguments{
\item{df}{typically a \code{data.frame} or \code{tibble}-like structure
 containing the variables of interest

 Note that \code{factor} and \code{character} vectors will be treated as
 discrete observations, and by default are omitted from the computation
 of the quantitative descriptive statistics specified in \code{funs}. However,
 setting \code{discrete = TRUE} will provide count-type information for these
 discrete variables, in which case arguments to \code{funs} are ignored}

\item{funs}{functions to apply when \code{discrete = FALSE}. Can be modified
 by the user to include or exclude further functions, however each supplied
 function must return a scalar. Use \code{get_descriptFuns()} to return
 the full list of functions, which may then be augmented or subsetted
 based on the user's requirements. Default descriptive statistics returned are:

 \describe{
  \item{\code{n}}{number of non-missing observations}
  \item{\code{mean}}{mean}
  \item{\code{trim}}{trimmed mean (10\%)}
  \item{\code{sd}}{standard deviation}
  \item{\code{skew}}{skewness (from \code{e1071})}
  \item{\code{kurt}}{kurtosis (from \code{e1071})}
  \item{\code{min}}{minimum}
  \item{\code{P25}}{25th percentile (a.k.a., 1st/lower quartile, Q1), returned from \code{\link{quantile}}}
  \item{\code{P50}}{median (50th percentile)}
  \item{\code{P75}}{75th percentile (a.k.a., 3rd/upper quartile, Q3), returned from \code{\link{quantile}}}
  \item{\code{max}}{maximum}
 }

 Note that by default the \code{na.rm} behavior is set to \code{TRUE}
 in each function call}

\item{discrete}{logical; include summary statistics for \code{discrete}
variables only? If \code{TRUE} then only count and proportion
information for the discrete variables will be returned. For greater flexibility
in creating cross-tabulated count/proportion information see \code{\link{xtabs}}}
}
\description{
Function returns univariate data summaries for each variable supplied. For presentation
purposes, discrete and continuous variables are treated separately, the former of which
reflects count/proportion information while the latter are supplied to a (customizable) list
of univariate summary functions. As such, quantitative/continuous variable
information is kept distinct in the output, while discrete variables (e.g.,
\code{factor} and \code{character} vectors) are returned by using the
\code{discrete} argument.
}
\details{
The purpose of this function is to provide
a more pipe-friendly API for selecting and subsetting variables using the
\code{dplyr} syntax, where conditional statistics are evaluated
internally using the \code{\link{by}} function (when multiple variables are
to be summarised). As a special case,
if only a single variable is being summarised then the canonical output
from \code{dplyr::summarise} will be returned.

\emph{Conditioning}: As the function is intended to support
pipe-friendly code specifications, conditioning/group subset
specifications are declared using \code{\link[dplyr]{group_by}}
and subsequently passed to \code{descript}.
}
\examples{

# dplyr supplies the select(), filter(), and group_by() verbs used below
library(dplyr)

data(mtcars)

# disabled by default; change FALSE to TRUE to inject missing values
if(FALSE){
  # run the following to see behavior with NA values in dataset
  mtcars[sample(1:nrow(mtcars), 3), 'cyl'] <- NA
  mtcars[sample(1:nrow(mtcars), 5), 'mpg'] <- NA
}

# copy of mtcars with cyl, am, and vs converted to factors
fmtcars <- within(mtcars, {
	cyl <- factor(cyl)
	am <- factor(am, labels=c('automatic', 'manual'))
	vs <- factor(vs)
})

# with and without factor variables
mtcars |> descript()
fmtcars |> descript()               # factors/discrete vars omitted
fmtcars |> descript(discrete=TRUE)  # discrete variables only

# for discrete variables, xtabs() is generally nicer as cross-tabs can
# be specified explicitly (though can be cumbersome); piping the table
# into prop.table() gives proportions instead of counts
xtabs(~ am, fmtcars)
xtabs(~ am, fmtcars) |> prop.table()
xtabs(~ am + cyl + vs, fmtcars)
xtabs(~ am + cyl + vs, fmtcars) |> prop.table()

# usual pipe chaining: subset columns/rows with dplyr verbs first
fmtcars |> select(mpg, wt) |> descript()
fmtcars |> filter(mpg > 20) |> select(mpg, wt) |> descript()

# conditioning with group_by()
fmtcars |> group_by(cyl) |> descript()
fmtcars |> group_by(cyl, am) |> descript()
fmtcars |> group_by(cyl, am) |> select(mpg, wt) |> descript()

# with single variables, typical dplyr::summarise() output returned
fmtcars |> select(mpg) |> descript()
fmtcars |> group_by(cyl) |> select(mpg) |> descript()
fmtcars |> group_by(cyl, am) |> select(mpg) |> descript()

# discrete variables also work with group_by(), though again
#  xtabs() is generally more flexible
fmtcars |> group_by(cyl) |> descript(discrete=TRUE)
fmtcars |> group_by(am) |> descript(discrete=TRUE)
fmtcars |> group_by(cyl, am) |> descript(discrete=TRUE)

# only return a subset of summary statistics
funs <- get_descriptFuns()
sfuns <- funs[c('n', 'mean', 'sd')] # subset
fmtcars |> descript(funs=sfuns) # only n, mean, and sd

# add new functions (each must return a scalar)
funs2 <- c(sfuns,
           trim_20 = \(x) mean(x, trim=.2, na.rm=TRUE),
           median= \(x) median(x, na.rm=TRUE))
fmtcars |> descript(funs=funs2)

}
\seealso{
\code{\link[dplyr]{summarise}}, \code{\link[dplyr]{group_by}}, \code{\link{xtabs}}
}
