% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MSmix_functions_package.R
\name{data_description}
\alias{data_description}
\alias{print.data_descr}
\title{Descriptive summaries for partial rankings}
\usage{
data_description(
  rankings,
  marg = TRUE,
  borda_ord = FALSE,
  paired_comp = TRUE,
  subset = NULL,
  item_names = NULL
)

\method{print}{data_descr}(x, ...)
}
\arguments{
\item{rankings}{Integer \eqn{N}\eqn{\times}{x}\eqn{n} matrix or data frame with partial rankings in each row. Missing positions must be coded as \code{NA}.}

\item{marg}{Logical: whether the first-order marginals have to be computed. Defaults to \code{TRUE}.}

\item{borda_ord}{Logical: whether, in the summary statistics, the items must be ordered according to the Borda ranking (i.e., mean rank vector). Defaults to \code{FALSE}.}

\item{paired_comp}{Logical: whether the pairwise comparison matrix has to be computed. Defaults to \code{TRUE}.}

\item{subset}{Optional logical or integer vector specifying the subset of observations, i.e. rows of \code{rankings}, to be kept. Missing values are taken as \code{FALSE}. Defaults to \code{NULL} meaning that all the rows are considered.}

\item{item_names}{Character vector with the names to be used for the items. Defaults to \code{NULL}, meaning that \code{colnames(rankings)} is used and, if not available, \code{item_names} is set equal to \code{"Item1","Item2",...}.}

\item{x}{An object of class \code{"data_descr"} returned by \code{\link{data_description}}.}

\item{...}{Further arguments passed to or from other methods (not used).}
}
\value{
An object of class \code{"data_descr"}, which is a list with the following named components:

\item{\code{n_ranked}}{Integer vector of length \eqn{N} with the number of items ranked in each partial sequence.}
\item{\code{n_ranked_distr}}{Frequency distribution of the \code{n_ranked} vector.}
\item{\code{n_ranks_by_item}}{Integer \eqn{3}\eqn{\times}{x}\eqn{n} matrix with the number of times that each item has been ranked or not. The last row contains the total by column, i.e. the sample size \eqn{N}.}
\item{\code{mean_rank}}{Mean rank vector.}
\item{\code{borda_ordering}}{Character vector corresponding to the Borda ordering. This is obtained from the ranking of the mean rank vector.}
\item{\code{marginals}}{Integer \eqn{n}\eqn{\times}{x}\eqn{n} matrix of the first-order marginals in each column: the \eqn{(j,i)}-th entry indicates the number of times that item \eqn{i} is ranked in position \eqn{j}.}
\item{\code{pc}}{Integer \eqn{n}\eqn{\times}{x}\eqn{n} pairwise comparison matrix: the \eqn{(i,i')}-th entry indicates the number of times that item \eqn{i} is preferred to item \eqn{i'}.}
\item{\code{rankings}}{When \code{borda_ord = TRUE}, an integer \eqn{N}\eqn{\times}{x}\eqn{n} matrix corresponding to \code{rankings} with columns rearranged according to the Borda ordering, otherwise the input \code{rankings}.}
}
\description{
Compute various data summaries for a partial ranking dataset. Unlike analogous functions supplied by other \code{R} packages, \code{data_description} supports partial observations with arbitrary patterns of censoring.

\code{print} method for class \code{"data_descr"}.
}
\details{
The implementation of \code{data_description} is similar to that of \code{rank_summaries} from the \code{PLMIX} package. Unlike the latter, \code{data_description} works with any kind of partial ranking (not only top rankings) and allows subsamples to be summarized through the additional \code{subset} argument.

The Borda ranking, obtained from the ordering of the mean rank vector, corresponds to the MLE of the consensus ranking of the Mallows model with Spearman distance. If \code{mean_rank} contains some \code{NA}s, the corresponding items occupy the bottom positions in the \code{borda_ordering} according to the order they appear in \code{item_names}.
}
\examples{

## Example 1. Sample statistics for the Antifragility dataset.
r_antifrag <- ranks_antifragility[, 1:7]
descr <- data_description(rankings = r_antifrag)
descr

## Example 2. Sample statistics for the Sports dataset.
r_sports <- ranks_sports[, 1:8]
descr <- data_description(rankings = r_sports, borda_ord = TRUE)
descr

## Example 3. Sample statistics for the Sports dataset by gender.
r_sports <- ranks_sports[, 1:8]
desc_f <- data_description(rankings = r_sports, subset = (ranks_sports$Gender == "Female"))
desc_m <- data_description(rankings = r_sports, subset = (ranks_sports$Gender == "Male"))
desc_f
desc_m

}
\references{
Mollica C and Tardella L (2020). PLMIX: An R package for modelling and clustering partially ranked data. \emph{Journal of Statistical Computation and Simulation}, \bold{90}(5), pages 925--959, ISSN: 0094-9655, DOI: 10.1080/00949655.2020.1711909.

Marden JI (1995). Analyzing and modeling rank data. \emph{Monographs on Statistics and Applied Probability} (64). Chapman & Hall, ISBN: 0-412-99521-2. London.
}
\seealso{
\code{\link{plot.data_descr}}, \code{\link{print.data_descr}}
}
