%  file man/Parkinsons_Features.Rd
%  Documentation for the Parkinsons_Features dataset
%---------------------
\name{Parkinsons_Features}
\alias{Parkinsons_Features}
\docType{data}
\encoding{UTF-8}
\title{Parkinson's Disease Voice Features Dataset}

\description{
  A dataset containing biomedical voice measurements from people with Parkinson's disease and healthy controls.
  The goal is to analyze voice signal features for detecting and monitoring Parkinson's disease.
}

\usage{data(Parkinsons_Features)}

\format{
  A data frame with 5,876 observations on 22 variables. Each row corresponds to a voice recording from a subject.

  \tabular{ll}{
    \code{subject_id}\tab Identifier for the subject (factor or character)\cr
    \code{age}\tab Age of the subject (numeric)\cr
    \code{sex}\tab Sex of the subject (factor: Male/Female)\cr
    \code{test_time}\tab Time of test (numeric, days since baseline)\cr
    \code{motor_UPDRS}\tab Unified Parkinson's Disease Rating Scale motor score (numeric)\cr
    \code{total_UPDRS}\tab Total UPDRS score (numeric)\cr
    \code{Jitter}\tab Measure of frequency variation (numeric)\cr
    \code{Shimmer}\tab Measure of amplitude variation (numeric)\cr
    \code{NHR}\tab Noise-to-harmonics ratio (numeric)\cr
    \code{HNR}\tab Harmonics-to-noise ratio (numeric)\cr
    \code{RPDE}\tab Recurrence period density entropy (numeric)\cr
    \code{DFA}\tab Detrended fluctuation analysis (numeric)\cr
    \code{PPE}\tab Pitch period entropy (numeric)\cr
    \code{...}\tab Additional voice signal features and measurements (numeric)\cr
  }

  All features are numerical except for identifiers and categorical variables.
}

\details{
  This dataset was collected from subjects with Parkinson's disease and healthy controls.
  Multiple biomedical voice measurements were recorded over time to evaluate disease progression.

  The features include various jitter, shimmer, noise, and entropy measures extracted from sustained vowel phonations.

  The dataset is widely used for classification and regression models aiming to predict Parkinson's disease severity or presence.
}

\source{
  UCI Machine Learning Repository:
  \emph{Parkinsons Telemonitoring Data Set}\cr
  \url{https://archive.ics.uci.edu/ml/datasets/Parkinsons+Telemonitoring}
}

\references{
  Tsanas, A., Little, M.A., McSharry, P.E., & Ramig, L.O. (2010).
  Accurate telemonitoring of Parkinson's disease progression by noninvasive speech tests.
  \emph{IEEE Transactions on Biomedical Engineering}, 57(4), 884–893.
}

\examples{
# Load the packaged data set into the workspace.
data(Parkinsons_Features)

# If every column name starts with V (presumably because the data were read
# without a header line), treat the first row as the variable names: promote
# it to the column names and drop it from the data.
if (all(startsWith(names(Parkinsons_Features), "V"))) {
  # Convert the header row cell by cell. Assigning the one-row data frame
  # directly would coerce factor cells to their integer codes, not their labels.
  header <- vapply(Parkinsons_Features[1, ], as.character, character(1),
                   USE.NAMES = FALSE)
  colnames(Parkinsons_Features) <- header
  Parkinsons_Features <- Parkinsons_Features[-1, ]
}

# Re-infer the type of every column; as.is = TRUE keeps text columns as
# character instead of turning them into factors.
Parkinsons_Features[] <- lapply(Parkinsons_Features, type.convert, as.is = TRUE)

# Numeric summary of the motor score, then its distribution split by sex.
summary(Parkinsons_Features$motor_UPDRS)
boxplot(motor_UPDRS ~ sex, data = Parkinsons_Features,
        main = "Motor UPDRS by Sex", ylab = "Motor UPDRS")
}

\keyword{datasets}
