% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/spatial_nndm_cv.R
\name{spatial_nndm_cv}
\alias{spatial_nndm_cv}
\title{Nearest neighbor distance matching (NNDM) cross-validation}
\usage{
spatial_nndm_cv(
  data,
  prediction_sites,
  ...,
  autocorrelation_range = NULL,
  prediction_sample_size = 1000,
  min_analysis_proportion = 0.5
)
}
\arguments{
\item{data}{An object of class \code{sf} or \code{sfc}.}

\item{prediction_sites}{An \code{sf} or \code{sfc} object describing the areas to be
predicted. If \code{prediction_sites} consists only of points, then those points are
treated as the intended prediction points when calculating target nearest
neighbor distances. If \code{prediction_sites} is a single (multi-)polygon, then
points are sampled from within the boundaries of that polygon. Otherwise,
if \code{prediction_sites} is of length > 1 and not made up of points,
then points are sampled from within the bounding box of \code{prediction_sites}
and used as the intended prediction points.}

\item{...}{Additional arguments passed to \code{\link[sf:st_sample]{sf::st_sample()}}. Note that the
number of points to sample is controlled by \code{prediction_sample_size}; trying
to pass \code{size} via \code{...} will cause an error.}

\item{autocorrelation_range}{A numeric of length 1 representing the landscape
autocorrelation range ("phi" in the terminology of Milà et al. (2022)). If
\code{NULL}, the default, the autocorrelation range is assumed to be the distance
between the opposite corners of the bounding box of \code{prediction_sites}.}

\item{prediction_sample_size}{A numeric of length 1: the number of points to
sample when \code{prediction_sites} is not composed solely of points. Note that this
argument is passed to \code{size} in \code{\link[sf:st_sample]{sf::st_sample()}}, meaning that no elements
of \code{...} can be named \code{size}.}

\item{min_analysis_proportion}{The minimum proportion of \code{data} that must
remain after removing points to match nearest neighbor distances. This
function will stop removing data from analysis sets once only
\code{min_analysis_proportion} of the original data remains in analysis sets, even
if the nearest neighbor distances between analysis and assessment sets are
still lower than those between training and prediction locations.}
}
\value{
A tibble with classes \code{spatial_nndm_cv}, \code{spatial_rset}, \code{rset},
\code{tbl_df}, \code{tbl}, and \code{data.frame}. The results include a column for the
data split objects and an identification variable \code{id}.
}
\description{
NNDM is a variant of leave-one-out cross-validation which assigns each
observation to a single assessment fold, and then attempts to remove data
from each analysis fold until the nearest neighbor distance distribution
between assessment and analysis folds matches the nearest neighbor distance
distribution between training data and the locations a model will be used to
predict.
Proposed by Milà et al. (2022), this method aims to provide accurate
estimates of how well models will perform in the locations they will actually
be predicting. This method was originally implemented in the CAST package.
}
\details{
Note that, as a form of leave-one-out cross-validation, this method can be
rather slow for larger data (and fitting models to these resamples will be
even slower).
}
\examples{
\dontshow{if (rlang::is_installed("modeldata")) withAutoprint(\{ # examplesIf}
data(ames, package = "modeldata")
ames_sf <- sf::st_as_sf(ames, coords = c("Longitude", "Latitude"), crs = 4326)

# Using a small subset of the data, to make the example run faster:
spatial_nndm_cv(ames_sf[1:100, ], ames_sf[2001:2100, ])
\dontshow{\}) # examplesIf}
}
\references{
C. Milà, J. Mateu, E. Pebesma, and H. Meyer. 2022. "Nearest Neighbour
Distance Matching Leave-One-Out Cross-Validation for map validation." Methods
in Ecology and Evolution 2022:13, pp 1304–1316.
doi: 10.1111/2041-210X.13851.

H. Meyer and E. Pebesma. 2022. "Machine learning-based global maps of
ecological variables and the challenge of assessing them."
Nature Communications 13, article 2208. doi: 10.1038/s41467-022-29838-9.
}
