% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bm_CrossValidation.R
\name{bm_CrossValidation}
\alias{bm_CrossValidation}
\alias{bm_CrossValidation_user.defined}
\alias{bm_CrossValidation_user.defined,BIOMOD.formated.data-method}
\alias{bm_CrossValidation_user.defined,BIOMOD.formated.data.PA-method}
\alias{bm_CrossValidation_random}
\alias{bm_CrossValidation_random,BIOMOD.formated.data-method}
\alias{bm_CrossValidation_random,BIOMOD.formated.data.PA-method}
\alias{bm_CrossValidation_kfold}
\alias{bm_CrossValidation_kfold,BIOMOD.formated.data-method}
\alias{bm_CrossValidation_kfold,BIOMOD.formated.data.PA-method}
\alias{bm_CrossValidation_block}
\alias{bm_CrossValidation_block,BIOMOD.formated.data-method}
\alias{bm_CrossValidation_block,BIOMOD.formated.data.PA-method}
\alias{bm_CrossValidation_strat}
\alias{bm_CrossValidation_strat,BIOMOD.formated.data-method}
\alias{bm_CrossValidation_strat,BIOMOD.formated.data.PA-method}
\alias{bm_CrossValidation_env}
\alias{bm_CrossValidation_env,BIOMOD.formated.data-method}
\alias{bm_CrossValidation_env,BIOMOD.formated.data.PA-method}
\title{Build cross-validation table}
\usage{
bm_CrossValidation(
  bm.format,
  strategy = "random",
  nb.rep = 0,
  perc = 0.8,
  k = 0,
  balance = "presences",
  env.var = NULL,
  strat = "both",
  user.table = NULL,
  do.full.models = FALSE
)

bm_CrossValidation_user.defined(bm.format, ...)

\S4method{bm_CrossValidation_user.defined}{BIOMOD.formated.data}(bm.format, user.table)

\S4method{bm_CrossValidation_user.defined}{BIOMOD.formated.data.PA}(bm.format, user.table)

bm_CrossValidation_random(bm.format, ...)

\S4method{bm_CrossValidation_random}{BIOMOD.formated.data}(bm.format, nb.rep, perc)

\S4method{bm_CrossValidation_random}{BIOMOD.formated.data.PA}(bm.format, nb.rep, perc)

bm_CrossValidation_kfold(bm.format, ...)

\S4method{bm_CrossValidation_kfold}{BIOMOD.formated.data}(bm.format, nb.rep, k)

\S4method{bm_CrossValidation_kfold}{BIOMOD.formated.data.PA}(bm.format, nb.rep, k)

bm_CrossValidation_block(bm.format, ...)

\S4method{bm_CrossValidation_block}{BIOMOD.formated.data}(bm.format)

\S4method{bm_CrossValidation_block}{BIOMOD.formated.data.PA}(bm.format)

bm_CrossValidation_strat(bm.format, ...)

\S4method{bm_CrossValidation_strat}{BIOMOD.formated.data}(bm.format, balance, strat, k)

\S4method{bm_CrossValidation_strat}{BIOMOD.formated.data.PA}(bm.format, balance, strat, k)

bm_CrossValidation_env(bm.format, ...)

\S4method{bm_CrossValidation_env}{BIOMOD.formated.data}(bm.format, balance, k, env.var)

\S4method{bm_CrossValidation_env}{BIOMOD.formated.data.PA}(bm.format, balance, k, env.var)
}
\arguments{
\item{bm.format}{a \code{\link{BIOMOD.formated.data}} or \code{\link{BIOMOD.formated.data.PA}} 
object returned by the \code{\link{BIOMOD_FormatingData}} function}

\item{strategy}{a \code{character} corresponding to the cross-validation selection strategy, 
must be among \code{random}, \code{kfold}, \code{block}, \code{strat}, \code{env} or 
\code{user.defined}}

\item{nb.rep}{(\emph{optional, default} \code{0}) \cr
If \code{strategy = 'random'} or \code{strategy = 'kfold'}, an \code{integer} corresponding 
to the number of sets (repetitions) of cross-validation points that will be drawn}

\item{perc}{(\emph{optional, default} \code{0.8}) \cr
If \code{strategy = 'random'}, a \code{numeric} between \code{0} and \code{1} defining the 
proportion of data that will be kept for calibration}

\item{k}{(\emph{optional, default} \code{0}) \cr
If \code{strategy = 'kfold'} or \code{strategy = 'strat'} or \code{strategy = 'env'}, an 
\code{integer} corresponding to the number of partitions}

\item{balance}{(\emph{optional, default} \code{'presences'}) \cr 
If \code{strategy = 'strat'} or \code{strategy = 'env'}, a \code{character} corresponding 
to how data will be balanced between partitions, must be either \code{presences} or 
\code{absences}}

\item{env.var}{(\emph{optional}) \cr 
If \code{strategy = 'env'}, a \code{character} corresponding to the environmental variables 
used to build the partition. \code{k} partitions will be built for each environmental 
variable. \emph{By default the function uses all environmental variables available.}}

\item{strat}{(\emph{optional, default} \code{'both'}) \cr 
If \code{strategy = 'strat'}, a \code{character} corresponding to how data will be partitioned 
along the gradient, must be among \code{x}, \code{y}, \code{both}}

\item{user.table}{(\emph{optional, default} \code{NULL}) \cr
If \code{strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} defining for each 
repetition (in columns) which observation lines should be used for models calibration 
(\code{TRUE}) and validation (\code{FALSE})}

\item{do.full.models}{(\emph{optional, default} \code{FALSE}) \cr
A \code{logical} value defining whether models should be also calibrated and validated over 
the whole dataset (and pseudo-absence datasets) or not}

\item{\ldots}{(\emph{optional, one or several of the listed above arguments depending on the 
selected method})}
}
\value{
A \code{matrix} or \code{data.frame} defining for each repetition (in columns) which 
observation lines should be used for models calibration (\code{TRUE}) and validation 
(\code{FALSE}).
}
\description{
This internal \pkg{biomod2} function builds a cross-validation table 
according to 6 different methods : \code{random}, \code{kfold}, \code{block}, \code{strat}, 
\code{env} or \code{user.defined} (see Details).
}
\details{
Several parameters are available within the function and some of them can be used with 
different cross-validation strategies :

\code{| ....... | random | kfold | block | strat | env |} \cr
__________________________________________________ \cr
\code{| nb.rep. | x..... | x.... | ..... | ..... | ... |} \cr
\code{| perc... | x..... | ..... | ..... | ..... | ... |} \cr
\code{| k...... | ...... | x.... | ..... | x.... | x.. |} \cr
\code{| balance | ...... | ..... | ..... | x.... | x.. |} \cr
\code{| strat.. | ...... | ..... | ..... | x.... | ... |} \cr \cr \cr


\bold{Concerning column names of \code{matrix} output :}

The number of columns depends on the strategy selected. 
The column names are given \emph{a posteriori} of the selection, ranging from 1 to the 
number of columns. 
If \code{do.full.models = TRUE}, columns merging runs (and/or pseudo-absence datasets) 
are added at the end. \cr \cr


\bold{Concerning cross-validation strategies :}

\describe{
  \item{random}{The simplest way to calibrate and validate a model is to split the original 
  dataset in two datasets : one to calibrate the model and the other one to validate it. The 
  splitting can be repeated \code{nb.rep} times.}
  \item{k-fold}{The k-fold method splits the original dataset in \code{k} datasets of equal 
  sizes : each part is used successively as the validation dataset while the other \code{k-1} 
  parts are used for the calibration, leading to \code{k} calibration/validation ensembles. 
  This multiple splitting can be repeated \code{nb.rep} times.}
  \item{block}{It may be used to test for model overfitting and to assess transferability in 
  geographic space. \code{block} stratification was described in \emph{Muscarella et al. 2014} 
  (see References). Four bins of equal size are partitioned (bottom-left, bottom-right, 
  top-left and top-right).}
  \item{stratified}{It may be used to test for model overfitting and to assess transferability 
  in geographic space. \code{x} and \code{y} stratification was described in \emph{Wenger and 
  Olden 2012} (see References). \code{y} stratification uses \code{k} partitions along the 
  y-gradient, \code{x} stratification does the same for the x-gradient. \code{both} returns 
  \code{2k} partitions: \code{k} partitions stratified along the x-gradient and \code{k} 
  partitions stratified along the y-gradient.}
  \item{environmental}{It may be used to test for model overfitting and to assess 
  transferability in environmental space. It returns \code{k} partitions for each variable 
  given in \code{env.var}.}
  \item{user-defined}{Allows the user to provide their own cross-validation table. For a 
  presence-absence dataset, column names must be formatted as: \code{_allData_RUNx} with 
  \code{x} an integer. For a presence-only dataset for which several pseudo-absence datasets 
  were generated, column names must be formatted as: \code{_PAx_RUNy} with \code{x} an 
  integer and \code{PAx} an existing pseudo-absence dataset and \code{y} an integer \cr \cr}
}


\bold{Concerning balance parameter :}

If \code{balance = 'presences'}, presences are divided (balanced) equally over the partitions 
(e.g. \emph{Fig. 1b in Muscarella et al. 2014}). 
Absences or pseudo-absences will however be unbalanced over the partitions especially if the 
presences are clumped on an edge of the study area.

If \code{balance = 'absences'}, absences (resp. pseudo-absences or background) are divided 
(balanced) as equally as possible between the partitions (geographical balanced bins given 
that absences are spread over the study area equally, approach similar to \emph{Fig. 1 in 
Wenger and Olden 2012}). Presences will however be unbalanced over the partitions especially
if the presences are clumped on an edge of the study area.
}
\examples{
library(terra)

# Load species occurrences (6 species available)
data(DataSpecies)
head(DataSpecies)

# Select the name of the studied species
myRespName <- 'GuloGulo'

# Get corresponding presence/absence data
myResp <- as.numeric(DataSpecies[, myRespName])

# Get corresponding XY coordinates
myRespXY <- DataSpecies[, c('X_WGS84', 'Y_WGS84')]

# Load environmental variables extracted from BIOCLIM (bio_3, bio_4, bio_7, bio_11 & bio_12)
data(bioclim_current)
myExpl <- terra::rast(bioclim_current)

\dontshow{
myExtent <- terra::ext(0,30,45,70)
myExpl <- terra::crop(myExpl, myExtent)
}

# --------------------------------------------------------------- #
# Format Data with true absences
myBiomodData <- BIOMOD_FormatingData(resp.name = myRespName,
                                     resp.var = myResp,
                                     resp.xy = myRespXY,
                                     expl.var = myExpl)

# --------------------------------------------------------------- #
# Create the different validation datasets

# random selection : 3 repetitions, each keeping 80 percent of the data for calibration
# (the calibration share is set through perc ; k is not used by this strategy)
cv.r <- bm_CrossValidation(bm.format = myBiomodData,
                           strategy = "random",
                           nb.rep = 3,
                           perc = 0.8)

# k-fold selection : 3 folds, the whole splitting being repeated 2 times
cv.k <- bm_CrossValidation(bm.format = myBiomodData,
                           strategy = "kfold",
                           nb.rep = 2,
                           k = 3)

# block selection : four geographic bins, no additional argument required
cv.b <- bm_CrossValidation(bm.format = myBiomodData,
                           strategy = "block")

# stratified selection (geographic) : 2 partitions along the x-gradient,
# presences being balanced over the partitions
cv.s <- bm_CrossValidation(bm.format = myBiomodData,
                           strategy = "strat",
                           k = 2,
                           balance = "presences",
                           strat = "x")

# stratified selection (environmental) : 2 partitions per environmental variable
# (env.var is left to its default, so all available variables are used)
cv.e <- bm_CrossValidation(bm.format = myBiomodData,
                           strategy = "env",
                           k = 2,
                           balance = "presences")

# Inspect each table : first rows, then number of calibration (TRUE) and
# validation (FALSE) observations for every column
head(cv.r)
apply(cv.r, 2, table)
head(cv.k)
apply(cv.k, 2, table)
head(cv.b)
apply(cv.b, 2, table)
head(cv.s)
apply(cv.s, 2, table)
head(cv.e)
apply(cv.e, 2, table)


}
\references{
\itemize{
  \item Muscarella R, Galante PJ, Soley-Guardia M, Boria RA, Kass JM, Uriarte M, Anderson 
  RP (\bold{2014}). \emph{ENMeval: An R package for conducting spatially independent 
  evaluations and estimating optimal model complexity for Maxent ecological niche models.} 
  Methods in Ecology and Evolution, 5, 1198-1205. \doi{10.1111/2041-210X.12261}
  \item Wenger SJ and Olden JD (\bold{2012}). \emph{Assessing transferability of ecological 
  models: an underappreciated aspect of statistical validation.} Methods in Ecology and 
  Evolution, 3, 260-267. \doi{10.1111/j.2041-210X.2011.00170.x}
}
}
\seealso{
\code{\link[ENMeval]{get.block}}, \code{\link[dismo]{kfold}}, 
\code{\link{BIOMOD_FormatingData}}, \code{\link{BIOMOD_Modeling}}

Other Secondary functions: 
\code{\link{bm_BinaryTransformation}()},
\code{\link{bm_FindOptimStat}()},
\code{\link{bm_MakeFormula}()},
\code{\link{bm_ModelAnalysis}()},
\code{\link{bm_ModelingOptions}()},
\code{\link{bm_PlotEvalBoxplot}()},
\code{\link{bm_PlotEvalMean}()},
\code{\link{bm_PlotRangeSize}()},
\code{\link{bm_PlotResponseCurves}()},
\code{\link{bm_PlotVarImpBoxplot}()},
\code{\link{bm_PseudoAbsences}()},
\code{\link{bm_RangeSize}()},
\code{\link{bm_RunModelsLoop}()},
\code{\link{bm_SRE}()},
\code{\link{bm_SampleBinaryVector}()},
\code{\link{bm_SampleFactorLevels}()},
\code{\link{bm_Tuning}()},
\code{\link{bm_VariablesImportance}()}
}
\author{
Maya Guéguen
}
\concept{Secondary functions}
