#' Get an overview of the given root scans
#'
#' \code{getOverviewInput} - This function filters a set of root scan
#' directories by checking if they comply with the given naming convention and
#' then returns overview data about these directories.
#' @param data_dir (Optional, default = NULL) String specifying the name
#' (full path) of the directory containing all root scan directories of
#' interest.
#' @param data_dirs (Optional, default = NULL) Character vector specifying all
#' of the individual root scan directories of interest. This is only used if
#' \code{data_dir} is set to NULL.
#' @param naming_conv A string specifying the naming convention, i.e., what
#' information is provided within the names of the root scans. The file format
#' can be png, tiff, jpg, or jpeg (upper or lowercase).
#' Available are: \cr
#' - "standard" (default): This is a commonly used naming convention and has the
#' following structure: \cr
#' project_tube_depth_date_ID_session_operator \cr
#' Example: \cr
#' Testproject_T007_L004_12.12.2025_123394_016_Testoperator.jpg \cr
#' Explanation of the abbreviations: \cr
#' -- project: Project name ('unlimited' letters or digits)\cr
#' -- tube: ID of the minirhizotron ("T"+3 digits) \cr
#' -- depth: ID of the depth-level/window ("L"+3 digits) \cr
#' -- date: Date of the scanning (format day.month.year,
#' 2 digits +"."+ 2 digits +"."+ 2 or 4 digits) \cr
#' -- ID: ID of the scan (6 digits) or timecode 14:23:10 -> 142310 \cr
#' -- session: ID of the scan session (3 digits). \cr
#' -- operator: ID of the person that scanned the root ('unlimited'
#' letters or digits)
#'
#' @details If neither \code{data_dir} nor \code{data_dirs} is given, or if
#' no directory matches the naming convention, an empty data frame with the
#' same columns is returned and a message is printed. An unknown
#' \code{naming_conv} raises an error.
#'
#' @return \code{getOverviewInput} A data frame containing the information
#' about the various root scan directories (see also
#' \code{getDepthLvlInfo()} for further explanations). The columns
#' \code{dir_name_full} to \code{operator} are filled from the directory
#' names, all remaining columns are initialized with \code{NA}.
#'
#' @export
#' @rdname getOverviewInput
#'
#' @examples
#' getOverviewInput(data_dir = NULL, naming_conv = "standard")
getOverviewInput <- function(data_dir = NULL, data_dirs = NULL,
                             naming_conv = "standard"){

  # Empty template with the final column layout. It is returned as-is
  # whenever there is nothing to report.
  df_dirs <- data.frame(
    "dir_name_full" = character(),
    "dir_name" = character(),
    "project" = character(),
    "tube" = character(),
    "depth_window" = integer(),
    "date" = as.Date(character(), format = "%d.%m.%y"),
    "ID" = integer(),
    "session" = integer(),
    "operator" = character(),
    "top_side" = character(),
    "depth_highpoint_cm" = numeric(),
    "pos_highpoint_px" = character(),
    "tube_angle" = numeric(),
    "ppcm" = numeric(),
    "ppi" = numeric(),
    "overlap_px" = numeric(),
    "max_shift_px" = numeric(),
    "gap_cm" = numeric(),
    "gap_deg" = numeric(),
    stringsAsFactors = FALSE
  )

  # Collect the candidate directories. 'data_dir' takes precedence over
  # 'data_dirs'.
  if(!is.null(data_dir)){
    # List all direct sub-directories of the directory.
    dirs <- list.dirs(data_dir, full.names = TRUE, recursive = FALSE)
  } else if(!is.null(data_dirs)){
    dirs <- data_dirs
  } else {
    message("No directories specified. Returning empty data frame.\n")
    return(df_dirs)
  }

  # Define reg-ex pattern to match the naming convention.
  if(naming_conv == "standard"){
    pattern <- paste0("^",                             # Start, (full match 1)
                      "([A-Za-z0-9 ]+)_",              # project 2
                      "(T\\d{3})_L(\\d{3})_",          # tube 3, depth 4
                      "(\\d{2}\\.\\d{2}\\.(?:\\d{2}|\\d{4}))_", # date 5
                      "(\\d{6})_(\\d{3})_",            # ID 6, session 7
                      "([A-Za-z0-9]+)",                # operator 8
                      # File extension 9. Only the extension is matched
                      # case-insensitively (e.g., ".JPG" is accepted).
                      "\\.((?i:png|tiff|jpg|jpeg))$")
  } else {
    stop("Unknown naming convention.")
  }

  # Nothing to match (e.g., 'data_dir' has no sub-directories).
  if(length(dirs) == 0){
    message(paste("No directories matched the naming convention.",
                  "Returning empty data frame."))
    return(df_dirs)
  }
  dirs <- standardizePaths(dirs)

  # Match once and use the same result for filtering and extraction, so both
  # steps always agree. Rows of non-matching names are completely NA.
  # A matrix, where each column corresponds to a captured group.
  extracted_data <- stringr::str_match(basename(dirs), pattern)
  is_match <- !is.na(extracted_data[, 1])
  if(!any(is_match)){
    message(paste("No directories matched the naming convention.",
                  "Returning empty data frame."))
    return(df_dirs)
  }
  matching_dirs <- dirs[is_match]
  # drop = FALSE keeps the matrix shape if only one directory matched.
  extracted_data <- extracted_data[is_match, , drop = FALSE]
  n_match <- length(matching_dirs)

  # Parse the dates. The year may have 2 or 4 digits. "%y" reads only two
  # digits (it would turn "2025" into the year 2020), so 4-digit years need
  # "%Y".
  date_str <- as.character(extracted_data[, 5])
  has_long_year <- nchar(date_str) == 10L
  scan_dates <- rep(as.Date(NA), n_match)
  scan_dates[has_long_year] <- as.Date(date_str[has_long_year],
                                       format = "%d.%m.%Y")
  scan_dates[!has_long_year] <- as.Date(date_str[!has_long_year],
                                        format = "%d.%m.%y")

  # Create the data frame with information. Columns that cannot be derived
  # from the directory name are initialized with NA.
  df_dirs <- data.frame(
    "dir_name_full" = as.character(matching_dirs),
    "dir_name" = as.character(extracted_data[, 1]),
    "project" = as.character(extracted_data[, 2]),
    "tube" = as.character(extracted_data[, 3]),
    "depth_window" = as.integer(extracted_data[, 4]),
    "date" = scan_dates,
    "ID" = as.integer(extracted_data[, 6]),
    "session" = as.integer(extracted_data[, 7]),
    "operator" = as.character(extracted_data[, 8]),
    "top_side" = rep(NA_character_, n_match),
    "depth_highpoint_cm" = rep(NA_real_, n_match),
    "pos_highpoint_px" = rep(NA_character_, n_match),
    "tube_angle" = rep(NA_real_, n_match),
    "ppcm" = rep(NA_real_, n_match),
    "ppi" = rep(NA_real_, n_match),
    "overlap_px" = rep(NA_real_, n_match),
    "max_shift_px" = rep(NA_real_, n_match),
    "gap_cm" = rep(NA_real_, n_match),
    "gap_deg" = rep(NA_real_, n_match),
    stringsAsFactors = FALSE
  )
  message(paste(n_match, "of the", length(dirs),
                "directories matched the naming convention."))
  return(df_dirs)
}

# Function which standardizes a vector of paths: all backslashes are replaced
# by forward slashes and a single trailing slash (if present) is removed.
# Returns an unnamed character vector of the same length as 'paths'. Empty
# input gives character(0) and NA elements stay NA.
standardizePaths <- function(paths){
  paths <- gsub("\\", "/", paths, fixed = TRUE)
  # Remove one trailing / if necessary to simplify the next steps. sub() is
  # vectorized and, unlike sapply(), always returns a character vector (also
  # for empty input).
  paths <- unname(sub("/$", "", paths))
  return(paths)
}
