#' Generate Response
#'
#' @description
#' Randomly generates a response matrix according to specified conditions,
#' including the attribute distribution, item quality, sample size, Q-matrix and cognitive diagnosis model (CDM).
#'
#' @param Q The Q-matrix. A random 30 × 5 Q-matrix (\code{\link[Qval]{sim.Q}}) will be used if \code{Q = NULL}.
#' @param N Sample size. Default = 500.
#' @param IQ A list containing two \eqn{I}-length vectors: \code{P0} and \code{P1}.  
#'           \code{P0} represents the probability of examinees who have not mastered any attributes  
#'           (\eqn{[00...0]}) correctly answering the item, while \code{P1} represents the probability  
#'           of examinees who have mastered all attributes (\eqn{[11...1]}) correctly answering the item.  
#' @param att.str Specify attribute structures. \code{NULL}, by default, means there is no structure. An attribute structure 
#'                needs to be specified as a list, which will be handled internally by the \code{\link[GDINA]{att.structure}} function. 
#'                See examples. It can also be a matrix giving all permissible attribute profiles.
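#' @param model Type of model used to generate the data; can be \code{"GDINA"}, \code{"LCDM"}, \code{"DINA"}, \code{"DINO"},
#'              \code{"ACDM"}, \code{"LLM"}, or \code{"rRUM"}. Only \code{"GDINA"}, \code{"DINA"} and \code{"DINO"}
#'              are allowed when \code{att.str} is not \code{NULL}.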
#' @param distribute Attribute distributions; can be \code{"uniform"} for the uniform distribution,
#'                   \code{"mvnorm"} for the multivariate normal distribution (Chiu, Douglas, & Li,
#'                   2009) and \code{"horder"} for the higher-order distribution (Tu et al., 2022).
#' @param control A list of control parameters with elements:
#' \itemize{
#'     \item \code{sigma}  The common covariance between attributes when \code{distribute = "mvnorm"}. It fills the
#'                        off-diagonal elements of the variance-covariance matrix, whose diagonal is fixed at 1.
#'                        Default = 0.5 (Chiu, Douglas, & Li, 2009).
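#'     \item \code{cutoffs}  A vector giving the cutoff for each attribute when \code{distribute = "mvnorm"}.
#'                          By default, the standard normal quantiles of \eqn{k/(1+K)}, \eqn{k = 1, \ldots, K},
#'                          assigned to the attributes in random order (Chiu, Douglas, & Li, 2009).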
#'     \item \code{theta} A vector of length N representing the higher-order ability for each examinee.
#'                       By default, generate randomly from the standard normal distribution (Tu et al, 2022).
#'     \item \code{a} The slopes for the higher-order model when \code{distribute = "horder"}.
#'                   Default = 1.5 (Tu et al, 2022).
#'     \item \code{b} The intercepts when \code{distribute = "horder"}. By default, select equally spaced
#'                   values between -1.5 and 1.5 according to the number of attributes (Tu et al, 2022).
#'     \item \code{alpha} Used to generate a structured parameter distribution with a hierarchical structure 
#'                        when att.str is not NULL. This distribution is randomly drawn from a Dirichlet 
#'                        distribution, where alpha denotes the parameters of the Dirichlet distribution, 
#'                        and its length equals the number \code{L.str} of all valid attribute profiles 
#'                        \eqn{\boldsymbol{\alpha}} under the hierarchical structure. Default is
#'                        \code{alpha = rep(3, L.str)}.
#'  }
#' @param verbose Logical indicating to print information or not. Default is \code{TRUE}
#'
#' @return Object of class \code{sim.data}.
#' A \code{sim.data} object initially obtained from the \code{\link[GDINA]{simGDINA}} function of the \code{GDINA} package.
#' Elements that can be extracted using method extract include:
#' \describe{
#'  \item{\code{dat}}{An \code{N} × \code{I} simulated item response matrix.}
#'  \item{\code{Q}}{The Q-matrix.}
#'  \item{\code{attribute}}{An \code{N} × \code{K} matrix for individuals' attribute patterns.}
#'  \item{\code{catprob.parm}}{A list of non-zero success probabilities for each attribute mastery pattern.}
#'  \item{\code{delta.parm}}{A list of delta parameters.}
#'  \item{\code{higher.order.parm}}{Higher-order parameters.}
#'  \item{\code{mvnorm.parm}}{Multivariate normal distribution parameters.}
#'  \item{\code{LCprob.parm}}{A matrix of success probabilities for each attribute mastery pattern.}
#' }
#'
#' @author Haijiang Qin <Haijiang133@outlook.com>
#'
#' @references
#' Chiu, C.-Y., Douglas, J. A., & Li, X. (2009). Cluster Analysis for Cognitive Diagnosis: Theory and Applications. Psychometrika, 74(4), 633-665. DOI: 10.1007/s11336-009-9125-0.
#'
#' Tu, D., Chiu, J., Ma, W., Wang, D., Cai, Y., & Ouyang, X. (2022). A multiple logistic regression-based (MLR-B) Q-matrix validation method for cognitive diagnosis models: A confirmatory approach. Behavior Research Methods. DOI: 10.3758/s13428-022-01880-x.
#'
#' @examples
#'
#'################################################################
#'#                           Example 1                          #
#'#       generate data following the uniform distribution       #
#'################################################################
#' library(Qval)
#'
#' set.seed(123)
#'
#' K <- 5
#' I <- 10
#' Q <- sim.Q(K, I)
#'
#' IQ <- list(
#'   P0 = runif(I, 0.0, 0.2),
#'   P1 = runif(I, 0.8, 1.0)
#' )
#'
#' data.obj <- sim.data(Q = Q, N = 100, IQ=IQ, model = "GDINA", distribute = "uniform")
#'
#' print(data.obj$dat)
#'
#'################################################################
#'#                           Example 2                          #
#'#       generate data following the mvnorm distribution        #
#'################################################################
#' set.seed(123)
#' K <- 5
#' I <- 10
#' Q <- sim.Q(K, I)
#'
#' IQ <- list(
#'   P0 = runif(I, 0.0, 0.2),
#'   P1 = runif(I, 0.8, 1.0)
#' )
#'
#' cutoffs <- sample(qnorm(c(1:K)/(K+1)), ncol(Q))
#' data.obj <- sim.data(Q = Q, N = 10, IQ=IQ, model = "GDINA", distribute = "mvnorm",
#'                  control = list(sigma = 0.5, cutoffs = cutoffs))
#'
#' print(data.obj$dat)
#'
#'#################################################################
#'#                            Example 3                          #
#'#        generate data following the horder distribution        #
#'#################################################################
#' set.seed(123)
#' K <- 5
#' I <- 10
#' Q <- sim.Q(K, I)
#'
#' IQ <- list(
#'   P0 = runif(I, 0.0, 0.2),
#'   P1 = runif(I, 0.8, 1.0)
#' )
#'
#' theta <- rnorm(10, 0, 1)
#' b <- seq(-1.5,1.5,length.out=K)
#' data.obj <- sim.data(Q = Q, N = 10, IQ=IQ, model = "GDINA", distribute = "horder",
#'                  control = list(theta = theta, a = 1.5, b = b))
#'
#' print(data.obj$dat)
#'
#' @export
#' @importFrom GDINA attributepattern att.structure simGDINA
#' @importFrom stats runif qnorm rnorm sd
#' @importFrom gtools  rdirichlet
#' 
sim.data <- function(Q=NULL, N=NULL, IQ=list(P0=NULL, P1=NULL), att.str=NULL, 
                     model="GDINA", distribute="uniform", control = NULL, 
                     verbose = TRUE){
  # Simulate an item response matrix by delegating to simGDINA().
  # Exactly one generation path is taken:
  #   * att.str supplied  -> categorical attribute prior restricted to the
  #                          permissible profiles (Dirichlet-distributed weights)
  #   * "mvnorm"          -> thresholded multivariate normal attributes
  #   * "horder"          -> higher-order attribute model
  #   * "uniform"         -> simGDINA() default attribute distribution
  # Returns a list of class "sim.data" holding the simulated data, the
  # generating parameters reported by simGDINA(), the call and the arguments.
  
  has.str <- !is.null(att.str)
  
  # ---- argument validation --------------------------------------------------
  # inherits() is used instead of `class(x) %in% ...`: class() of a matrix is
  # c("matrix", "array") since R 4.0, which made the old test a length-2
  # condition (an error inside if() from R 4.2 on).
  if(has.str && !inherits(att.str, c("list", "matrix"))){
    stop("att.str must be NULL, a list or a matrix ! \n")
  }
  if(has.str && !all(model %in% c("GDINA", "DINA", "DINO"))){
    stop("model must be one of 'GDINA', 'DINA' and 'DINO' when att.str is not NULL !\n")
  }
  
  # Without an attribute structure `distribute` selects the generator; reject
  # unknown values here instead of failing later with an unrelated error.
  dist.type <- NULL
  if(!has.str){
    dist.type <- unique(distribute)
    if(length(dist.type) != 1L || !dist.type %in% c("uniform", "mvnorm", "horder")){
      stop("distribute must be one of 'uniform', 'mvnorm' and 'horder' when att.str is NULL !\n")
    }
  }
  
  simCall <- match.call()
  
  # ---- defaults -------------------------------------------------------------
  if(is.null(Q)){
    # sim.Q() is defined elsewhere in the package.
    # NOTE(review): confirm that it accepts a matrix-valued att.str.
    Q <- sim.Q(5, 30, att.str)
  }
  K <- ncol(Q)
  I <- nrow(Q)
  if(is.null(N)){
    N <- 500
  }
  if(is.null(IQ$P0)){
    IQ$P0 <- runif(I, 0.0, 0.3)
  }
  if(is.null(IQ$P1)){
    IQ$P1 <- runif(I, 0.7, 1.0)
  }
  # simGDINA() takes guessing and slip: column 1 = P0, column 2 = 1 - P1.
  gs <- cbind(IQ$P0, 1 - IQ$P1)
  gs.args <- list(type = "random", mono.constraint = TRUE)
  
  # ---- describe the attribute structure -------------------------------------
  # When a structure is supplied, `distribute` is replaced by a textual
  # description of that structure (this is also what is stored in `arguments`).
  if(has.str){
    if(is.matrix(att.str)){
      if(ncol(att.str) != K){
        stop("att.str must have ", K, " columns (one per attribute) when it is a matrix !\n")
      }
      distribute <- paste0(nrow(att.str), " user-specified attribute profiles")
    }else{
      # Each list element is a prerequisite pair c(from, to) -> "Afrom->Ato".
      edges <- vapply(att.str,
                      function(x) paste0("A", x[1], "->", "A", x[2]),
                      character(1))
      distribute <- paste0(edges, collapse = ", ")
    }
    wrapped.distribute <- strwrap(distribute, width = 80)
    if(verbose){
      cat("attribute structure: \n", paste(wrapped.distribute, collapse = "\n"), "\n")
    }
  }
  
  if(verbose){
    if(!has.str){
      cat("distribute = ",distribute,"\n")
    }
    cat("model = ",model,"\n",
        "number of attributes: ", K, "\n",
        "number of items: ", I, "\n",
        "num of examinees: ", N, "\n",
        "average of P0 = ", round(mean(IQ$P0), 3), "\n",
        "average of P1 = ", round(mean(IQ$P1), 3), "\n")
  }
  
  # ---- data generation ------------------------------------------------------
  if(has.str){
    pattern <- attributepattern(K)
    # A matrix already lists the permissible profiles; a list is expanded by
    # att.structure().
    if(is.matrix(att.str)){
      pattern.str <- att.str
    }else{
      pattern.str <- att.structure(att.str, K)$att.str
    }
    
    # get_Pattern() is defined elsewhere in the package; presumably it returns
    # the row index of a profile within `pattern` (it is used as an index below).
    valid.idx <- sort(apply(pattern.str, 1, function(x) get_Pattern(x, pattern)))
    
    if(is.null(control$alpha)){
      control$alpha <- rep(3, nrow(pattern.str))
    }
    # Profiles outside the structure keep probability zero.
    att.prior <- rep(0, nrow(pattern))
    att.prior[valid.idx] <- rdirichlet(n = 1, alpha = control$alpha)
    
    if(verbose){
      alpha.str <- paste("Dirichlet's alpha =", paste(control$alpha, collapse = ", "))
      cat(paste(strwrap(alpha.str, width = 80), collapse = "\n"), "\n")
      # drop = FALSE keeps a matrix even when a single profile is permissible.
      names_vec <- apply(pattern[valid.idx, , drop = FALSE], 1,
                         function(x) paste(x, collapse = ""))
      values_vec <- round(att.prior[valid.idx], 3)
      max_name_width <- max(nchar(names_vec))
      lines <- mapply(function(name, val) {
        sprintf("%-*s : %.3f", max_name_width, name, val)
      }, names_vec, values_vec, SIMPLIFY = TRUE)
      cat("att.prior:\n")
      cat(lines, sep = "\n")
      cat("\n")
    }
    
    sim <- simGDINA(N, Q, gs.parm = gs, model = model, att.prior = att.prior, 
                    att.dist = "categorical", gs.args = gs.args)
    
  }else if(dist.type == "mvnorm"){
    sigma <- if(is.null(control$sigma)) 0.5 else control$sigma
    if(is.null(control$cutoffs)){
      # Normal quantiles of k/(K+1), assigned to the attributes in random order.
      cutoffs <- sample(qnorm(c(1:K)/(K+1)), K, replace = FALSE)
    }else{
      cutoffs <- control$cutoffs
    }
    if(verbose){
      cat("sigma =", round(sigma, 3), "\n", "cutoffs =", round(cutoffs, 3), "\n")
    }
    
    # sigma fills the off-diagonal; variances are fixed at 1.
    vcov <- matrix(sigma, K, K)
    diag(vcov) <- 1
    sim <- simGDINA(N, Q, gs.parm = gs, model = model, att.dist = "mvnorm", 
                    gs.args = gs.args,
                    mvnorm.parm = list(mean = rep(0, K), sigma = vcov, cutoffs = cutoffs))
    
  }else if(dist.type == "horder"){
    theta <- if(is.null(control$theta)) rnorm(N, 0, 1) else control$theta
    # Constant default slope (the previous runif(K, 1.5, 1.5) is degenerate).
    a <- if(is.null(control$a)) rep(1.5, K) else control$a
    if(is.null(control$b)){
      b <- sample(seq(-1.5, 1.5, length.out = K), K, replace = FALSE)
    }else{
      b <- control$b
    }
    if(verbose){
      cat("theta_mean = ", round(mean(theta), 3), ", theta_sd =", round(sd(theta), 3), "\n",
          "a = ", round(a, 3), "\n", "b = ", round(b, 3), "\n")
    }
    
    sim <- simGDINA(N, Q, gs.parm = gs, model = model, att.dist = "higher.order", 
                    gs.args = gs.args,
                    higher.order.parm = list(theta = theta, lambda = data.frame(a = a, b = b)))
    
  }else{
    # "uniform": rely on simGDINA()'s default attribute distribution.
    sim <- simGDINA(N, Q, gs.parm = gs, model = model, gs.args = gs.args)
  }
  
  # ---- assemble the result --------------------------------------------------
  # Q is taken from the simGDINA() result, both at top level and in `arguments`.
  out <- list(dat = sim$dat, 
              Q = sim$Q, 
              attribute = sim$attribute, 
              catprob.parm = sim$catprob.parm, 
              delta.parm = sim$delta.parm, 
              higher.order.parm = sim$higher.order.parm, 
              mvnorm.parm = sim$mvnorm.parm, 
              att.str = att.str, 
              LCprob.parm = sim$LCprob.parm, 
              call = simCall, 
              arguments = list(
                Q = sim$Q, N = N, IQ = IQ,
                att.str = att.str, 
                model = model, 
                distribute = distribute, 
                control = control,
                verbose = verbose
              ))
  
  class(out) <- "sim.data"
  
  return(out)
}
