#' Covariance estimator with missing data
#'
#' @description
#' Estimates the covariance matrix of a design matrix in the presence of
#' missing values. Each covariance entry is computed using all observations
#' for which the corresponding pair of covariates is jointly observed.
#' No centering is performed: the result is the matrix of pairwise
#' second moments, so center the columns of \code{X} beforehand if a
#' covariance about the mean is required.
#'
#' @usage
#' estimate_cov_raw(X)
#'
#' @param X
#' Numeric matrix (or object coercible to a numeric matrix, such as a data
#' frame with only numeric columns) containing covariates. Rows correspond to
#' observations and columns to variables. Missing values (\code{NA}) are
#' allowed. Non-numeric input (e.g. character data) raises an error.
#'
#' @details
#' Let \eqn{X_{ij}} denote the \eqn{j}-th covariate for observation \eqn{i}.
#' For each pair of variables \eqn{(j, k)}, the covariance estimate is
#' \deqn{
#' \hat{\Sigma}_{jk}
#' =
#' \frac{1}{n_{jk}} \sum_{i : X_{ij}, X_{ik} \ \mathrm{observed}} X_{ij} X_{ik},
#' }
#' where \eqn{n_{jk}} is the number of observations for which both entries are
#' observed. If no such observations exist, the corresponding covariance entry
#' is set to \code{NA}.
#'
#' This estimator is symmetric by construction and reduces to the usual sample
#' second-moment matrix when the data contain no missing values.
#'
#' @return
#' A numeric \code{p x p} matrix containing the estimated covariance matrix,
#' where \code{p = ncol(X)}. Entries corresponding to variable pairs that are
#' never jointly observed are \code{NA}. If \code{X} has column names, they
#' are used as both the row and column names of the result.
#'
#' @examples
#' set.seed(1)
#' X <- matrix(rnorm(125), 25, 5)
#' X[sample(length(X), 10)] <- NA
#' Sigma_hat <- estimate_cov_raw(X)
#' Sigma_hat
#'
#' @export
estimate_cov_raw <- function(X) {
  # Data frames and bare vectors are coerced so that crossprod() applies.
  if (!is.matrix(X)) X <- as.matrix(X)

  p <- ncol(X)
  if (p == 0) return(matrix(numeric(0), 0, 0))

  # Fail early with a clear message instead of an opaque crossprod() error.
  # Logical is allowed because an all-NA matrix is stored as logical; complex
  # is allowed because crossprod() supports it.
  if (!(is.numeric(X) || is.logical(X) || is.complex(X))) {
    stop("`X` must be a numeric matrix or coercible to one.", call. = FALSE)
  }

  # Indicator of observed entries; missing entries are then zeroed so they
  # contribute nothing to the sums of products.
  # NOTE(review): infinite values are not treated as missing. Since
  # 0 * Inf is NaN, an infinite entry whose partner is missing turns that
  # pair's sum into NaN.
  observed <- !is.na(X)
  X[!observed] <- 0

  # sum_mat[j, k]   = sum of X[i, j] * X[i, k] over jointly observed rows
  # count_mat[j, k] = number of rows in which columns j and k are both observed
  # Both inherit their dimnames from the column names of X.
  sum_mat <- crossprod(X)
  count_mat <- crossprod(observed)

  # Elementwise average. Pairs that are never jointly observed are reported
  # as NA rather than as the result of dividing by a zero count.
  Sigma_hat <- sum_mat / count_mat
  Sigma_hat[count_mat == 0] <- NA_real_

  # Mirror the upper triangle onto the lower one so the result is exactly
  # symmetric regardless of how the products were accumulated.
  lower <- lower.tri(Sigma_hat)
  Sigma_hat[lower] <- t(Sigma_hat)[lower]

  Sigma_hat
}
