% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/string-cleaning.R
\name{diagnose_strings}
\alias{diagnose_strings}
\alias{print.diagnose_strings}
\title{Diagnose String Column Quality}
\usage{
diagnose_strings(x, name = NULL)

\method{print}{diagnose_strings}(x, ...)
}
\arguments{
\item{x}{Character vector to diagnose.}

\item{name}{Optional name for the variable (used in output). If \code{NULL}
(the default), the variable name is captured from the call.}

\item{...}{Additional arguments (currently unused).}
}
\value{
An S3 object of class \code{diagnose_strings} containing:
\describe{
\item{name}{Name of the variable}
\item{n_total}{Total number of elements}
\item{n_na}{Count of \code{NA} values}
\item{n_empty}{Count of empty strings}
\item{n_whitespace_only}{Count of whitespace-only strings}
\item{n_leading_ws}{Count of strings with leading whitespace}
\item{n_trailing_ws}{Count of strings with trailing whitespace}
\item{n_non_ascii}{Count of strings with non-ASCII characters}
\item{n_case_variants}{Number of unique values that have at least one case variant (another value differing only in letter case)}
\item{n_case_variant_groups}{Number of groups of case-insensitive duplicates}
\item{case_variant_examples}{Data frame with examples of case variants}
}
}
\description{
Audits a character vector for common data quality issues including missing
values, empty strings, whitespace problems, non-ASCII characters, and case
inconsistencies. Requires the \pkg{stringi} package (listed under Suggests, so
it may need to be installed separately).
}
\examples{
firms <- c("Apple", "APPLE", "apple", "  Microsoft ", "Google", NA, "")
diagnose_strings(firms)

}
\seealso{
Other data quality: 
\code{\link{audit_transform}()},
\code{\link{diagnose_nas}()},
\code{\link{get_summary_table}()},
\code{\link{summarize_column}()}
}
\concept{data quality}
