% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csv_to_parquet.R
\name{csv_to_parquet}
\alias{csv_to_parquet}
\title{Convert a csv or a txt file to parquet format}
\usage{
csv_to_parquet(
  path_to_file,
  url_to_csv = lifecycle::deprecated(),
  csv_as_a_zip = lifecycle::deprecated(),
  filename_in_zip,
  path_to_parquet,
  columns = "all",
  compression = "snappy",
  compression_level = NULL,
  partition = "no",
  encoding = "UTF-8",
  read_delim_args = list(),
  ...
)
}
\arguments{
\item{path_to_file}{String that indicates the path to the input file (don't forget the extension).}

\item{url_to_csv}{DEPRECATED; use \code{path_to_file} instead.}

\item{csv_as_a_zip}{DEPRECATED}

\item{filename_in_zip}{name of the csv/txt file in the zip. Required if several csv/txt are included in the zip.}

\item{path_to_parquet}{String that indicates the path to the directory where the parquet files will be stored.}

\item{columns}{Character vector of columns to select from the input file (by default, all columns are selected).}

\item{compression}{compression algorithm. Default "snappy".}

\item{compression_level}{compression level. Meaning depends on compression algorithm.}

\item{partition}{String ("yes" or "no", the default) that indicates whether you want to create a partitioned parquet file.
If "yes", the \code{"partitioning"} argument must be filled in. In this case, a folder will be created for each modality of the variable filled in \code{"partitioning"}.
Be careful, this argument cannot be "yes" if the \code{max_memory} or \code{max_rows} arguments are not NULL.}

\item{encoding}{String that indicates the character encoding for the input file.}

\item{read_delim_args}{list of arguments for \code{read_delim}.}

\item{...}{additional format-specific arguments, see \href{https://arrow.apache.org/docs/r/reference/write_parquet.html}{arrow::write_parquet()}
and \href{https://arrow.apache.org/docs/r/reference/write_dataset.html}{arrow::write_dataset()} for more information.}
}
\value{
A parquet file, invisibly
}
\description{
This function converts a csv or a txt file to parquet format. \cr

Two conversion possibilities are offered:

\itemize{

\item{Convert to a single parquet file. Argument \code{path_to_parquet} must then be used;}
\item{Convert to a partitioned parquet file. Additional arguments \code{partition} and \code{partitioning} must then be used;}

}
}
\note{
Be careful, if the zip size exceeds 4 GB, the function may truncate
the data (because unzip() won't work reliably in this case -
see \href{https://rdrr.io/r/utils/unzip.html}{here}).
In this case, it's advised to unzip your csv/txt file by hand
(for example with \href{https://www.7-zip.org/}{7-Zip})
then use the function with the argument \code{path_to_file}.
}
\examples{

# Conversion from a local csv file to a single parquet file :

csv_to_parquet(
  path_to_file = parquetize_example("region_2022.csv"),
  path_to_parquet = tempfile(fileext=".parquet")
)

# Conversion from a local txt file to a single parquet file :

csv_to_parquet(
  path_to_file = parquetize_example("region_2022.txt"),
  path_to_parquet = tempfile(fileext=".parquet")
)

# Conversion from a local csv file to a single parquet file, selecting only
# a few columns :

csv_to_parquet(
  path_to_file = parquetize_example("region_2022.csv"),
  path_to_parquet = tempfile(fileext = ".parquet"),
  columns = c("REG","LIBELLE")
)

# Conversion from a local csv file to a partitioned parquet file  :

csv_to_parquet(
  path_to_file = parquetize_example("region_2022.csv"),
  path_to_parquet = tempfile(fileext = ".parquet"),
  partition = "yes",
  partitioning =  c("REG")
)

# Conversion from a URL and a zipped file (csv) :

csv_to_parquet(
  path_to_file = "https://www.nomisweb.co.uk/output/census/2021/census2021-ts007.zip",
  filename_in_zip = "census2021-ts007-ctry.csv",
  path_to_parquet = tempfile(fileext = ".parquet")
)

\dontrun{
# Conversion from a URL and a zipped file (txt) :

csv_to_parquet(
  path_to_file = "https://sourceforge.net/projects/irisdss/files/latest/download",
  filename_in_zip = "IRIS TEST data.txt",
  path_to_parquet = tempfile(fileext=".parquet")
)

# Conversion from a URL and a csv file with "gzip" compression :

csv_to_parquet(
  path_to_file =
  "https://github.com/sidsriv/Introduction-to-Data-Science-in-python/raw/master/census.csv",
  path_to_parquet = tempfile(fileext = ".parquet"),
  compression = "gzip",
  compression_level = 5
)
}
}
