% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/parser.R
\name{curl_parse_url}
\alias{curl_parse_url}
\alias{curl_modify_url}
\title{Normalizing URL parser}
\usage{
curl_parse_url(
  url,
  baseurl = NULL,
  decode = TRUE,
  params = TRUE,
  default_scheme = FALSE
)

curl_modify_url(
  url = NULL,
  scheme = NULL,
  host = NULL,
  port = NULL,
  path = NULL,
  query = NULL,
  fragment = NULL,
  user = NULL,
  password = NULL,
  params = NULL
)
}
\arguments{
\item{url}{either a URL string or a list returned by \link{curl_parse_url}.
Use this to modify a URL using the other parameters.}

\item{baseurl}{use this as the parent if \code{url} may be a relative path}

\item{decode}{automatically \link[=curl_escape]{url-decode} output into the actual
values. If set to \code{FALSE}, values for \code{query}, \code{path}, \code{fragment}, \code{user} and \code{password} are returned in url-encoded format.}

\item{params}{named character vector with http GET parameters. This will automatically
be converted to \code{application/x-www-form-urlencoded} and override \code{query}.}

\item{default_scheme}{when \code{url} is provided without a scheme prefix, assume \verb{https://}.}

\item{scheme}{string with e.g. \code{https}. Required if no \code{url} parameter was given.}

\item{host}{string with hostname. Required if no \code{url} parameter was given.}

\item{port}{string or number with port, e.g. \code{"443"}.}

\item{path}{piece of the url starting with \code{/} up till \verb{?} or \verb{#}}

\item{query}{piece of url starting with \verb{?} up till \verb{#}. Only used if no \code{params} is given.}

\item{fragment}{part of url starting with \verb{#}.}

\item{user}{string with username}

\item{password}{string with password}
}
\description{
Interfaces the libcurl \href{https://curl.se/libcurl/c/libcurl-url.html}{URL parser}.
URLs are automatically normalized where possible, such as in the case of
relative paths or url-encoded queries (see examples).
When parsing hyperlinks from an HTML document, it is possible to set \code{baseurl}
to the location of the document itself such that relative links can be resolved.
}
\details{
A valid URL contains at least a scheme and a host; other pieces are optional.
If the scheme or host is missing, the parser raises an error. Otherwise it returns
a list with the following elements:
\itemize{
\item \emph{url}: the normalized input URL
\item \emph{scheme}: the protocol part before the \verb{://} (required)
\item \emph{host}: name of host without port (required)
\item \emph{port}: decimal between 0 and 65535
\item \emph{path}: normalized path up till the \verb{?} of the url
\item \emph{query}: search query: part between the \verb{?} and \verb{#} of the url. Use \code{params} below to get individual parameters from the query.
\item \emph{fragment}: the hash part after the \verb{#} of the url
\item \emph{user}: authentication username
\item \emph{password}: authentication password
\item \emph{params}: named vector with parameters from \code{query} if set
}

Each element above is either a string or \code{NULL}, except for \code{params} which
is always a character vector with the length equal to the number of parameters.

Note that the \code{params} field is only usable if the \code{query} is in the usual
\code{application/x-www-form-urlencoded} format which is technically not part of
the RFC. Some services may use e.g. a json blob as the query, in which case
the parsed \code{params} field here can be ignored. There is no way for the parser
to automatically infer or validate the query format; this is up to the caller.

For more details on the URL format see
\href{https://datatracker.ietf.org/doc/html/rfc3986}{rfc3986}
or the steps explained in the \href{https://url.spec.whatwg.org/#concept-basic-url-parser}{whatwg basic url parser}.

You can use \code{\link[=curl_modify_url]{curl_modify_url()}} either to modify an existing URL or to
create a new URL from scratch. Arguments get automatically URL-encoded where
needed, unless wrapped in \code{I()}. If \code{params} is given, this gets converted
into an \code{application/x-www-form-urlencoded} string which overrides \code{query}.
When modifying a URL, use an empty string \code{""} to unset a piece of the URL.
}
\examples{
url <- "https://jerry:secret@google.com:888/foo/bar?test=123#bla"
curl_parse_url(url)

# Resolve relative links from a baseurl
curl_parse_url("/somelink", baseurl = url)

# Paths get normalized
curl_parse_url("https://foobar.com/foo/bar/../baz/../yolo")$url

# Also normalizes URL-encoding (these URLs are equivalent):
url1 <- "https://ja.wikipedia.org/wiki/\u5bff\u53f8"
url2 <- "https://ja.wikipedia.org/wiki/\%e5\%af\%bf\%e5\%8f\%b8"
curl_parse_url(url1)$path
curl_parse_url(url2)$path
curl_parse_url(url1, decode = FALSE)$path
curl_parse_url(url2, decode = FALSE)$path
}
