README

Rcpp is a powerful tool to write fast C++ code to speed up R programs. However, it is not easy, or at least not straightforward, to compute numerical integration or do optimization using pure C++ code inside Rcpp.

RcppNumerical integrates a number of open source numerical computing libraries into Rcpp, so that users can call convenient functions to accomplish such tasks.

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]

in the C++ source file. - To use RcppNumerical in your package, add Imports: RcppNumerical and LinkingTo: Rcpp, RcppEigen, RcppNumerical to the DESCRIPTION file, and import(RcppNumerical) to the NAMESPACE file.

Numerical Integration

One-dimensional

To compute integration of a function, first define a functor derived from the Func class (under the namespace Numer):

class Func
{
public:
    virtual double operator()(const double& x) const = 0;
    virtual void eval(double* x, const int n) const
    {
        for(int i = 0; i < n; i++)
            x[i] = this->operator()(x[i]);
    }

    virtual ~Func() {}
};

The first function evaluates one point at a time, and the second version overwrites each point in the array by the corresponding function values. Only the second function will be used by the integration code, but usually it is easier to implement the first one.

RcppNumerical provides a wrapper function for the NumericalIntegration library with the following interface:

inline double integrate(
    const Func& f, const double& lower, const double& upper,
    double& err_est, int& err_code,
    const int subdiv = 100, const double& eps_abs = 1e-8, const double& eps_rel = 1e-6,
    const Integrator<double>::QuadratureRule rule = Integrator<double>::GaussKronrod41
)

See a full example below, which can be compiled using the Rcpp::sourceCpp function in Rcpp.

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]
#include <RcppNumerical.h>
using namespace Numer;

// P(0.3 < X < 0.8), X ~ Beta(a, b)
class BetaPDF: public Func
{
private:
    double a;
    double b;
public:
    BetaPDF(double a_, double b_) : a(a_), b(b_) {}

    double operator()(const double& x) const
    {
        return R::dbeta(x, a, b, 0);
    }
};

// [[Rcpp::export]]
Rcpp::List integrate_1d_test()
{
    const double a = 3, b = 10;
    const double lower = 0.3, upper = 0.8;
    const double true_val = R::pbeta(upper, a, b, 1, 0) -
                            R::pbeta(lower, a, b, 1, 0);

    BetaPDF f(a, b);
    double err_est;
    int err_code;
    const double res = integrate(f, lower, upper, err_est, err_code);
    return Rcpp::List::create(
        Rcpp::Named("true") = true_val,
        Rcpp::Named("approximate") = res,
        Rcpp::Named("error_estimate") = err_est,
        Rcpp::Named("error_code") = err_code
    );
}

integrate_1d_test()
## $true
## [1] 0.2528108
##
## $approximate
## [1] 0.2528108
##
## $error_estimate
## [1] 2.806764e-15
##
## $error_code
## [1] 0

Multi-dimensional

Multi-dimensional integration in RcppNumerical is done by the Cuba library developed by Thomas Hahn.

To calculate the integration of a multivariate function, one needs to define a functor that inherits from the MFunc class:

class MFunc
{
public:
    virtual double operator()(Constvec& x) = 0;

    virtual ~MFunc() {}
};

// Constant reference to a vector
using Constvec = const Eigen::Ref<const Eigen::VectorXd>;

(Basically you can treat Constvec as a const Eigen::VectorXd. Using Eigen::Ref is mainly to avoid memory copy. See the explanation here.)

inline double integrate(
    MFunc& f, Constvec& lower, Constvec& upper,
    double& err_est, int& err_code,
    const int maxeval = 1000,
    const double& eps_abs = 1e-6, const double& eps_rel = 1e-6
)

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]
#include <RcppNumerical.h>
using namespace Numer;

// P(a1 < X1 < b1, a2 < X2 < b2), (X1, X2) ~ N([0], [1   rho])
//                                            ([0], [rho   1])
class BiNormal: public MFunc
{
private:
    const double rho;
    double const1;  // 2 * (1 - rho^2)
    double const2;  // 1 / (2 * PI) / sqrt(1 - rho^2)
public:
    BiNormal(const double& rho_) : rho(rho_)
    {
        const1 = 2.0 * (1.0 - rho * rho);
        const2 = 1.0 / (2 * M_PI) / std::sqrt(1.0 - rho * rho);
    }

    // PDF of bivariate normal
    double operator()(Constvec& x)
    {
        double z = x[0] * x[0] - 2 * rho * x[0] * x[1] + x[1] * x[1];
        return const2 * std::exp(-z / const1);
    }
};

// [[Rcpp::export]]
Rcpp::List integrate_md_test()
{
    BiNormal f(0.5);  // rho = 0.5
    Eigen::VectorXd lower(2);
    lower << -1, -1;
    Eigen::VectorXd upper(2);
    upper << 1, 1;
    double err_est;
    int err_code;
    const double res = integrate(f, lower, upper, err_est, err_code);
    return Rcpp::List::create(
        Rcpp::Named("approximate") = res,
        Rcpp::Named("error_estimate") = err_est,
        Rcpp::Named("error_code") = err_code
    );
}

library(mvtnorm)
trueval = pmvnorm(c(-1, -1), c(1, 1), sigma = matrix(c(1, 0.5, 0.5, 1), 2))
integrate_md_test()
## $approximate
## [1] 0.4979718
##
## $error_estimate
## [1] 4.612333e-09
##
## $error_code
## [1] 0
as.numeric(trueval) - integrate_md_test()$approximate
## [1] 2.893336e-11

Handling Infinite Limits

Infinite intagral limits are also supported. In the case of one-dimensional integration:

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]
#include <RcppNumerical.h>
using namespace Numer;

class TestInf: public Func
{
public:
    double operator()(const double& x) const
    {
        return x * x * R::dnorm(x, 0.0, 1.0, 0);
    }
};

// [[Rcpp::export]]
Rcpp::List integrate_1d_inf_test(const double& lower, const double& upper)
{
    TestInf f;
    double err_est;
    int err_code;
    const double res = integrate(f, lower, upper, err_est, err_code);
    return Rcpp::List::create(
        Rcpp::Named("approximate") = res,
        Rcpp::Named("error_estimate") = err_est,
        Rcpp::Named("error_code") = err_code
    );
}

# integrate() in R
integrate(function(x) x^2 * dnorm(x), 0.5, Inf)
## 0.4845702 with absolute error < 3e-08
integrate_1d_inf_test(0.5, Inf)
## $approximate
## [1] 0.4845702
##
## $error_estimate
## [1] 1.633995e-08
##
## $error_code
## [1] 0

Similarly, for multi-dimensional integration, infinite limits are supported in each dimension by specifying std::numeric_limits<double>::infinity() or -std::numeric_limits<double>::infinity():

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]
#include <RcppNumerical.h>
using namespace Numer;

// Test 1: Semi-infinite [0, +Inf) x [0, 1]
// Integrate exp(-x) over x in [0, +Inf) and y in [0, 1]
// True value: 1.0
class SemiInfiniteTest: public MFunc
{
public:
    double operator()(Constvec& x)
    {
        return std::exp(-x[0]);
    }
};

// Test 2: Doubly-infinite (-Inf, +Inf) x [0, 1]
// Integrate exp(-x^2) over x in (-Inf, +Inf) and y in [0, 1]
// True value: sqrt(pi)
class DoublyInfiniteTest: public MFunc
{
public:
    double operator()(Constvec& x)
    {
        return std::exp(-x[0] * x[0]);
    }
};

// Test 3: All infinite bounds
// Integrate exp(-x^2 - y^2) over (-Inf, +Inf) x (-Inf, +Inf)
// Expected: pi
class Gaussian2D: public MFunc
{
public:
    double operator()(Constvec& x)
    {
        return std::exp(-x[0] * x[0] - x[1] * x[1]);
    }
};

// [[Rcpp::export]]
Rcpp::List integrate_md_inf_test()
{
    constexpr double Inf = std::numeric_limits<double>::infinity();
    double err_est;
    int err_code;

    Eigen::VectorXd lower(2), upper(2);

    // Test 1: Semi-infinite
    lower[0] = 0.0; upper[0] = Inf;
    lower[1] = 0.0; upper[1] = 1.0;
    SemiInfiniteTest f1;
    double res1 = integrate(f1, lower, upper, err_est, err_code);

    // Test 2: Doubly-infinite
    lower[0] = -Inf; upper[0] = Inf;
    lower[1] = 0.0; upper[1] = 1.0;
    DoublyInfiniteTest f2;
    double res2 = integrate(f2, lower, upper, err_est, err_code);

    // Test 3: All infinite
    lower[0] = -Inf; upper[0] = Inf;
    lower[1] = -Inf; upper[1] = Inf;
    Gaussian2D f3;
    double res3 = integrate(f3, lower, upper, err_est, err_code);

    return Rcpp::List::create(
        Rcpp::Named("semi_infinite") = res1,
        Rcpp::Named("doubly_infinite") = res2,
        Rcpp::Named("all_infinite") = res3
    );
}

integrate_md_inf_test()
## $semi_infinite
## [1] 1
##
## $doubly_infinite
## [1] 1.772454
##
## $all_infinite
## [1] 3.141542

Numerical Optimization

Unconstrained Minimization

Currently RcppNumerical uses the L-BFGS algorithm to solve unconstrained minimization problems based on the LBFGS++ library.

Again, one needs to first define a functor to represent the multivariate function to be minimized.

class MFuncGrad
{
public:
    virtual double f_grad(Constvec& x, Refvec grad) = 0;

    virtual ~MFuncGrad() {}
};

Same as the case in multi-dimensional integration, Constvec represents a read-only vector and Refvec a writable vector. Their definitions are

// Reference to a vector
using RefVec = Eigen::Ref<Eigen::VectorXd>;
using Constvec = const Eigen::Ref<const Eigen::VectorXd>;

The f_grad() member function returns the function value on vector x, and overwrites grad by the gradient.

inline int optim_lbfgs(
    MFuncGrad& f, Refvec x, double& fx_opt,
    const int maxit = 300, const double& eps_f = 1e-6, const double& eps_g = 1e-5
)

Below is an example that illustrates the optimization of the Rosenbrock function f(x1, x2) = 100 * (x2 - x1^2)^2 + (1 - x1)^2:

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]

#include <RcppNumerical.h>

using namespace Numer;

// f = 100 * (x2 - x1^2)^2 + (1 - x1)^2
// True minimum: x1 = x2 = 1
class Rosenbrock: public MFuncGrad
{
public:
    double f_grad(Constvec& x, Refvec grad)
    {
        double t1 = x[1] - x[0] * x[0];
        double t2 = 1 - x[0];
        grad[0] = -400 * x[0] * t1 - 2 * t2;
        grad[1] = 200 * t1;
        return 100 * t1 * t1 + t2 * t2;
    }
};

// [[Rcpp::export]]
Rcpp::List optim_test()
{
    Eigen::VectorXd x(2);
    x[0] = -1.2;
    x[1] = 1;
    double fopt;
    Rosenbrock f;
    int res = optim_lbfgs(f, x, fopt);
    return Rcpp::List::create(
        Rcpp::Named("xopt") = x,
        Rcpp::Named("fopt") = fopt,
        Rcpp::Named("status") = res
    );
}

optim_test()
## $xopt
## [1] 0.9999683 0.9999354
##
## $fopt
## [1] 1.150395e-09
##
## $status
## [1] 0

Box-constrained Minimization

For optimization problems with box constraints (i.e., each variable has a lower and upper bound), RcppNumerical provides the L-BFGS-B algorithm, also based on the LBFGS++ library.

The functor definition is the same as that in the unconstrained minimization problems, i.e., inheriting from MFuncGrad.

inline int optim_lbfgsb(
    MFuncGrad& f, Refvec x, double& fx_opt,
    Constvec& lb, Constvec& ub,
    const int maxit = 300, const double& eps_f = 1e-6, const double& eps_g = 1e-5
)

Below is an example that minimizes the same Rosenbrock function, but this time with box constraints that force the first variable in [-2, 0.5] and second variable in [0, +Inf).

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]

#include <RcppNumerical.h>

using namespace Numer;

// f = 100 * (x2 - x1^2)^2 + (1 - x1)^2
class Rosenbrock: public MFuncGrad
{
public:
    double f_grad(Constvec& x, Refvec grad)
    {
        double t1 = x[1] - x[0] * x[0];
        double t2 = 1 - x[0];
        grad[0] = -400 * x[0] * t1 - 2 * t2;
        grad[1] = 200 * t1;
        return 100 * t1 * t1 + t2 * t2;
    }
};

// [[Rcpp::export]]
Rcpp::List optim_box_test()
{
    Eigen::VectorXd x(2), lb(2), ub(2);
    // Initial guess
    x[0] = -1.2;
    x[1] = 1;
    // Bounds [-2, 0.5] for firsts variable
    lb[0] = -2;
    ub[0] = 0.5;
    // Bounds [0, +Inf) for second variable -- infinite values are supported
    lb[1] = 0;
    ub[1] = std::numeric_limits<double>::infinity();

    double fopt;
    Rosenbrock f;
    int res = optim_lbfgsb(f, x, fopt, lb, ub);
    return Rcpp::List::create(
        Rcpp::Named("xopt") = x,
        Rcpp::Named("fopt") = fopt,
        Rcpp::Named("status") = res
    );
}

optim_box_test()
## $xopt
## [1] 0.50 0.25
##
## $fopt
## [1] 0.25
##
## $status
## [1] 0

A Practical Example

It may be more meaningful to look at a real application of the RcppNumerical package. Below is an example to fit logistic regression using the L-BFGS algorithm. It also demonstrates the performance of the library.

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]

#include <RcppNumerical.h>

using namespace Numer;

using MapMat = Eigen::Map<Eigen::MatrixXd>;
using MapVec = Eigen::Map<Eigen::VectorXd>;

class LogisticReg: public MFuncGrad
{
private:
    const MapMat X;
    const MapVec Y;
public:
    LogisticReg(const MapMat x_, const MapVec y_) : X(x_), Y(y_) {}

    double f_grad(Constvec& beta, Refvec grad)
    {
        // Negative log likelihood
        //   sum(log(1 + exp(X * beta))) - y' * X * beta

        Eigen::VectorXd xbeta = X * beta;
        const double yxbeta = Y.dot(xbeta);
        // X * beta => exp(X * beta)
        xbeta = xbeta.array().exp();
        const double f = (xbeta.array() + 1.0).log().sum() - yxbeta;

        // Gradient
        //   X' * (p - y), p = exp(X * beta) / (1 + exp(X * beta))

        // exp(X * beta) => p
        xbeta.array() /= (xbeta.array() + 1.0);
        grad.noalias() = X.transpose() * (xbeta - Y);

        return f;
    }
};

// [[Rcpp::export]]
Rcpp::NumericVector logistic_reg(Rcpp::NumericMatrix x, Rcpp::NumericVector y)
{
    const MapMat xx = Rcpp::as<MapMat>(x);
    const MapVec yy = Rcpp::as<MapVec>(y);
    // Negative log likelihood
    LogisticReg nll(xx, yy);
    // Initial guess
    Eigen::VectorXd beta(xx.cols());
    beta.setZero();

    double fopt;
    int status = optim_lbfgs(nll, beta, fopt);
    if(status < 0)
        Rcpp::stop("fail to converge");

    return Rcpp::wrap(beta);
}

set.seed(123)
n = 5000
p = 100
x = matrix(rnorm(n * p), n)
beta = runif(p)
xb = c(x %*% beta)
p = exp(xb) / (1 + exp(xb))
y = rbinom(n, 1, p)

system.time(res1 <- glm.fit(x, y, family = binomial())$coefficients)
##   user  system elapsed
##  0.229   0.006   0.234
system.time(res2 <- logistic_reg(x, y))
##   user  system elapsed
##  0.005   0.000   0.006
max(abs(res1 - res2))
## [1] 0.0001873564

It is much faster than the standard glm.fit() function in R! (Although glm.fit() calculates some other quantities besides beta.)

RcppNumerical also provides the fastLR() function to run fast logistic regression, which is a modified and more stable version of the code above.

Rcpp Integration for Numerical Computing Libraries

Introduction