Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid non-API calls when truncating overallocated vectors #362

Merged
merged 6 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# cpp11 (development version)

* Removed usage of the following non-API functions:
* `SETLENGTH()`
* `SET_TRUELENGTH()`
* `SET_GROWABLE_BIT()`

These functions were used as part of the efficient growable vectors that
cpp11 offered, i.e. what happens under the hood when you use `push_back()`.
The removal of these non-API functions means that cpp11 writable vectors that
have been pushed to with `push_back()` will likely force 1 extra allocation
when the conversion from `cpp11::writable::r_vector<T>` to `SEXP` occurs
(typically when you return a result back to R). This does not affect the
performance of `push_back()` itself, and in general these growable vectors
are still quite efficient (#362).

* Fixed a memory leak with the `cpp11::writable::r_vector` move assignment
operator (#338).

Expand Down
8 changes: 8 additions & 0 deletions cpp11test/R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,18 @@ rcpp_grow_ <- function(n_sxp) {
.Call(`_cpp11test_rcpp_grow_`, n_sxp)
}

rcpp_push_and_truncate_ <- function(size_sxp) {
.Call(`_cpp11test_rcpp_push_and_truncate_`, size_sxp)
}

test_destruction_inner <- function() {
invisible(.Call(`_cpp11test_test_destruction_inner`))
}

test_destruction_outer <- function() {
invisible(.Call(`_cpp11test_test_destruction_outer`))
}

cpp11_push_and_truncate_ <- function(size_sexp) {
.Call(`_cpp11test_cpp11_push_and_truncate_`, size_sexp)
}
20 changes: 20 additions & 0 deletions cpp11test/bench/truncate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
pkgload::load_all("cpp11test")

bench::press(len = as.integer(10 ^ (0:6)),
{
bench::mark(
cpp11 = cpp11_push_and_truncate_(len),
rcpp = rcpp_push_and_truncate_(len),
check = FALSE,
min_iterations = 1000
)
}
)[c("expression", "len", "min", "mem_alloc", "n_itr", "n_gc")]

# Longer benchmark, lots of gc
len <- as.integer(10 ^ 7)
bench::mark(
cpp11 = cpp11_push_and_truncate_(len),
rcpp = rcpp_push_and_truncate_(len),
min_iterations = 200
)[c("expression", "min", "mem_alloc", "n_itr", "n_gc")]
16 changes: 16 additions & 0 deletions cpp11test/src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,13 @@ extern "C" SEXP _cpp11test_rcpp_grow_(SEXP n_sxp) {
return cpp11::as_sexp(rcpp_grow_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(n_sxp)));
END_CPP11
}
// sum_rcpp.cpp
SEXP rcpp_push_and_truncate_(SEXP size_sxp);
extern "C" SEXP _cpp11test_rcpp_push_and_truncate_(SEXP size_sxp) {
BEGIN_CPP11
return cpp11::as_sexp(rcpp_push_and_truncate_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(size_sxp)));
END_CPP11
}
// test-protect-nested.cpp
void test_destruction_inner();
extern "C" SEXP _cpp11test_test_destruction_inner() {
Expand All @@ -438,6 +445,13 @@ extern "C" SEXP _cpp11test_test_destruction_outer() {
return R_NilValue;
END_CPP11
}
// truncate.cpp
SEXP cpp11_push_and_truncate_(SEXP size_sexp);
extern "C" SEXP _cpp11test_cpp11_push_and_truncate_(SEXP size_sexp) {
BEGIN_CPP11
return cpp11::as_sexp(cpp11_push_and_truncate_(cpp11::as_cpp<cpp11::decay_t<SEXP>>(size_sexp)));
END_CPP11
}

extern "C" {
/* .Call calls */
Expand All @@ -447,6 +461,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_cpp11test_col_sums", (DL_FUNC) &_cpp11test_col_sums, 1},
{"_cpp11test_cpp11_add_vec_for_", (DL_FUNC) &_cpp11test_cpp11_add_vec_for_, 2},
{"_cpp11test_cpp11_insert_", (DL_FUNC) &_cpp11test_cpp11_insert_, 1},
{"_cpp11test_cpp11_push_and_truncate_", (DL_FUNC) &_cpp11test_cpp11_push_and_truncate_, 1},
{"_cpp11test_cpp11_release_", (DL_FUNC) &_cpp11test_cpp11_release_, 1},
{"_cpp11test_cpp11_safe_", (DL_FUNC) &_cpp11test_cpp11_safe_, 1},
{"_cpp11test_data_frame_", (DL_FUNC) &_cpp11test_data_frame_, 0},
Expand Down Expand Up @@ -481,6 +496,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_cpp11test_protect_one_preserve_", (DL_FUNC) &_cpp11test_protect_one_preserve_, 2},
{"_cpp11test_protect_one_sexp_", (DL_FUNC) &_cpp11test_protect_one_sexp_, 2},
{"_cpp11test_rcpp_grow_", (DL_FUNC) &_cpp11test_rcpp_grow_, 1},
{"_cpp11test_rcpp_push_and_truncate_", (DL_FUNC) &_cpp11test_rcpp_push_and_truncate_, 1},
{"_cpp11test_rcpp_release_", (DL_FUNC) &_cpp11test_rcpp_release_, 1},
{"_cpp11test_rcpp_sum_dbl_accumulate_", (DL_FUNC) &_cpp11test_rcpp_sum_dbl_accumulate_, 1},
{"_cpp11test_rcpp_sum_dbl_for_", (DL_FUNC) &_cpp11test_rcpp_sum_dbl_for_, 1},
Expand Down
12 changes: 12 additions & 0 deletions cpp11test/src/sum_rcpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,15 @@

return x;
}

[[cpp11::register]] SEXP rcpp_push_and_truncate_(SEXP size_sxp) {
R_xlen_t size = INTEGER(size_sxp)[0];

// Allocate `size` worth of doubles (filled with garbage data)
Rcpp::NumericVector out(size);

// Push 1 more past the existing capacity
out.push_back(0);

return out;
}
15 changes: 15 additions & 0 deletions cpp11test/src/truncate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "cpp11/doubles.hpp"

[[cpp11::register]] SEXP cpp11_push_and_truncate_(SEXP size_sexp) {
R_xlen_t size = INTEGER(size_sexp)[0];

// Allocate `size` worth of doubles (filled with garbage data)
cpp11::writable::doubles out(size);

// Push 1 more past the existing length/capacity,
// doubling the capacity for cpp11 vectors
out.push_back(0);

// Truncate back to `size + 1` size and return result.
return SEXP(out);
}
33 changes: 15 additions & 18 deletions inst/include/cpp11/r_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -897,33 +897,30 @@ inline void r_vector<T>::clear() {
length_ = 0;
}

inline SEXP truncate(SEXP x, R_xlen_t length, R_xlen_t capacity) {
#if R_VERSION >= R_Version(3, 4, 0)
SETLENGTH(x, length);
SET_TRUELENGTH(x, capacity);
SET_GROWABLE_BIT(x);
#else
x = safe[Rf_lengthgets](x, length);
#endif
return x;
}

template <typename T>
inline r_vector<T>::operator SEXP() const {
// Throwing away the const-ness is a bit gross, but we only modify
// internal details here, and updating the internal data after we resize allows
// statements like `Rf_setAttrib(<r_vector>, name, value)` to make sense, where
// people expect that the SEXP inside the `<r_vector>` gets the updated attribute.
auto* p = const_cast<r_vector<T>*>(this);

if (data_ == R_NilValue) {
// Specially call out the `NULL` case, which can occur if immediately
// returning a default constructed writable `r_vector` as a `SEXP`.
p->resize(0);
return data_;
}

if (length_ < capacity_) {
p->data_ = truncate(p->data_, length_, capacity_);
SEXP nms = names();
auto nms_size = Rf_xlength(nms);
if ((nms_size > 0) && (length_ < nms_size)) {
nms = truncate(nms, length_, capacity_);
names() = nms;
}
// Truncate the vector to its `length_`. This unfortunately typically forces
// an allocation if the user has called `push_back()` on a writable
// `r_vector`. Importantly, going through `resize()` updates: `data_` and
// protection of it, `data_p_`, and `capacity_`.
p->resize(length_);
return data_;
}

return data_;
}

Expand Down
3 changes: 2 additions & 1 deletion vignettes/motivations.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,8 @@ b_grow <- bench::press(.grid = grid,
{
fun = match.fun(sprintf("%sgrow_", ifelse(pkg == "cpp11", "", paste0(pkg, "_"))))
bench::mark(
fun(len)
fun(len),
min_iterations = 100
)
}
)[c("len", "pkg", "min", "mem_alloc", "n_itr", "n_gc")]
Expand Down
Loading