Skip to content

Commit

Permalink
ARROW-12098: [R] Catch cpp build failures on linux
Browse files Browse the repository at this point in the history
The installation looks like this now in the default case if the build script errors:

```
* installing *source* package ‘arrow’ ...
** using staged installation
*** Found local C++ source
*** Building C++ libraries
**** cmake
**** arrow
**** Error building Arrow C++. Re-run with ARROW_R_DEV=true for debug information.
------------------------- NOTE ---------------------------
See https://arrow.apache.org/docs/r/articles/install.html
for help installing Arrow C++ libraries
---------------------------------------------------------
```

This PR also (1) restores the arrow-without-arrow wrapping (from apache#9689) and (2) adds an .onAttach message for the arrow-without-arrow case to hopefully alert users earlier that they have an incomplete/useless build. If you do get a without-arrow build, this is what the loading message looks like:

```
> library(arrow)
The Arrow C++ library is not available. To retry installation with debug output, run:
    install_arrow(verbose = TRUE)
See https://arrow.apache.org/docs/r/articles/install.html for more guidance and troubleshooting.

Attaching package: ‘arrow’

The following object is masked from ‘package:utils’:

    timestamp

```

It *also* adds an .onAttach message if you have a build with optional features disabled (e.g. S3, lz4, etc.):

```
> library(arrow)
See arrow_info() for available features

Attaching package: ‘arrow’

The following object is masked from ‘package:utils’:

    timestamp
```

`arrow_info()` will then (on Linux only) also print a message pointing you to the installation vignette if there are missing features:

```
> arrow_info()
Arrow package version: 3.0.0.9000

Capabilities:

dataset    TRUE
parquet    TRUE
s3         TRUE
utf8proc   TRUE
re2        TRUE
snappy     TRUE
gzip       TRUE
brotli     TRUE
zstd       TRUE
lz4        TRUE
lz4_frame  TRUE
lzo       FALSE
bz2        TRUE
jemalloc   TRUE
mimalloc  FALSE

To reinstall with more optional capabilities enabled, see
  https://arrow.apache.org/docs/r/articles/install.html

...
```

Certain compression libraries (like lzo) are on a blocklist that excludes them from this extra messaging. The purpose of all of this is to give more hints to users when they have limited builds and give them guidance on how to enhance them, while at the same time not overly broadcasting this (which would promote FUD) and trying to be clear that you don't *always* have to `install_arrow()` after `install.packages()`.

Closes apache#9896 from nealrichardson/nix-install-debug

Authored-by: Neal Richardson <[email protected]>
Signed-off-by: Neal Richardson <[email protected]>
  • Loading branch information
nealrichardson committed Apr 9, 2021
1 parent 75c8cd6 commit c0ce2b1
Show file tree
Hide file tree
Showing 9 changed files with 2,419 additions and 85 deletions.
19 changes: 12 additions & 7 deletions ci/scripts/r_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,17 @@ if [ "$ARROW_R_DEV" = "TRUE" ]; then
# Note that NOT_CRAN=true means (among other things) that optional dependencies are built
export NOT_CRAN=true
fi
export TEST_R_WITH_ARROW=TRUE
export _R_CHECK_TESTS_NLINES_=0
: ${TEST_R_WITH_ARROW:=TRUE}
export TEST_R_WITH_ARROW=$TEST_R_WITH_ARROW

export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE
# --run-donttest was used in R < 4.0, this is used now
export _R_CHECK_DONTTEST_EXAMPLES_=$TEST_R_WITH_ARROW
# Not all Suggested packages are needed for checking, so in case they aren't installed don't fail
export _R_CHECK_FORCE_SUGGESTS_=FALSE
export _R_CHECK_LIMIT_CORES_=FALSE
export _R_CHECK_TESTS_NLINES_=0

# By default, aws-sdk tries to contact a non-existing local ip host
# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE
Expand All @@ -49,9 +56,6 @@ export AWS_EC2_METADATA_DISABLED=TRUE
export TEXMFCONFIG=/tmp/texmf-config
export TEXMFVAR=/tmp/texmf-var

# Not all Suggested packages are needed for checking, so in case they aren't installed don't fail
export _R_CHECK_FORCE_SUGGESTS_=FALSE

if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then
# enable the devtoolset version to use it
source /opt/rh/devtoolset-$DEVTOOLSET_VERSION/enable
Expand All @@ -61,8 +65,9 @@ fi
BEFORE=$(ls -alh ~/)

SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true')
if (as_cran) {
rcmdcheck::rcmdcheck(args = c('--as-cran', '--run-donttest'), error_on = 'warning', check_dir = 'check', timeout = 3600)
rcmdcheck::rcmdcheck(args = c('--as-cran', if (run_donttest) '--run-donttest'), error_on = 'warning', check_dir = 'check', timeout = 3600)
} else {
if (nzchar(Sys.which('minio'))) {
message('Running minio for S3 tests (if build supports them)')
Expand All @@ -71,7 +76,7 @@ SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
pid <- sys::exec_background('minio', c('server', minio_dir))
on.exit(tools::pskill(pid))
}
rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', '--run-donttest'), error_on = 'warning', check_dir = 'check', timeout = 3600)
rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', if (run_donttest) '--run-donttest'), error_on = 'warning', check_dir = 'check', timeout = 3600)
}"
echo "$SCRIPT" | ${R_BIN} --no-save

Expand Down
6 changes: 4 additions & 2 deletions dev/tasks/r/azure.linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ jobs:
export R_ORG={{ r_org }}
export R_IMAGE={{ r_image }}
export R_TAG={{ r_tag }}
# we have to export this (right?) because we need it in the build env
export ARROW_R_DEV={{ not_cran }}
export ARROW_R_DEV={{ not_cran|default("TRUE") }}
# Note that ci/scripts/r_test.sh sets NOT_CRAN=true if ARROW_R_DEV=TRUE
docker-compose run \
-e ARROW_DATASET={{ arrow_dataset|default("") }} \
Expand All @@ -57,6 +56,9 @@ jobs:
-e ARROW_WITH_RE2={{ arrow_with_re2|default("") }} \
-e ARROW_WITH_UTF8PROC={{ arrow_with_utf8proc|default("") }} \
-e LIBARROW_MINIMAL={{ libarrow_minimal|default("") }} \
-e LIBARROW_DOWNLOAD={{ libarrow_download|default("") }} \
-e LIBARROW_BUILD={{ libarrow_build|default("") }} \
-e TEST_R_WITH_ARROW={{ with_arrow|default("TRUE") }} \
r
displayName: Docker run
Expand Down
20 changes: 12 additions & 8 deletions dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1763,7 +1763,6 @@ tasks:
r_org: rhub
r_image: ubuntu-gcc-release
r_tag: latest
not_cran: "TRUE"

test-r-rocker-r-base-latest:
ci: azure
Expand All @@ -1772,7 +1771,6 @@ tasks:
r_org: rocker
r_image: r-base
r_tag: latest
not_cran: "TRUE"

test-r-rstudio-r-base-3.6-bionic:
ci: azure
Expand All @@ -1781,7 +1779,6 @@ tasks:
r_org: rstudio
r_image: r-base
r_tag: 3.6-bionic
not_cran: "TRUE"

test-r-rstudio-r-base-3.6-centos8:
ci: azure
Expand All @@ -1790,7 +1787,6 @@ tasks:
r_org: rstudio
r_image: r-base
r_tag: 3.6-centos8
not_cran: "TRUE"

test-r-rstudio-r-base-3.6-centos7-devtoolset-8:
ci: azure
Expand All @@ -1799,7 +1795,6 @@ tasks:
r_org: rstudio
r_image: r-base
r_tag: 3.6-centos7
not_cran: "TRUE"
devtoolset_version: 8

test-r-rstudio-r-base-3.6-opensuse15:
Expand All @@ -1809,7 +1804,6 @@ tasks:
r_org: rstudio
r_image: r-base
r_tag: 3.6-opensuse15
not_cran: "TRUE"

test-r-rstudio-r-base-3.6-opensuse42:
ci: azure
Expand All @@ -1818,7 +1812,6 @@ tasks:
r_org: rstudio
r_image: r-base
r_tag: 3.6-opensuse42
not_cran: "TRUE"

test-r-minimal-build:
ci: azure
Expand All @@ -1827,14 +1820,25 @@ tasks:
r_org: rocker
r_image: r-base
r_tag: latest
not_cran: "TRUE"
arrow_dataset: "OFF"
arrow_parquet: "OFF"
arrow_s3: "OFF"
arrow_with_re2: "OFF"
arrow_with_utf8proc: "OFF"
libarrow_minimal: "TRUE"

test-r-without-arrow:
ci: azure
template: r/azure.linux.yml
params:
r_org: rhub
r_image: ubuntu-gcc-release
r_tag: latest
libarrow_download: "FALSE"
libarrow_build: "FALSE"
with_arrow: "FALSE"
not_cran: "FALSE"

test-ubuntu-18.04-r-sanitizer:
ci: azure
template: docker-tests/azure.linux.yml
Expand Down
54 changes: 46 additions & 8 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,38 @@
}

# Create these once, at package build time
dplyr_functions$dataset <- build_function_list(build_dataset_expression)
dplyr_functions$array <- build_function_list(build_array_expression)

if (arrow_available()) {
dplyr_functions$dataset <- build_function_list(build_dataset_expression)
dplyr_functions$array <- build_function_list(build_array_expression)
}
invisible()
}

.onAttach <- function(libname, pkgname) {
  # Startup hook: emits at most one packageStartupMessage() describing the
  # state of the Arrow C++ library backing this installation.
  if (arrow_available()) {
    # Just to be extra safe, this is wrapped in try(): we don't want a failure
    # while computing the startup message to prevent the package from loading
    try({
      # arrow_info()$capabilities has all of the #ifdef features, plus the
      # compression libs and the string libraries (but not the memory
      # allocators, they're added elsewhere)
      capabilities <- arrow_info()$capabilities
      # Hint at arrow_info() only when some of those features are off
      if (some_features_are_off(capabilities)) {
        packageStartupMessage("See arrow_info() for available features")
      }
    })
  } else {
    # No C++ library at all: tell the user how to retry the installation
    msg_lines <- c(
      "The Arrow C++ library is not available. To retry installation with debug output, run:",
      "    install_arrow(verbose = TRUE)",
      "See https://arrow.apache.org/docs/r/articles/install.html for more guidance and troubleshooting."
    )
    packageStartupMessage(paste(msg_lines, collapse = "\n"))
  }
}

#' Is the C++ Arrow library available?
#'
#' You won't generally need to call these functions, but they're made available
Expand All @@ -74,25 +100,25 @@
#' `vignette("install", package = "arrow")` for guidance on reinstalling the
#' package.
arrow_available <- function() {
# NOTE(review): this is a diff view without +/- markers; the bare .Call()
# below is presumably the removed (pre-change) body -- confirm against the
# actual file before reusing this snippet.
.Call(`_arrow_available`)
# Evaluates to FALSE instead of raising if the .Call() signals an error
tryCatch(.Call(`_arrow_available`), error = function(e) return(FALSE))
}

#' @rdname arrow_available
#' @export
arrow_with_dataset <- function() {
# NOTE(review): this is a diff view without +/- markers; the bare .Call()
# below is presumably the removed (pre-change) body -- confirm against the
# actual file before reusing this snippet.
.Call(`_dataset_available`)
# Evaluates to FALSE instead of raising if the .Call() signals an error
tryCatch(.Call(`_dataset_available`), error = function(e) return(FALSE))
}

#' @rdname arrow_available
#' @export
arrow_with_parquet <- function() {
# NOTE(review): this is a diff view without +/- markers; the bare .Call()
# below is presumably the removed (pre-change) body -- confirm against the
# actual file before reusing this snippet.
.Call(`_parquet_available`)
# Evaluates to FALSE instead of raising if the .Call() signals an error
tryCatch(.Call(`_parquet_available`), error = function(e) return(FALSE))
}

#' @rdname arrow_available
#' @export
arrow_with_s3 <- function() {
# NOTE(review): this is a diff view without +/- markers; the bare .Call()
# below is presumably the removed (pre-change) body -- confirm against the
# actual file before reusing this snippet.
.Call(`_s3_available`)
# Evaluates to FALSE instead of raising if the .Call() signals an error
tryCatch(.Call(`_s3_available`), error = function(e) return(FALSE))
}

option_use_threads <- function() {
Expand Down Expand Up @@ -143,6 +169,14 @@ arrow_info <- function() {
structure(out, class = "arrow_info")
}

some_features_are_off <- function(features) {
  # `features` is a named logical vector (as in arrow_info()$capabilities).
  # These entries are deliberately ignored when deciding whether to report
  # that the build is missing features.
  ignored <- c("lzo", "bz2", "brotli")
  relevant <- setdiff(names(features), ignored)
  # TRUE when at least one of the remaining features is FALSE
  !all(features[relevant])
}

#' @export
print.arrow_info <- function(x, ...) {
print_key_values <- function(title, vals, ...) {
Expand All @@ -161,6 +195,10 @@ print.arrow_info <- function(x, ...) {
jemalloc = "jemalloc" %in% x$memory_pool$available_backends,
mimalloc = "mimalloc" %in% x$memory_pool$available_backends
))
if (some_features_are_off(x$capabilities) && identical(tolower(Sys.info()[["sysname"]]), "linux")) {
# Only on linux because (e.g.) we disable certain features on purpose on rtools35 and solaris
cat("To reinstall with more optional capabilities enabled, see\n https://arrow.apache.org/docs/r/articles/install.html\n\n")
}

if (length(x$options)) {
print_key_values("Arrow options()", map_chr(x$options, format))
Expand All @@ -180,7 +218,7 @@ print.arrow_info <- function(x, ...) {
`Detected SIMD Level` = x$runtime_info$detected_simd_level
))
} else {
cat("Arrow C++ library not available\n")
cat("Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html for troubleshooting.\n")
}
invisible(x)
}
Expand Down
92 changes: 40 additions & 52 deletions r/R/install-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,55 +62,50 @@ install_arrow <- function(nightly = FALSE,
sysname <- tolower(Sys.info()[["sysname"]])
conda <- isTRUE(grepl("conda", R.Version()$platform))

if (sysname %in% c("windows", "darwin", "linux")) {
if (conda) {
if (nightly) {
system("conda install -y -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow")
} else {
system("conda install -y -c conda-forge --strict-channel-priority r-arrow")
}
if (conda) {
if (nightly) {
system("conda install -y -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow")
} else {
Sys.setenv(
LIBARROW_DOWNLOAD = "true",
LIBARROW_BINARY = binary,
LIBARROW_MINIMAL = minimal,
ARROW_R_DEV = verbose,
ARROW_USE_PKG_CONFIG = use_system
)
# On the M1, we can't use the usual autobrew, which pulls Intel dependencies
apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform)
# On Rosetta, we have to build without JEMALLOC, so we also can't autobrew
rosetta <- identical(sysname, "darwin") && identical(system("sysctl -n sysctl.proc_translated", intern = TRUE), "1")
if (rosetta) {
Sys.setenv(ARROW_JEMALLOC = "OFF")
}
if (apple_m1 || rosetta) {
Sys.setenv(FORCE_BUNDLED_BUILD = "true")
}
system("conda install -y -c conda-forge --strict-channel-priority r-arrow")
}
} else {
Sys.setenv(
LIBARROW_DOWNLOAD = "true",
LIBARROW_BINARY = binary,
LIBARROW_MINIMAL = minimal,
ARROW_R_DEV = verbose,
ARROW_USE_PKG_CONFIG = use_system
)
# On the M1, we can't use the usual autobrew, which pulls Intel dependencies
apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform)
# On Rosetta, we have to build without JEMALLOC, so we also can't autobrew
rosetta <- identical(sysname, "darwin") && identical(system("sysctl -n sysctl.proc_translated", intern = TRUE), "1")
if (rosetta) {
Sys.setenv(ARROW_JEMALLOC = "OFF")
}
if (apple_m1 || rosetta) {
Sys.setenv(FORCE_BUNDLED_BUILD = "true")
}

opts <- list()
if (apple_m1 || rosetta) {
# Skip binaries (esp. for rosetta)
opts$pkgType <- "source"
} else if (isTRUE(binary)) {
# Unless otherwise directed, don't consider newer source packages when
# options(pkgType) == "both" (default on win/mac)
opts$install.packages.check.source <- "no"
opts$install.packages.compile.from.source <- "never"
}
if (length(opts)) {
old <- options(opts)
on.exit(options(old))
}
install.packages("arrow", repos = arrow_repos(repos, nightly), ...)
opts <- list()
if (apple_m1 || rosetta) {
# Skip binaries (esp. for rosetta)
opts$pkgType <- "source"
} else if (isTRUE(binary)) {
# Unless otherwise directed, don't consider newer source packages when
# options(pkgType) == "both" (default on win/mac)
opts$install.packages.check.source <- "no"
opts$install.packages.compile.from.source <- "never"
}
if ("arrow" %in% loadedNamespaces()) {
# If you've just sourced this file, "arrow" won't be (re)loaded
reload_arrow()
if (length(opts)) {
old <- options(opts)
on.exit(options(old))
}
} else {
# Solaris
message(SEE_README)
install.packages("arrow", repos = arrow_repos(repos, nightly), ...)
}
if ("arrow" %in% loadedNamespaces()) {
# If you've just sourced this file, "arrow" won't be (re)loaded
reload_arrow()
}
}

Expand Down Expand Up @@ -142,10 +137,3 @@ reload_arrow <- function() {
message("Please restart R to use the 'arrow' package.")
}
}

SEE_README <- paste(
"Refer to the R package README",
"<https://github.com/apache/arrow/blob/master/r/README.md>",
"and `vignette('install', package = 'arrow')`",
"for installation guidance."
)
11 changes: 6 additions & 5 deletions r/data-raw/codegen.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
# #if defined(ARROW_R_WITH_FEATURE)
# and each feature is written to its own set of export files.

# Ensure that all machines are sorting the same way
invisible(Sys.setlocale("LC_COLLATE", "C"))

features <- c("arrow", "dataset", "parquet", "s3")
Expand Down Expand Up @@ -111,17 +112,17 @@ all_decorations <- cpp_decorations()
arrow_exports <- get_exported_functions(all_decorations, features)

arrow_classes <- c(
"Table" = "arrow::Table",
"Table" = "arrow::Table",
"RecordBatch" = "arrow::RecordBatch"
)

# This takes a cpp11 C wrapper and conditionally makes it available based on
# a feature decoration
ifdef_wrap <- function(cpp11_wrapped, name, sexp_signature, decoration) {
if (identical(decoration, "arrow")) {
# Arrow is now required
return(cpp11_wrapped)
}
# if (identical(decoration, "arrow")) {
# # Arrow is now required
# return(cpp11_wrapped)
# }
glue('
#if defined(ARROW_R_WITH_{toupper(decoration)})
{cpp11_wrapped}
Expand Down
Loading

0 comments on commit c0ce2b1

Please sign in to comment.