Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into fea/remove-decimal-conversion-pq-writer
Browse files Browse the repository at this point in the history
  • Loading branch information
mhaseeb123 authored Feb 4, 2025
2 parents 83d21b3 + 99b207f commit 6e4aeba
Show file tree
Hide file tree
Showing 79 changed files with 1,255 additions and 1,073 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@nvks-runners
with:
enable_check_generated_files: false
ignored_pr_jobs: "telemetry-summarize"
ignored_pr_jobs: "telemetry-summarize spark-rapids-jni"
conda-cpp-build:
needs: checks
secrets: inherit
Expand Down Expand Up @@ -342,6 +342,10 @@ jobs:
node_type: "cpu4"
build_type: pull-request
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
spark-rapids-jni:
needs: changed-files
uses: ./.github/workflows/spark-rapids-jni.yaml
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java

telemetry-summarize:
# This job must use a self-hosted runner to record telemetry traces.
Expand Down
22 changes: 22 additions & 0 deletions .github/workflows/spark-rapids-jni.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: spark-rapids-jni

on:
workflow_call:

jobs:
spark-rapids-jni-build:
runs-on: linux-amd64-cpu8
container:
image: rapidsai/ci-spark-rapids-jni:rockylinux8-cuda12.2.0
steps:
- uses: actions/checkout@v4
with:
repository: NVIDIA/spark-rapids-jni
submodules: recursive
- uses: actions/checkout@v4
with:
path: thirdparty/cudf
- name: "Build spark-rapids-jni"
run: |
mkdir target
GPU_ARCHS=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on scl enable gcc-toolset-11 build/buildcpp.sh
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ repos:
args: ["--fix"]
- id: ruff-format
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.5.0
rev: v0.6.0
hooks:
- id: verify-copyright
exclude: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ runtest() {
local lib=$1
local mode=$2

echo "Running tests for $lib in $mode mode"
local plugin=""
if [ "$mode" = "cudf" ]; then
plugin="-p cudf.pandas"
Expand Down
4 changes: 2 additions & 2 deletions ci/cudf_pandas_scripts/third-party-integration/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ main() {
lib=$(echo "$lib" | tr -d '""')
echo "Running tests for library $lib"

CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
CUDA_VERSION=$(if [ "$lib" = "tensorflow" ]; then echo "11.8"; else echo "${RAPIDS_CUDA_VERSION%.*}"; fi)

. /opt/conda/etc/profile.d/conda.sh

Expand All @@ -36,7 +36,7 @@ main() {
--config "$dependencies_yaml" \
--output conda \
--file-key "test_${lib}" \
--matrix "cuda=${CUDA_MAJOR};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
--matrix "cuda=${CUDA_VERSION};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n test

Expand Down
11 changes: 11 additions & 0 deletions ci/run_pylibcudf_pytests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Copyright (c) 2025, NVIDIA CORPORATION.

set -euo pipefail

# It is essential to cd into python/pylibcudf/pylibcudf as `pytest-xdist` + `coverage` seem to work only at this directory level.

# Support invoking run_pylibcudf_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibcudf/pylibcudf/

pytest --cache-clear --ignore="benchmarks" "$@" tests
1 change: 1 addition & 0 deletions ci/test_python_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@ rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
"cudf=${RAPIDS_VERSION}" \
"pylibcudf=${RAPIDS_VERSION}" \
"libcudf=${RAPIDS_VERSION}"
14 changes: 8 additions & 6 deletions ci/test_python_cudf.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.

# Support invoking test_python_cudf.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../;
Expand All @@ -15,12 +15,14 @@ trap "EXITCODE=1" ERR
set +e

rapids-logger "pytest pylibcudf"
pushd python/pylibcudf/pylibcudf/tests
python -m pytest \
--cache-clear \
./ci/run_pylibcudf_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-pylibcudf.xml" \
--numprocesses=8 \
--dist=worksteal \
.
popd
--cov-config=../.coveragerc \
--cov=pylibcudf \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/pylibcudf-coverage.xml" \
--cov-report=term

rapids-logger "pytest cudf"
./ci/run_cudf_pytests.sh \
Expand Down
10 changes: 5 additions & 5 deletions cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -190,10 +190,10 @@ static void bench_query(nvbench::state& state)
{
srand(5236);

auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
auto const query = state.get_int64("query");
auto const json_path = queries[query];
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
auto const query = state.get_int64("query");
std::string_view const json_path = queries[query];

auto const stream = cudf::get_default_stream();
auto input = build_json_string_column(desired_bytes, num_rows);
Expand Down
6 changes: 3 additions & 3 deletions cpp/benchmarks/string/join_strings.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,8 +41,8 @@ static void bench_join(nvbench::state& state)
state.add_global_memory_reads<nvbench::int8_t>(chars_size); // all bytes are read;
state.add_global_memory_writes<nvbench::int8_t>(chars_size); // all bytes are written

std::string separator(":");
std::string narep("null");
std::string_view separator(":");
std::string_view narep("null");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = cudf::strings::join_strings(input, separator, narep);
});
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/like.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ static void bench_like(nvbench::state& state)
auto input = cudf::strings_column_view(col->view());

// This pattern forces reading the entire target string (when matched expected)
auto pattern = std::string("% 5W4_"); // regex equivalent: ".* 5W4.$"
auto pattern = std::string_view("% 5W4_"); // regex equivalent: ".* 5W4.$"

state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
// gather some throughput statistics as well
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/replace_re.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ static void bench_replace(nvbench::state& state)
cudf::strings::replace_with_backrefs(input, *program, replacement);
});
} else {
auto replacement = std::string("77");
auto replacement = std::string_view("77");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::replace_re(input, *program, replacement);
});
Expand Down
4 changes: 2 additions & 2 deletions cpp/examples/strings/libcudf_apis.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,7 +53,7 @@ std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,

auto const last_initial_first = cudf::table_view({last_initial->view(), first});

auto result = cudf::strings::concatenate(last_initial_first, std::string(" "));
auto result = cudf::strings::concatenate(last_initial_first, std::string_view(" "));

cudaStreamSynchronize(0);

Expand Down
20 changes: 10 additions & 10 deletions cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -916,7 +916,7 @@ class csv_reader_options_builder {
*/
csv_reader_options_builder& prefix(std::string pfx)
{
options._prefix = pfx;
options._prefix = std::move(pfx);
return *this;
}

Expand Down Expand Up @@ -1450,7 +1450,7 @@ class csv_writer_options {
*
* @return string to be used for null entries
*/
[[nodiscard]] std::string get_na_rep() const { return _na_rep; }
[[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }

/**
* @brief Whether to write headers to csv.
Expand All @@ -1471,7 +1471,7 @@ class csv_writer_options {
*
* @return Character used for separating lines
*/
[[nodiscard]] std::string get_line_terminator() const { return _line_terminator; }
[[nodiscard]] std::string const& get_line_terminator() const { return _line_terminator; }

/**
* @brief Returns character used for separating column values.
Expand All @@ -1485,14 +1485,14 @@ class csv_writer_options {
*
* @return string used for values != 0 in INT8 types
*/
[[nodiscard]] std::string get_true_value() const { return _true_value; }
[[nodiscard]] std::string const& get_true_value() const { return _true_value; }

/**
* @brief Returns string used for values == 0 in INT8 types.
*
* @return string used for values == 0 in INT8 types
*/
[[nodiscard]] std::string get_false_value() const { return _false_value; }
[[nodiscard]] std::string const& get_false_value() const { return _false_value; }

/**
* @brief Returns the quote style for the writer.
Expand All @@ -1519,7 +1519,7 @@ class csv_writer_options {
*
* @param val String to represent null value
*/
void set_na_rep(std::string val) { _na_rep = val; }
void set_na_rep(std::string val) { _na_rep = std::move(val); }

/**
* @brief Enables/Disables headers being written to csv.
Expand All @@ -1540,7 +1540,7 @@ class csv_writer_options {
*
* @param term Character to represent line termination
*/
void set_line_terminator(std::string term) { _line_terminator = term; }
void set_line_terminator(std::string term) { _line_terminator = std::move(term); }

/**
* @brief Sets character used for separating column values.
Expand All @@ -1554,14 +1554,14 @@ class csv_writer_options {
*
* @param val String to represent values != 0 in INT8 types
*/
void set_true_value(std::string val) { _true_value = val; }
void set_true_value(std::string val) { _true_value = std::move(val); }

/**
* @brief Sets string used for values == 0 in INT8 types.
*
* @param val String to represent values == 0 in INT8 types
*/
void set_false_value(std::string val) { _false_value = val; }
void set_false_value(std::string val) { _false_value = std::move(val); }

/**
* @brief (Re)sets the table being written.
Expand Down
12 changes: 6 additions & 6 deletions cpp/include/cudf/io/orc_metadata.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -171,12 +171,12 @@ using statistics_type = std::variant<no_statistics,
timestamp_statistics>;

//! Orc I/O interfaces
namespace orc {
// forward declare the type that ProtobufReader uses. The `cudf::io::column_statistics` objects,
namespace orc::detail {
// forward declare the type that protobuf_reader uses. The `cudf::io::column_statistics` objects,
// returned from `read_parsed_orc_statistics`, are constructed from
// `cudf::io::orc::column_statistics` objects that `ProtobufReader` initializes.
// `cudf::io::orc::detail::column_statistics` objects that `protobuf_reader` initializes.
struct column_statistics;
} // namespace orc
} // namespace orc::detail

/**
* @brief Contains per-column ORC statistics.
Expand All @@ -194,7 +194,7 @@ struct column_statistics {
*
* @param detail_statistics The statistics to initialize the object with
*/
column_statistics(orc::column_statistics&& detail_statistics);
column_statistics(orc::detail::column_statistics&& detail_statistics);
};

/**
Expand Down
9 changes: 5 additions & 4 deletions cpp/include/cudf/io/text/detail/trie.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,7 @@
#include <algorithm>
#include <queue>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

Expand Down Expand Up @@ -128,7 +129,7 @@ struct trie {
/**
* @brief Insert the string into the trie, growing the trie as necessary
*/
void insert(std::string s) { insert(s.c_str(), s.size(), 0); }
void insert(std::string_view s) { insert(s.data(), s.size(), 0); }

private:
trie_builder_node& insert(char const* s, uint16_t size, uint8_t depth)
Expand Down Expand Up @@ -164,12 +165,12 @@ struct trie {
* @param mr Memory resource to use for the device memory allocation
* @return The trie.
*/
static trie create(std::string const& pattern,
static trie create(std::string pattern,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)

{
return create(std::vector<std::string>{pattern}, stream, mr);
return create(std::vector<std::string>{std::move(pattern)}, stream, mr);
}

/**
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cudf/io/text/multibyte_split.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,7 @@

#include <memory>
#include <optional>
#include <string_view>

namespace CUDF_EXPORT cudf {
namespace io {
Expand Down Expand Up @@ -90,7 +91,7 @@ struct parse_options {
*/
std::unique_ptr<cudf::column> multibyte_split(
data_chunk_source const& source,
std::string const& delimiter,
std::string_view delimiter,
parse_options options = {},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
Expand Down
Loading

0 comments on commit 6e4aeba

Please sign in to comment.