Skip to content

Commit

Permalink
Adding float to string kernel (#1508)
Browse files Browse the repository at this point in the history
* wip

Signed-off-by: Haoyang Li <[email protected]>

* wip

Signed-off-by: Haoyang Li <[email protected]>

* Add float to string kernel

Signed-off-by: Haoyang Li <[email protected]>

* Update src/main/cpp/src/cast_float_to_string.cu

Co-authored-by: Mike Wilson <[email protected]>

* Update src/main/cpp/src/cast_float_to_string.cu

Co-authored-by: Mike Wilson <[email protected]>

* address comments and use different precision for float

Signed-off-by: Haoyang Li <[email protected]>

* rewrite the solution with ryu

Signed-off-by: Haoyang Li <[email protected]>

* update license

Signed-off-by: Haoyang Li <[email protected]>

* clean up

Signed-off-by: Haoyang Li <[email protected]>

* Split ftos_converter out

Signed-off-by: Haoyang Li <[email protected]>

* clean up

Signed-off-by: Haoyang Li <[email protected]>

* resolve cudf conflicts

Signed-off-by: Haoyang Li <[email protected]>

* resolve cudf conflicts

Signed-off-by: Haoyang Li <[email protected]>

* resolve cudf conflicts

Signed-off-by: Haoyang Li <[email protected]>

* resolve cudf conflicts

Signed-off-by: Haoyang Li <[email protected]>

* remove cudf changes

Signed-off-by: Haoyang Li <[email protected]>

* remove cudf changes

Signed-off-by: Haoyang Li <[email protected]>

* Add copyright and notice

Signed-off-by: Haoyang Li <[email protected]>

* Fix copyrights and license

Signed-off-by: Haoyang Li <[email protected]>

* cudf conflict resolve

Signed-off-by: Haoyang Li <[email protected]>

* Add nv apache license to ftos_converter

Signed-off-by: Haoyang Li <[email protected]>

* Update src/main/cpp/src/ftos_converter.cu

Co-authored-by: Jason Lowe <[email protected]>

* address some comments

Signed-off-by: Haoyang Li <[email protected]>

* cudf conflict

Signed-off-by: Haoyang Li <[email protected]>

* Update src/main/cpp/src/cast_float_to_string.cu

Co-authored-by: Nghia Truong <[email protected]>

* addressed comments

Signed-off-by: Haoyang Li <[email protected]>

* clang format

Signed-off-by: Haoyang Li <[email protected]>

* Address comments

Signed-off-by: Haoyang Li <[email protected]>

* Address comments

Signed-off-by: Haoyang Li <[email protected]>

* sync

Signed-off-by: Haoyang Li <[email protected]>

* address comments

Signed-off-by: Haoyang Li <[email protected]>

---------

Signed-off-by: Haoyang Li <[email protected]>
Co-authored-by: Mike Wilson <[email protected]>
Co-authored-by: Jason Lowe <[email protected]>
Co-authored-by: Nghia Truong <[email protected]>
  • Loading branch information
4 people authored Dec 8, 2023
1 parent 844a336 commit 4c20e3a
Show file tree
Hide file tree
Showing 9 changed files with 1,449 additions and 4 deletions.
20 changes: 20 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
RAPIDS Accelerator JNI For Apache Spark
Copyright (c) 2022-2023, NVIDIA CORPORATION

--------------------------------------------------------------------------------

This project includes code from ryu (https://github.com/ulfjack/ryu).

Copyright (2018) Ulf Adams and contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
1 change: 1 addition & 0 deletions src/main/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ add_library(
src/ZOrderJni.cpp
src/bloom_filter.cu
src/cast_decimal_to_string.cu
src/cast_float_to_string.cu
src/cast_string.cu
src/cast_string_to_float.cu
src/datetime_rebase.cu
Expand Down
20 changes: 18 additions & 2 deletions src/main/cpp/src/CastStringJni.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -109,6 +109,22 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_toFloat(
CATCH_CAST_EXCEPTION(env, 0);
}

/**
 * JNI entry point backing CastStrings.fromFloat.
 *
 * Interprets `input_column` as the address of a `cudf::column_view`, converts it with
 * `spark_rapids_jni::float_to_string` on the cudf default stream, and hands ownership of the
 * resulting column back to the caller as a `jlong` handle.
 *
 * NOTE(review): JNI_NULL_CHECK and CATCH_CAST_EXCEPTION are macros defined elsewhere; both are
 * given `0` here, presumably as the value returned when a check fails or an exception is caught.
 */
JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloat(JNIEnv* env,
                                                                               jclass,
                                                                               jlong input_column)
{
  JNI_NULL_CHECK(env, input_column, "input column is null", 0);

  try {
    cudf::jni::auto_set_device(env);

    // The handle is the raw address of a column_view; it is only read, never owned, here.
    auto const* floats_view   = reinterpret_cast<cudf::column_view const*>(input_column);
    auto const default_stream = cudf::get_default_stream();
    return cudf::jni::release_as_jlong(
      spark_rapids_jni::float_to_string(*floats_view, default_stream));
  }
  CATCH_CAST_EXCEPTION(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal(JNIEnv* env,
jclass,
jlong input_column)
Expand All @@ -118,7 +134,7 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal
try {
cudf::jni::auto_set_device(env);

cudf::column_view cv{*reinterpret_cast<cudf::column_view const*>(input_column)};
auto const& cv = *reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::release_as_jlong(
spark_rapids_jni::decimal_to_non_ansi_string(cv, cudf::get_default_stream()));
}
Expand Down
127 changes: 127 additions & 0 deletions src/main/cpp/src/cast_float_to_string.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cast_string.hpp"
#include "ftos_converter.cuh"

#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

namespace spark_rapids_jni {

namespace detail {
namespace {

/**
 * @brief Device functor that sizes or writes the string form of one floating-point row.
 *
 * The functor has two modes, selected by `d_chars`:
 *  - `d_chars == nullptr`: store the output size of row `idx` into `d_offsets[idx]`
 *    (0 for null rows).
 *  - `d_chars != nullptr`: write the characters of row `idx` at `d_chars + d_offsets[idx]`
 *    (null rows write nothing).
 *
 * NOTE(review): `d_offsets` and `d_chars` are not set in this file; presumably
 * `cudf::strings::detail::make_strings_children` fills them in before each invocation.
 *
 * @tparam FloatType Element type read from `d_floats`.
 */
template <typename FloatType>
struct float_to_string_fn {
  cudf::column_device_view d_floats;
  cudf::size_type* d_offsets;
  char* d_chars;

  /**
   * @brief Size reported by `ftos_converter::compute_ftos_size` for the value at `idx`.
   */
  __device__ cudf::size_type compute_output_size(cudf::size_type idx) const
  {
    // The converter takes a double plus a flag saying whether the source was single precision.
    constexpr bool single_precision = std::is_same_v<FloatType, float>;
    auto const widened              = static_cast<double>(d_floats.element<FloatType>(idx));
    return static_cast<cudf::size_type>(
      ftos_converter::compute_ftos_size(widened, single_precision));
  }

  /**
   * @brief Write the string for the value at `idx` into its slot of the chars buffer.
   */
  __device__ void float_to_string(cudf::size_type idx) const
  {
    constexpr bool single_precision = std::is_same_v<FloatType, float>;
    auto const widened              = static_cast<double>(d_floats.element<FloatType>(idx));
    char* const destination         = d_chars + d_offsets[idx];
    ftos_converter::float_to_string(widened, single_precision, destination);
  }

  /**
   * @brief Process row `idx` in whichever mode `d_chars` selects.
   */
  __device__ void operator()(cudf::size_type idx) const
  {
    bool const sizing_only = (d_chars == nullptr);

    if (d_floats.is_null(idx)) {
      // Null rows occupy no characters; only the sizing mode has anything to record.
      if (sizing_only) { d_offsets[idx] = 0; }
      return;
    }

    if (sizing_only) {
      d_offsets[idx] = compute_output_size(idx);
    } else {
      float_to_string(idx);
    }
  }
};

/**
* @brief This dispatch method is for converting floats into strings.
*
* The template function declaration ensures only float types are allowed.
*/
struct dispatch_float_to_string_fn {
template <typename FloatType, CUDF_ENABLE_IF(std::is_floating_point_v<FloatType>)>
std::unique_ptr<cudf::column> operator()(cudf::column_view const& floats,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const strings_count = floats.size();
if (strings_count == 0) { return cudf::make_empty_column(cudf::type_id::STRING); }

auto const input_ptr = cudf::column_device_view::create(floats, stream);

auto [offsets, chars] = cudf::strings::detail::make_strings_children(
float_to_string_fn<FloatType>{*input_ptr}, strings_count, stream, mr);

return make_strings_column(strings_count,
std::move(offsets),
std::move(chars),
floats.null_count(),
cudf::detail::copy_bitmask(floats, stream, mr));
}

// non-float types throw an exception
template <typename T, CUDF_ENABLE_IF(not std::is_floating_point_v<T>)>
std::unique_ptr<cudf::column> operator()(cudf::column_view const&,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource*)
{
CUDF_FAIL("Values for float_to_string function must be a float type.");
}
};

} // namespace

/**
 * @brief Convert a floating-point column into a strings column.
 *
 * Dispatches on the element type of `floats` to `dispatch_float_to_string_fn`, forwarding
 * the column, stream and memory resource unchanged. Non-floating-point columns reach the
 * dispatcher overload that fails with CUDF_FAIL.
 */
std::unique_ptr<cudf::column> float_to_string(cudf::column_view const& floats,
                                              rmm::cuda_stream_view stream,
                                              rmm::mr::device_memory_resource* mr)
{
  auto const element_type = floats.type();
  return cudf::type_dispatcher(element_type, dispatch_float_to_string_fn{}, floats, stream, mr);
}

} // namespace detail

// external API
//
// Public entry point for converting a floating-point column to a strings column.
// It adds nothing beyond the CUDF_FUNC_RANGE() marker and forwards all three
// arguments unchanged to detail::float_to_string, which does the type dispatch.
std::unique_ptr<cudf::column> float_to_string(cudf::column_view const& floats,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// NOTE(review): presumably opens an NVTX profiling range for this function
// (the macro comes from cudf/detail/nvtx/ranges.hpp) — confirm.
CUDF_FUNC_RANGE();
return detail::float_to_string(floats, stream, mr);
}

} // namespace spark_rapids_jni
7 changes: 6 additions & 1 deletion src/main/cpp/src/cast_string.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -115,6 +115,11 @@ std::unique_ptr<cudf::column> string_to_float(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Convert a floating-point column into a strings column.
 *
 * The implementation dispatches on the column's element type; non-floating-point
 * columns are rejected via CUDF_FAIL. The output carries the input's null count and a
 * copy of its null mask, and an empty input produces an empty STRING column.
 *
 * @param input Column of floating-point values to convert.
 * @param stream CUDA stream forwarded to the conversion.
 * @param mr Device memory resource forwarded to the conversion; defaults to the current
 *           device resource.
 * @return A new strings column owned by the caller.
 */
std::unique_ptr<cudf::column> float_to_string(
cudf::column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

std::unique_ptr<cudf::column> decimal_to_non_ansi_string(
cudf::column_view const& input,
rmm::cuda_stream_view stream,
Expand Down
Loading

0 comments on commit 4c20e3a

Please sign in to comment.