Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pylibcudf.Scalar.from_py for construction from Python strings, bool, int, float #17898

Open
wants to merge 12 commits into
base: branch-25.04
Choose a base branch
from
Open
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from pylibcudf.exception_handler cimport libcudf_exception_handler
from pylibcudf.libcudf.types cimport data_type
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.scalar.scalar cimport scalar

Expand All @@ -13,7 +14,9 @@ cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil:
# Templated factory: takes a value of type T and returns an owning
# unique_ptr to a libcudf scalar. C++ exceptions raised by the call are
# translated through libcudf_exception_handler.
cdef unique_ptr[scalar] make_fixed_width_scalar[T](
T value
) except +libcudf_exception_handler

# Factory that takes only a data_type (no value argument) and returns an
# owning unique_ptr to a libcudf scalar. Callers that need a specific value
# are expected to assign it afterwards (e.g. via numeric_scalar.set_value).
# NOTE(review): validity/initial contents of the returned scalar are not
# visible here -- confirm against the libcudf documentation.
cdef unique_ptr[scalar] make_numeric_scalar(
data_type type_
) except +libcudf_exception_handler
# Factory that takes a column_view by const reference and returns an owning
# unique_ptr to a libcudf scalar.
# NOTE(review): presumably the result matches the column's type and holds no
# value -- confirm against the libcudf documentation.
cdef unique_ptr[scalar] make_empty_scalar_like(
const column_view &
) except +libcudf_exception_handler
93 changes: 90 additions & 3 deletions python/pylibcudf/pylibcudf/scalar.pyx
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

from cpython cimport bool as py_bool, datetime
from cython cimport no_gc_clear
from libc.stdint cimport int64_t
from libcpp cimport bool as cbool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.scalar.scalar_factories cimport make_empty_scalar_like
from pylibcudf.libcudf.scalar.scalar cimport (
scalar,
numeric_scalar,
)
from pylibcudf.libcudf.scalar.scalar_factories cimport (
make_empty_scalar_like,
make_string_scalar,
make_numeric_scalar,
)
from pylibcudf.libcudf.types cimport type_id


from rmm.pylibrmm.memory_resource cimport get_current_device_resource

from .column cimport Column
from .types cimport DataType

from functools import singledispatch

__all__ = ["Scalar"]


Expand Down Expand Up @@ -79,3 +93,76 @@ cdef class Scalar:
s.c_obj.swap(libcudf_scalar)
s._data_type = DataType.from_libcudf(s.get().type())
return s

@classmethod
def from_py(cls, py_val):
    """
    Build a pylibcudf.Scalar from a Python standard library value.

    The conversion is chosen by the type of ``py_val``: ``bool`` maps to
    BOOL8, ``int`` to INT64, ``float`` to FLOAT64 and ``str`` to STRING.

    Parameters
    ----------
    py_val: bool, int, float, str, datetime.datetime, datetime.timedelta, list, dict
        Value to convert to a pylibcudf.Scalar. Only bool, int, float and
        str are currently converted; the remaining listed types are
        recognized but not yet supported.

    Returns
    -------
    Scalar
        New pylibcudf.Scalar

    Raises
    ------
    NotImplementedError
        If ``py_val`` is a datetime.datetime, datetime.timedelta, list
        or dict.
    TypeError
        If ``py_val`` is of any other type without a registered conversion.
    """
    # All work is delegated to the type-dispatched module-level helper;
    # ``cls`` itself is not consulted.
    converted = _from_py(py_val)
    return converted

cdef Scalar _new_scalar(unique_ptr[scalar] c_obj, DataType dtype):
    """Wrap an owned libcudf scalar and its DataType in a new Scalar.

    The instance is created with ``Scalar.__new__`` and its ``c_obj`` and
    ``_data_type`` attributes are then filled in directly.
    """
    cdef Scalar wrapped = Scalar.__new__(Scalar)
    wrapped._data_type = dtype
    # Exchange the (empty) pointer held by the fresh instance with the
    # caller-supplied one so the new Scalar ends up holding the scalar.
    wrapped.c_obj.swap(c_obj)
    return wrapped


@singledispatch
def _from_py(py_val):
raise TypeError(f"{type(py_val).__name__} cannot be converted to pylibcudf.Scalar")


@_from_py.register(dict)
@_from_py.register(list)
@_from_py.register(datetime.datetime)
@_from_py.register(datetime.timedelta)
def _(py_val):
    """Handler for types that are recognized but not yet convertible.

    Always raises NotImplementedError naming the type of ``py_val``.
    """
    message = (
        f"Conversion from {type(py_val).__name__} is currently not supported."
    )
    raise NotImplementedError(message)


@_from_py.register(float)
def _(py_val):
    """Convert a Python float into a FLOAT64 Scalar."""
    cdef DataType float64_type = DataType(type_id.FLOAT64)
    cdef unique_ptr[scalar] owner = make_numeric_scalar(float64_type.c_obj)
    # The factory receives only the type; the value is stored afterwards
    # through the typed numeric_scalar view of the same object.
    (<numeric_scalar[double]*>owner.get()).set_value(py_val)
    return _new_scalar(move(owner), float64_type)


@_from_py.register(int)
def _(py_val):
    """Convert a Python int into an INT64 Scalar."""
    cdef DataType int64_type = DataType(type_id.INT64)
    cdef unique_ptr[scalar] owner = make_numeric_scalar(int64_type.c_obj)
    # The factory receives only the type; the value is stored afterwards
    # through the typed numeric_scalar view of the same object.
    (<numeric_scalar[int64_t]*>owner.get()).set_value(py_val)
    return _new_scalar(move(owner), int64_type)


@_from_py.register(py_bool)
def _(py_val):
    """Convert a Python bool into a BOOL8 Scalar."""
    cdef DataType bool8_type = DataType(type_id.BOOL8)
    cdef unique_ptr[scalar] owner = make_numeric_scalar(bool8_type.c_obj)
    # The factory receives only the type; the value is stored afterwards
    # through the typed numeric_scalar view of the same object.
    (<numeric_scalar[cbool]*>owner.get()).set_value(py_val)
    return _new_scalar(move(owner), bool8_type)


@_from_py.register(str)
def _(py_val):
    """Convert a Python str into a STRING Scalar.

    The text is passed to libcudf as the bytes produced by ``str.encode()``.
    """
    cdef DataType string_type = DataType(type_id.STRING)
    cdef unique_ptr[scalar] owner = make_string_scalar(py_val.encode())
    return _new_scalar(move(owner), string_type)
30 changes: 30 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_scalar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
import datetime

import pyarrow as pa
import pytest

import pylibcudf as plc


@pytest.mark.parametrize(
    # NOTE: the list previously contained ``1 - 1.0`` (a missing comma),
    # which evaluates to 0.0 -- duplicating the 0.0 case and dropping both
    # the positive-int and negative-float cases.
    "val", [True, False, -1, 0, 1, -1.0, 0.0, 1.52, "", "a1!"]
)
def test_from_py(val):
    """Scalar.from_py must agree with pyarrow's inference for each value.

    Covers bool, negative/zero/positive int, negative/zero/positive float,
    and empty/non-empty str inputs.
    """
    result = plc.Scalar.from_py(val)
    expected = pa.scalar(val)
    assert plc.interop.to_arrow(result).equals(expected)


@pytest.mark.parametrize(
    "val",
    [
        datetime.datetime(2020, 1, 1),
        datetime.timedelta(1),
        [1],
        {1: 1},
    ],
)
def test_from_py_notimplemented(val):
    """Recognized-but-unsupported input types raise NotImplementedError."""
    convert = plc.Scalar.from_py
    with pytest.raises(NotImplementedError):
        convert(val)


@pytest.mark.parametrize(
    "val",
    [object, None],
)
def test_from_py_typeerror(val):
    """Values whose type has no registered conversion raise TypeError."""
    convert = plc.Scalar.from_py
    with pytest.raises(TypeError):
        convert(val)
Loading