From aa420375e24777fd52f04af5141c13637989a202 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Fri, 23 May 2025 22:47:32 +0700 Subject: [PATCH 1/8] Implemented NumbaExecutionEngine --- pandas/core/apply.py | 73 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2c96f1ef020ac..fe87b1d2beaa6 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -45,9 +45,9 @@ ABCSeries, ) -from pandas.core._numba.executor import generate_apply_looper import pandas.core.common as com from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core._numba.executor import generate_apply_looper from pandas.core.util.numba_ import ( get_jit_arguments, prepare_function_arguments, @@ -178,6 +178,57 @@ def apply( """ +class NumbaExecutionEngine(BaseExecutionEngine): + """ + Numba-based execution engine for pandas apply and map operations. + """ + + @staticmethod + def map( + data: np.ndarray | Series | DataFrame, + func, + args: tuple, + kwargs: dict, + engine_kwargs: dict | None, + skip_na: bool, + ): + """ + Elementwise map for the Numba engine. Currently not supported. + """ + raise NotImplementedError("Numba map is not implemented yet.") + + @staticmethod + def apply( + data: np.ndarray | Series | DataFrame, + func, + args: tuple, + kwargs: dict, + engine_kwargs: dict | None, + axis: int | str, + ): + """ + Apply `func` along the given axis using Numba. + """ + + looper_args, looper_kwargs = prepare_function_arguments( + func, # type: ignore[arg-type] + args, + kwargs, + num_required_args=1, + ) + # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has + # incompatible type "Callable[..., Any] | str | list[Callable + # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str | + # list[Callable[..., Any] | str]]"; expected "Hashable" + nb_looper = generate_apply_looper( + func, # type: ignore[arg-type] + **get_jit_arguments(engine_kwargs) + ) + result = nb_looper(data, axis, *looper_args) + # If we made the result 2-D, squeeze it back to 1-D + return np.squeeze(result) + + def frame_apply( obj: DataFrame, func: AggFuncType, @@ -1094,23 +1145,15 @@ def wrapper(*args, **kwargs): return wrapper if engine == "numba": - args, kwargs = prepare_function_arguments( - self.func, # type: ignore[arg-type] + engine_obj = NumbaExecutionEngine() + result = engine_obj.apply( + self.values, + self.func, self.args, self.kwargs, - num_required_args=1, - ) - # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has - # incompatible type "Callable[..., Any] | str | list[Callable - # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str | - # list[Callable[..., Any] | str]]"; expected "Hashable" - nb_looper = generate_apply_looper( - self.func, # type: ignore[arg-type] - **get_jit_arguments(engine_kwargs), + engine_kwargs, + self.axis, ) - result = nb_looper(self.values, self.axis, *args) - # If we made the result 2-D, squeeze it back to 1-D - result = np.squeeze(result) else: result = np.apply_along_axis( wrap_function(self.func), From db9f3b000f237a1fc580f3361e0984b410ee9d3e Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 24 May 2025 06:51:23 +0700 Subject: [PATCH 2/8] whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ab3316e7fca4c..6948ffcde40b2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -31,6 +31,7 @@ Other enhancements - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`) - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`) - :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface `_ if available, only falling back to the Dataframe Interchange Protocol if that fails (:issue:`60739`) +- Added :class:`pandas.core.apply.NumbaExecutionEngine` as the built-in ``numba`` execution engine for ``apply`` and ``map`` operations (:issue:`61458`) - Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`) - Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`) - :class:`pandas.api.typing.NoDefault` is available for typing ``no_default`` From 4cb240d95c139ef8956a0430287559a5d75a73bc Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 24 May 2025 06:56:06 +0700 Subject: [PATCH 3/8] precommit --- pandas/core/apply.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index fe87b1d2beaa6..ba240813d3229 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -45,9 +45,9 @@ ABCSeries, ) +from pandas.core._numba.executor import generate_apply_looper import pandas.core.common as com from pandas.core.construction import ensure_wrapped_if_datetimelike -from pandas.core._numba.executor import generate_apply_looper from pandas.core.util.numba_ import ( get_jit_arguments, prepare_function_arguments, @@ -211,7 +211,7 @@ def apply( """ looper_args, looper_kwargs = prepare_function_arguments( - func, # type: ignore[arg-type] + func, # type: ignore[arg-type] args, kwargs, num_required_args=1, @@ -221,8 +221,8 @@ def apply( # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str | # list[Callable[..., Any] | str]]"; expected "Hashable" nb_looper = generate_apply_looper( - func, # type: ignore[arg-type] - **get_jit_arguments(engine_kwargs) + func, # type: ignore[arg-type] + **get_jit_arguments(engine_kwargs), ) result = nb_looper(data, axis, *looper_args) # If we made the result 2-D, squeeze it back to 1-D From 97d9063dcc65968956b282b13fbb49337f0388b2 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 24 May 2025 07:21:10 +0700 Subject: [PATCH 4/8] Match function arguments --- pandas/core/apply.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index ba240813d3229..3d760eaa8705a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -189,7 +189,7 @@ def map( func, args: tuple, kwargs: dict, - engine_kwargs: dict | None, + decorator: Callable | None, skip_na: bool, ): """ @@ -203,7 +203,7 @@ def apply( func, args: tuple, kwargs: dict, - engine_kwargs: dict | None, + decorator: Callable, axis: int | str, ): """ @@ -222,7 +222,7 @@ def apply( # list[Callable[..., Any] | str]]"; expected "Hashable" nb_looper = generate_apply_looper( func, # type: ignore[arg-type] - **get_jit_arguments(engine_kwargs), + **get_jit_arguments(decorator), ) result = nb_looper(data, axis, *looper_args) # If we made the result 2-D, squeeze it back to 1-D From 69e0e355e14312e19b1341157d1b6e100f8dcb3d Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 24 May 2025 07:54:50 +0700 Subject: [PATCH 5/8] Fix CI --- pandas/core/apply.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 3d760eaa8705a..b765088308b2d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -209,9 +209,12 @@ def apply( """ Apply `func` along the given axis using Numba. """ + engine_kwargs: dict[str, bool] | None = ( + decorator if isinstance(decorator, dict) else None + ) looper_args, looper_kwargs = prepare_function_arguments( - func, # type: ignore[arg-type] + func, args, kwargs, num_required_args=1, @@ -221,8 +224,8 @@ def apply( # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str | # list[Callable[..., Any] | str]]"; expected "Hashable" nb_looper = generate_apply_looper( - func, # type: ignore[arg-type] - **get_jit_arguments(decorator), + func, + **get_jit_arguments(engine_kwargs), ) result = nb_looper(data, axis, *looper_args) # If we made the result 2-D, squeeze it back to 1-D From 736507949fbc217fae93d061f02ab3f9e2899f05 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Wed, 28 May 2025 16:41:30 +0700 Subject: [PATCH 6/8] updated whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6948ffcde40b2..ea9b06a58be92 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -30,8 +30,8 @@ Other enhancements ^^^^^^^^^^^^^^^^^^ - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`) - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`) +- :meth:`DataFrame.apply` accepts Numba as an engine by passing the JIT decorator directly, e.g. ``df.apply(func, engine=numba.jit)`` (:issue:`61458`) - :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface `_ if available, only falling back to the Dataframe Interchange Protocol if that fails (:issue:`60739`) -- Added :class:`pandas.core.apply.NumbaExecutionEngine` as the built-in ``numba`` execution engine for ``apply`` and ``map`` operations (:issue:`61458`) - Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`) - Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`) - :class:`pandas.api.typing.NoDefault` is available for typing ``no_default`` From c605857d16bde78f6a4b0cc04556bcf24f7844bc Mon Sep 17 00:00:00 2001 From: arthurlw Date: Thu, 29 May 2025 22:18:39 +0700 Subject: [PATCH 7/8] Updated conditions and delegate method to numba.jit --- pandas/core/apply.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b765088308b2d..a4cce45758feb 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -13,6 +13,7 @@ cast, ) +import numba import numpy as np from pandas._libs.internals import BlockValuesRefs @@ -1148,8 +1149,9 @@ def wrapper(*args, **kwargs): return wrapper if engine == "numba": - engine_obj = NumbaExecutionEngine() - result = engine_obj.apply( + if not hasattr(numba.jit, "__pandas_udf__"): + numba.jit.__pandas_udf__ = NumbaExecutionEngine + result = numba.jit.__pandas_udf__.apply( self.values, self.func, self.args, From 24a06150e01028a38f3466ded5c85e143ea41aef Mon Sep 17 00:00:00 2001 From: arthurlw Date: Tue, 3 Jun 2025 18:37:58 +0700 Subject: [PATCH 8/8] Added try and except to catch ImportError --- pandas/core/apply.py | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a4cce45758feb..760fd111f21ce 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -13,7 +13,6 @@ cast, ) -import numba import numpy as np from pandas._libs.internals import BlockValuesRefs @@ -1149,16 +1148,31 @@ def wrapper(*args, **kwargs): return wrapper if engine == "numba": - if not hasattr(numba.jit, "__pandas_udf__"): - numba.jit.__pandas_udf__ = NumbaExecutionEngine - result = numba.jit.__pandas_udf__.apply( - self.values, - self.func, - self.args, - self.kwargs, - engine_kwargs, - self.axis, - ) + try: + import numba + + if not hasattr(numba.jit, "__pandas_udf__"): + numba.jit.__pandas_udf__ = NumbaExecutionEngine + result = numba.jit.__pandas_udf__.apply( + self.values, + self.func, + self.args, + self.kwargs, + engine_kwargs, + self.axis, + ) + else: + raise ImportError + except ImportError: + engine_obj = NumbaExecutionEngine() + result = engine_obj.apply( + self.values, + self.func, + self.args, + self.kwargs, + engine_kwargs, + self.axis, + ) else: result = np.apply_along_axis( wrap_function(self.func),