-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Support third-party execution engines in Series.map #61467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
6f61d7b
ea45245
ef62074
b5e5519
30ca3bd
b32ae65
c3afd05
4a3bcfa
e838c4c
cae63ac
a4d8b4a
56c3ce0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,64 @@ | ||
import numpy as np | ||
|
||
from pandas import ( | ||
DataFrame, | ||
Series, | ||
) | ||
from pandas.api.executors import BaseExecutionEngine | ||
from pandas.core.groupby.base import transformation_kernels | ||
|
||
# There is no Series.cumcount or DataFrame.cumcount
# Both lists therefore hold the sorted transformation kernel names with
# "cumcount" filtered out; as written here their contents are identical.
series_transform_kernels = [
    x for x in sorted(transformation_kernels) if x != "cumcount"
]
frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"]
|
||
|
||
class MockExecutionEngine(BaseExecutionEngine): | ||
""" | ||
Execution Engine to test if the execution engine interface receives and | ||
uses all parameters provided by the user. | ||
|
||
Making this engine work as the default Python engine by calling it, no extra | ||
functionality is implemented here. | ||
|
||
When testing, this will be called when this engine is provided, and then the | ||
same pandas.map and pandas.apply function will be called, but without engine, | ||
executing the default behavior from the python engine. | ||
""" | ||
|
||
def map(data, func, args, kwargs, decorator, skip_na): | ||
kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {} | ||
return data.map(func, na_action="ignore" if skip_na else None, **kwargs_to_pass) | ||
|
||
def apply(data, func, args, kwargs, decorator, axis): | ||
if isinstance(data, Series): | ||
return data.apply(func, convert_dtype=True, args=args, by_row=False) | ||
elif isinstance(data, DataFrame): | ||
return data.apply( | ||
func, | ||
axis=axis, | ||
raw=False, | ||
result_type=None, | ||
args=args, | ||
by_row="compat", | ||
**kwargs, | ||
) | ||
else: | ||
assert isinstance(data, np.ndarray) | ||
|
||
def wrap_function(func): | ||
# https://github.com/numpy/numpy/issues/8352 | ||
def wrapper(*args, **kwargs): | ||
result = func(*args, **kwargs) | ||
if isinstance(result, str): | ||
result = np.array(result, dtype=object) | ||
return result | ||
|
||
return wrapper | ||
|
||
return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs) | ||
|
||
|
||
class MockEngineDecorator:
    """
    Stand-in for a third-party engine decorator.

    It only carries a ``__pandas_udf__`` attribute pointing at
    ``MockExecutionEngine``.
    """

    # NOTE(review): presumably pandas looks up ``__pandas_udf__`` on the object
    # passed as ``engine=`` to find the execution engine — confirm.
    __pandas_udf__ = MockExecutionEngine
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import pytest | ||
|
||
from pandas.tests.apply.common import MockEngineDecorator | ||
|
||
|
||
@pytest.fixture(params=[None, MockEngineDecorator])
def engine(request):
    """
    Parametrized fixture returning ``None`` or ``MockEngineDecorator``.

    Tests that request it run once per parameter.
    """
    return request.param
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
timedelta_range, | ||
) | ||
import pandas._testing as tm | ||
from pandas.tests.apply.conftest import engine # noqa: F401 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Curious why this needs importing since it's already in the `conftest.py`?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Good question. I answered in a comment, so readers of that import don't need to ask themselves the same thing. Most tests related to apply/map are in `pandas/tests/apply` […]. I moved the mock classes to `pandas/tests/apply/common.py` […].
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I think if your work in #61125 continues to expand, I would be OK moving this to the global `conftest.py`. |
||
|
||
|
||
def test_series_map_box_timedelta(): | ||
|
@@ -32,16 +33,20 @@ def f(x): | |
ser.map(f) | ||
|
||
|
||
def test_map_callable(datetime_series): | ||
def test_map_callable(datetime_series, engine): # noqa: F811 | ||
with np.errstate(all="ignore"): | ||
tm.assert_series_equal(datetime_series.map(np.sqrt), np.sqrt(datetime_series)) | ||
tm.assert_series_equal( | ||
datetime_series.map(np.sqrt, engine=engine), np.sqrt(datetime_series) | ||
) | ||
|
||
# map function element-wise | ||
tm.assert_series_equal(datetime_series.map(math.exp), np.exp(datetime_series)) | ||
tm.assert_series_equal( | ||
datetime_series.map(math.exp, engine=engine), np.exp(datetime_series) | ||
) | ||
|
||
# empty series | ||
s = Series(dtype=object, name="foo", index=Index([], name="bar")) | ||
rs = s.map(lambda x: x) | ||
rs = s.map(lambda x: x, engine=engine) | ||
tm.assert_series_equal(s, rs) | ||
|
||
# check all metadata (GH 9322) | ||
|
@@ -52,7 +57,7 @@ def test_map_callable(datetime_series): | |
|
||
# index but no data | ||
s = Series(index=[1, 2, 3], dtype=np.float64) | ||
rs = s.map(lambda x: x) | ||
rs = s.map(lambda x: x, engine=engine) | ||
tm.assert_series_equal(s, rs) | ||
|
||
|
||
|
@@ -269,10 +274,10 @@ def test_map_decimal(string_series): | |
assert isinstance(result.iloc[0], Decimal) | ||
|
||
|
||
def test_map_na_exclusion(): | ||
def test_map_na_exclusion(engine): # noqa: F811 | ||
s = Series([1.5, np.nan, 3, np.nan, 5]) | ||
|
||
result = s.map(lambda x: x * 2, na_action="ignore") | ||
result = s.map(lambda x: x * 2, na_action="ignore", engine=engine) | ||
exp = s * 2 | ||
tm.assert_series_equal(result, exp) | ||
|
||
|
@@ -628,3 +633,18 @@ def test_map_no_func_or_arg(): | |
def test_map_func_is_none():
    # Passing ``func=None`` explicitly must raise a ValueError whose message
    # says the ``func`` parameter is required.
    with pytest.raises(ValueError, match="The `func` parameter is required"):
        Series([1, 2]).map(func=None)
|
||
|
||
@pytest.mark.parametrize("func", [{}, {1: 2}, Series([3, 4])])
def test_map_engine_no_function(func):
    # ``func`` is a dict or a Series here, not a callable; combining such a
    # mapping with ``engine`` is expected to raise a ValueError.
    s = Series([1, 2])

    with pytest.raises(ValueError, match="engine argument can only be specified"):
        s.map(func, engine="something")
|
||
|
||
def test_map_engine_not_executor():
    # A callable is supplied, but the engine is a plain string; the call is
    # expected to raise a ValueError naming the invalid engine.
    s = Series([1, 2])

    with pytest.raises(ValueError, match="Not a valid engine: 'something'"):
        s.map(lambda x: x, engine="something")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we move this to the `conftest.py` as well? In the past, I've moved away from defining objects in non-`test_` or `conftest.py` files.