From 576923b814bb9c2e0960d7956ef1337521b5aa14 Mon Sep 17 00:00:00 2001 From: wjsi Date: Mon, 2 Oct 2023 23:05:20 +0800 Subject: [PATCH] Fix test failures --- .../merge/tests/test_merge_execution.py | 32 +++++++++++-------- mars/dataframe/utils.py | 4 +-- mars/learn/contrib/lightgbm/core.py | 1 - .../contrib/lightgbm/tests/test_classifier.py | 8 ++--- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/mars/dataframe/merge/tests/test_merge_execution.py b/mars/dataframe/merge/tests/test_merge_execution.py index 4bb8292ea1..62281b7199 100644 --- a/mars/dataframe/merge/tests/test_merge_execution.py +++ b/mars/dataframe/merge/tests/test_merge_execution.py @@ -312,11 +312,15 @@ def test_join_on(setup): expected4.set_index("a2", inplace=True) result4.set_index("a2", inplace=True) pd.testing.assert_frame_equal( - sort_dataframe_inplace(expected4, 0), sort_dataframe_inplace(result4, 0) + sort_dataframe_inplace(expected4, 0, kind="mergesort"), + sort_dataframe_inplace(result4, 0, kind="mergesort"), ) def test_merge_one_chunk(setup): + def sort_by_col1(df): + return df.sort_values(by=df.columns[1], kind="mergesort") + df1 = pd.DataFrame( {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]}, index=["a1", "a2", "a3", "a4"], @@ -348,8 +352,8 @@ def test_merge_one_chunk(setup): result = jdf.execute().fetch() pd.testing.assert_frame_equal( - expected.sort_values(by=expected.columns[1]).reset_index(drop=True), - result.sort_values(by=result.columns[1]).reset_index(drop=True), + sort_by_col1(expected).reset_index(drop=True), + sort_by_col1(result).reset_index(drop=True), ) # right have one chunk @@ -361,8 +365,8 @@ def test_merge_one_chunk(setup): result = jdf.execute().fetch() pd.testing.assert_frame_equal( - expected.sort_values(by=expected.columns[1]).reset_index(drop=True), - result.sort_values(by=result.columns[1]).reset_index(drop=True), + sort_by_col1(expected).reset_index(drop=True), + sort_by_col1(result).reset_index(drop=True), ) # left have one chunk and how="left", then one chunk tile @@ -377,8 +381,8 @@ def test_merge_one_chunk(setup): result = jdf.execute().fetch() pd.testing.assert_frame_equal( - expected.sort_values(by=expected.columns[1]).reset_index(drop=True), - result.sort_values(by=result.columns[1]).reset_index(drop=True), + sort_by_col1(expected).reset_index(drop=True), + sort_by_col1(result).reset_index(drop=True), ) @@ -418,7 +422,8 @@ def test_broadcast_merge(setup): expected.set_index("key", inplace=True) result.set_index("key", inplace=True) pd.testing.assert_frame_equal( - sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0) + sort_dataframe_inplace(expected, 0, kind="mergesort"), + sort_dataframe_inplace(result, 0, kind="mergesort"), ) # test broadcast right and how="left" @@ -438,8 +443,8 @@ def test_broadcast_merge(setup): expected.set_index("key", inplace=True) result.set_index("key", inplace=True) pd.testing.assert_frame_equal( - expected.sort_values(by=["key", "value_x"]), - result.sort_values(by=["key", "value_x"]), + expected.sort_values(by=["key", "value_x"], kind="mergesort"), + result.sort_values(by=["key", "value_x"], kind="mergesort"), ) # test broadcast left @@ -459,7 +464,8 @@ def test_broadcast_merge(setup): expected.set_index("key", inplace=True) result.set_index("key", inplace=True) pd.testing.assert_frame_equal( - sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0) + sort_dataframe_inplace(expected, 0, kind="mergesort"), + sort_dataframe_inplace(result, 0, kind="mergesort"), ) # test broadcast left and how="right" @@ -479,8 +485,8 @@ def test_broadcast_merge(setup): expected.set_index("key", inplace=True) result.set_index("key", inplace=True) pd.testing.assert_frame_equal( - expected.sort_values(by=["key", "value_x"]), - result.sort_values(by=["key", "value_x"]), + expected.sort_values(by=["key", "value_x"], kind="mergesort"), + result.sort_values(by=["key", "value_x"], kind="mergesort"), ) diff --git a/mars/dataframe/utils.py b/mars/dataframe/utils.py index 513c99208c..9df187bb6a 100644 --- a/mars/dataframe/utils.py +++ b/mars/dataframe/utils.py @@ -106,9 +106,9 @@ def hash_dtypes(dtypes, size): return [dtypes[index] for index in hashed_indexes] -def sort_dataframe_inplace(df, *axis): +def sort_dataframe_inplace(df, *axis, **kw): for ax in axis: - df.sort_index(axis=ax, inplace=True) + df.sort_index(axis=ax, inplace=True, **kw) return df diff --git a/mars/learn/contrib/lightgbm/core.py b/mars/learn/contrib/lightgbm/core.py index ff050cdbb0..7da06cfa56 100644 --- a/mars/learn/contrib/lightgbm/core.py +++ b/mars/learn/contrib/lightgbm/core.py @@ -20,7 +20,6 @@ import pandas as pd from ....dataframe import DataFrame as MarsDataFrame, Series as MarsSeries -from ....lib.version import parse as parse_version from ....tensor import tensor as mars_tensor diff --git a/mars/learn/contrib/lightgbm/tests/test_classifier.py b/mars/learn/contrib/lightgbm/tests/test_classifier.py index db7425fc86..28fd623421 100644 --- a/mars/learn/contrib/lightgbm/tests/test_classifier.py +++ b/mars/learn/contrib/lightgbm/tests/test_classifier.py @@ -75,9 +75,7 @@ def test_local_classifier(create_cluster): # test sparse tensor X_sparse_data = X_sparse classifier = LGBMClassifier(n_estimators=2) - classifier.fit( - X_sparse_data, y_data, eval_set=[(X_sparse_data, y_data)] - ) + classifier.fit(X_sparse_data, y_data, eval_set=[(X_sparse_data, y_data)]) prediction = classifier.predict(X_sparse_data) assert prediction.ndim == 1 @@ -118,9 +116,7 @@ def test_local_classifier(create_cluster): # should raise error if weight.ndim > 1 with pytest.raises(ValueError): - LGBMClassifier(n_estimators=2).fit( - X, y_df, sample_weight=mt.random.rand(1, 1) - ) + LGBMClassifier(n_estimators=2).fit(X, y_df, sample_weight=mt.random.rand(1, 1)) # test binary classifier new_y = (y_data > 0.5).astype(mt.int32)