Skip to content

Commit

Permalink
Fix test failures (#3362)
Browse files Browse the repository at this point in the history
  • Loading branch information
wjsi authored Nov 2, 2023
1 parent 0a42ba8 commit bcc0005
Show file tree
Hide file tree
Showing 21 changed files with 66 additions and 55 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/benchmark-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
git fetch upstream
git merge upstream/master
asv machine --yes
asv continuous -e -f 1.1 --strict upstream/master HEAD
asv continuous -e -f 1.1 upstream/master HEAD
if: ${{ steps.build.outcome == 'success' }}

- name: Publish benchmarks artifact
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/platform-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ jobs:
./ci/install-hadoop.sh
echo "import coverage; coverage.process_startup()" > \
$(python -c "import site; print(site.getsitepackages()[-1])")/coverage.pth
conda install -n test --quiet --yes -c conda-forge python=$PYTHON skein libffi conda-pack
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
sudo apt install -y g++-11
conda install -n test --quiet --yes -c conda-forge python=$PYTHON skein libffi conda-pack "grpcio<1.54"
fi
if [ -n "$WITH_VINEYARD" ]; then
pip install vineyard -i https://pypi.org/simple
Expand All @@ -104,8 +106,7 @@ jobs:
rm -fr /tmp/etcd-$ETCD_VER-linux-amd64.tar.gz /tmp/etcd-download-test
fi
if [ -n "$WITH_RAY" ] || [ -n "$WITH_RAY_DAG" ] || [ -n "$WITH_RAY_DEPLOY" ]; then
pip install "ray>=1.8.0,<2.4.0"
pip install "xgboost_ray<0.1.14" "protobuf<4"
pip install "ray>=1.8.0,<2.4.0" "xgboost<2" "xgboost_ray<0.1.14" "protobuf<4"
# Ray Datasets need pyarrow>=6.0.1
pip install "pyarrow>=6.0.1"
pip install lightgbm
Expand Down
6 changes: 4 additions & 2 deletions mars/dataframe/contrib/raydataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import operator
from functools import reduce

Expand Down Expand Up @@ -55,8 +56,9 @@ def __getstate__():
state.pop("dataframe", None)
return state

# `dataframe` is not serializable by ray.
dataset.__getstate__ = __getstate__
if not hasattr(type(dataset), "__getstate__"):
# if `dataframe` is not serializable by ray, patch our implementation
dataset.__getstate__ = __getstate__
return dataset


Expand Down
5 changes: 3 additions & 2 deletions mars/dataframe/contrib/raydataset/mldataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ def __getstate__():
state.pop("dataframe", None)
return state

# `dataframe` is not serializable by ray.
dataset.__getstate__ = __getstate__
if not hasattr(dataset, "__getstate__"):
# `dataframe` is not serializable by ray.
dataset.__getstate__ = __getstate__
return dataset
6 changes: 1 addition & 5 deletions mars/dataframe/contrib/raydataset/tests/test_mldataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@

ray = lazy_import("ray")
ml_dataset = lazy_import("ray.util.data", rename="ml_dataset")

try:
import xgboost_ray
except ImportError: # pragma: no cover
xgboost_ray = None
xgboost_ray = lazy_import("xgboost_ray")
try:
import sklearn
except ImportError: # pragma: no cover
Expand Down
12 changes: 12 additions & 0 deletions mars/dataframe/datasource/read_raydataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,21 @@ def read_ray_dataset(ds, columns=None, incremental_index=False, **kwargs):
from ray.data.impl.pandas_block import PandasBlockSchema
except ImportError: # pragma: no cover
PandasBlockSchema = type(None)
try:
from ray.data.dataset import Schema as RayDatasetSchema
except ImportError:
RayDatasetSchema = type(None)

if isinstance(schema, PandasBlockSchema):
dtypes = pd.Series(schema.types, index=schema.names)
elif isinstance(schema, RayDatasetSchema):
dtypes = pd.Series(
[
t.to_pandas_dtype() if t is not object else np.dtype("O")
for t in schema.types
],
index=schema.names,
)
elif isinstance(schema, pa.Schema):
dtypes = schema.empty_table().to_pandas().dtypes
else:
Expand Down
4 changes: 0 additions & 4 deletions mars/dataframe/datasource/tests/test_datasource_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,10 +1288,6 @@ def test_read_raydataset(ray_start_regular, ray_create_mars_cluster):
pdf2,
)

# Test simple datasets
with pytest.raises(NotImplementedError):
ray.data.range(10).to_mars()


@require_ray
@pytest.mark.skipif(
Expand Down
32 changes: 19 additions & 13 deletions mars/dataframe/merge/tests/test_merge_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,11 +312,15 @@ def test_join_on(setup):
expected4.set_index("a2", inplace=True)
result4.set_index("a2", inplace=True)
pd.testing.assert_frame_equal(
sort_dataframe_inplace(expected4, 0), sort_dataframe_inplace(result4, 0)
sort_dataframe_inplace(expected4, 0, kind="mergesort"),
sort_dataframe_inplace(result4, 0, kind="mergesort"),
)


def test_merge_one_chunk(setup):
def sort_by_col1(df):
return df.sort_values(by=df.columns[1], kind="mergesort")

df1 = pd.DataFrame(
{"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]},
index=["a1", "a2", "a3", "a4"],
Expand Down Expand Up @@ -348,8 +352,8 @@ def test_merge_one_chunk(setup):
result = jdf.execute().fetch()

pd.testing.assert_frame_equal(
expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
result.sort_values(by=result.columns[1]).reset_index(drop=True),
sort_by_col1(expected).reset_index(drop=True),
sort_by_col1(result).reset_index(drop=True),
)

# right have one chunk
Expand All @@ -361,8 +365,8 @@ def test_merge_one_chunk(setup):
result = jdf.execute().fetch()

pd.testing.assert_frame_equal(
expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
result.sort_values(by=result.columns[1]).reset_index(drop=True),
sort_by_col1(expected).reset_index(drop=True),
sort_by_col1(result).reset_index(drop=True),
)

# left have one chunk and how="left", then one chunk tile
Expand All @@ -377,8 +381,8 @@ def test_merge_one_chunk(setup):
result = jdf.execute().fetch()

pd.testing.assert_frame_equal(
expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
result.sort_values(by=result.columns[1]).reset_index(drop=True),
sort_by_col1(expected).reset_index(drop=True),
sort_by_col1(result).reset_index(drop=True),
)


Expand Down Expand Up @@ -418,7 +422,8 @@ def test_broadcast_merge(setup):
expected.set_index("key", inplace=True)
result.set_index("key", inplace=True)
pd.testing.assert_frame_equal(
sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0)
sort_dataframe_inplace(expected, 0, kind="mergesort"),
sort_dataframe_inplace(result, 0, kind="mergesort"),
)

# test broadcast right and how="left"
Expand All @@ -438,8 +443,8 @@ def test_broadcast_merge(setup):
expected.set_index("key", inplace=True)
result.set_index("key", inplace=True)
pd.testing.assert_frame_equal(
expected.sort_values(by=["key", "value_x"]),
result.sort_values(by=["key", "value_x"]),
expected.sort_values(by=["key", "value_x"], kind="mergesort"),
result.sort_values(by=["key", "value_x"], kind="mergesort"),
)

# test broadcast left
Expand All @@ -459,7 +464,8 @@ def test_broadcast_merge(setup):
expected.set_index("key", inplace=True)
result.set_index("key", inplace=True)
pd.testing.assert_frame_equal(
sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0)
sort_dataframe_inplace(expected, 0, kind="mergesort"),
sort_dataframe_inplace(result, 0, kind="mergesort"),
)

# test broadcast left and how="right"
Expand All @@ -479,8 +485,8 @@ def test_broadcast_merge(setup):
expected.set_index("key", inplace=True)
result.set_index("key", inplace=True)
pd.testing.assert_frame_equal(
expected.sort_values(by=["key", "value_x"]),
result.sort_values(by=["key", "value_x"]),
expected.sort_values(by=["key", "value_x"], kind="mergesort"),
result.sort_values(by=["key", "value_x"], kind="mergesort"),
)


Expand Down
4 changes: 2 additions & 2 deletions mars/dataframe/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ def hash_dtypes(dtypes, size):
return [dtypes[index] for index in hashed_indexes]


def sort_dataframe_inplace(df, *axis):
def sort_dataframe_inplace(df, *axis, **kw):
for ax in axis:
df.sort_index(axis=ax, inplace=True)
df.sort_index(axis=ax, inplace=True, **kw)
return df


Expand Down
2 changes: 1 addition & 1 deletion mars/learn/contrib/lightgbm/_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def __call__(self):
elif hasattr(self.model, "classes_"):
dtype = np.array(self.model.classes_).dtype
else:
dtype = getattr(self.model, "out_dtype_", np.dtype("float"))
dtype = getattr(self.model, "out_dtype_", [np.dtype("float")])[0]

if self.output_types[0] == OutputType.tensor:
# tensor
Expand Down
6 changes: 3 additions & 3 deletions mars/learn/contrib/lightgbm/_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,11 +406,11 @@ def execute(cls, ctx, op: "LGBMTrain"):
op.model_type == LGBMModelType.RANKER
or op.model_type == LGBMModelType.REGRESSOR
):
model.set_params(out_dtype_=np.dtype("float"))
model.set_params(out_dtype_=[np.dtype("float")])
elif hasattr(label_val, "dtype"):
model.set_params(out_dtype_=label_val.dtype)
model.set_params(out_dtype_=[label_val.dtype])
else:
model.set_params(out_dtype_=label_val.dtypes[0])
model.set_params(out_dtype_=[label_val.dtypes[0]])

ctx[op.outputs[0].key] = pickle.dumps(model)
finally:
Expand Down
1 change: 0 additions & 1 deletion mars/learn/contrib/lightgbm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import pandas as pd

from ....dataframe import DataFrame as MarsDataFrame, Series as MarsSeries
from ....lib.version import parse as parse_version
from ....tensor import tensor as mars_tensor


Expand Down
8 changes: 2 additions & 6 deletions mars/learn/contrib/lightgbm/tests/test_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@ def test_local_classifier(create_cluster):
# test sparse tensor
X_sparse_data = X_sparse
classifier = LGBMClassifier(n_estimators=2)
classifier.fit(
X_sparse_data, y_data, eval_set=[(X_sparse_data, y_data)]
)
classifier.fit(X_sparse_data, y_data, eval_set=[(X_sparse_data, y_data)])
prediction = classifier.predict(X_sparse_data)

assert prediction.ndim == 1
Expand Down Expand Up @@ -118,9 +116,7 @@ def test_local_classifier(create_cluster):

# should raise error if weight.ndim > 1
with pytest.raises(ValueError):
LGBMClassifier(n_estimators=2).fit(
X, y_df, sample_weight=mt.random.rand(1, 1)
)
LGBMClassifier(n_estimators=2).fit(X, y_df, sample_weight=mt.random.rand(1, 1))

# test binary classifier
new_y = (y_data > 0.5).astype(mt.int32)
Expand Down
2 changes: 1 addition & 1 deletion mars/learn/linear_model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def fit(self, X, y, sample_weight=None):
self.coef_.execute()
except LinAlgError:
# TODO: implement linalg.lstsq first
raise NotImplementedError("Does not support sigular matrix!")
raise NotImplementedError("Does not support singular matrix!")

if y.ndim == 1:
self.coef_ = mt.ravel(self.coef_)
Expand Down
4 changes: 2 additions & 2 deletions mars/learn/linear_model/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_linear_regression(setup):
assert_array_almost_equal(reg.predict(X), model.predict(X))

# Regular model fitting, #samples <= 2, # features < 2
error_msg = re.escape("Does not support sigular matrix!")
error_msg = re.escape("Does not support singular matrix!")

X = [[1], [2]]
Y = [1, 2]
Expand All @@ -69,7 +69,7 @@ def test_linear_regression(setup):
assert_array_almost_equal(reg.predict(X), model.predict(X))

# Extra case #1: singular matrix, degenerate input
error_msg = re.escape("Does not support sigular matrix!")
error_msg = re.escape("Does not support singular matrix!")

X = [[1]]
Y = [0]
Expand Down
2 changes: 1 addition & 1 deletion mars/learn/metrics/pairwise/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"precomputed": None, # HACK: precomputed is always allowed, never called
}

# These distances recquire boolean tensors, when using mars.tensor.spatial.distance
# These distances require boolean tensors, when using mars.tensor.spatial.distance
PAIRWISE_BOOLEAN_FUNCTIONS = [
"dice",
"jaccard",
Expand Down
10 changes: 5 additions & 5 deletions mars/storage/vineyard.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,11 @@ async def setup(cls, **kwargs) -> Tuple[Dict, Dict]:
vineyard_store = None
else:
vineyard_store = vineyard.deploy.local.start_vineyardd(
etcd_endpoints,
etcd_prefix,
vineyardd_path,
vineyard_size,
vineyard_socket,
etcd_endpoints=etcd_endpoints,
etcd_prefix=etcd_prefix,
vineyardd_path=vineyardd_path,
size=vineyard_size,
socket=vineyard_socket,
rpc=False,
)
vineyard_socket = (
Expand Down
2 changes: 1 addition & 1 deletion mars/tensor/base/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def tile(A, reps):
behavior, promote `A` to d-dimensions manually before calling this
function.
If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it.
If ``A.ndim > d``, `reps` is promoted to `A`.ndim by prepending 1's to it.
Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as
(1, 1, 2, 2).
Expand Down
2 changes: 1 addition & 1 deletion mars/tensor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ def fetch_corner_data(tensor, session=None):
# the tensor must have been executed,
# thus the size could not be NaN
if tensor.size > threshold:
# two edges for each exis
# two edges for each axis
indices_iter = list(itertools.product(*(range(2) for _ in range(tensor.ndim))))
corners = np.empty(shape=(2,) * tensor.ndim, dtype=object)
shape = [0 for _ in range(tensor.ndim)]
Expand Down
2 changes: 2 additions & 0 deletions mars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,10 @@ def __getattr__(self, item):
elif locals is not None:
locals[rename] = real_mod
ret = getattr(real_mod, item)

for on_load_func in self._on_loads:
on_load_func()

# make sure on_load hooks only executed once
self._on_loads = []
return ret
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ install_requires =
pandas>=1.0.0,<2.0.0
scipy>=1.0.0
scikit-learn>=0.20
numexpr>=2.6.4,!=2.8.5
numexpr>=2.6.4,!=2.8.5,!=2.8.6
cloudpickle>=1.5.0
pyyaml>=5.1
psutil>=5.9.0
Expand Down

0 comments on commit bcc0005

Please sign in to comment.