diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py index 8e8dfee9990e3..923236c3a5fbd 100644 --- a/python/pyspark/pandas/data_type_ops/num_ops.py +++ b/python/pyspark/pandas/data_type_ops/num_ops.py @@ -247,14 +247,22 @@ def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) if not is_valid_operand_for_numeric_arithmetic(right): raise TypeError("True division can not be applied to given types.") + right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) def truediv(left: PySparkColumn, right: Any) -> PySparkColumn: - return F.when( - F.lit(right != 0) | F.lit(right).isNull(), - left.__div__(right), - ).otherwise(F.lit(np.inf).__div__(left)) + if not get_option("compute.ansi_mode_support"): + return F.when( + F.lit(right != 0) | F.lit(right).isNull(), + left.__div__(right), + ).otherwise(F.lit(np.inf).__div__(left)) + else: + return F.when( + right == 0, + F.when(left < 0, F.lit(float("-inf"))) + .when(left > 0, F.lit(float("inf"))) + .otherwise(F.lit(np.nan)), + ).otherwise(left / right) - right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) return numpy_column_op(truediv)(left, right) def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: @@ -332,18 +340,26 @@ def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) if not is_valid_operand_for_numeric_arithmetic(right): raise TypeError("True division can not be applied to given types.") + right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) def truediv(left: PySparkColumn, right: Any) -> PySparkColumn: - return F.when( - F.lit(right != 0) | F.lit(right).isNull(), - left.__div__(right), - ).otherwise( - F.when(F.lit(left == np.inf) | F.lit(left == -np.inf), left).otherwise( - F.lit(np.inf).__div__(left) + if not get_option("compute.ansi_mode_support"): + return F.when( + F.lit(right != 0) | F.lit(right).isNull(), + left.__div__(right), + ).otherwise( + F.when(F.lit(left == np.inf) | F.lit(left == -np.inf), left).otherwise( + F.lit(np.inf).__div__(left) + ) ) - ) + else: + return F.when( + right == 0, + F.when(left < 0, F.lit(float("-inf"))) + .when(left > 0, F.lit(float("inf"))) + .otherwise(F.lit(np.nan)), + ).otherwise(left / right) - right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) return numpy_column_op(truediv)(left, right) def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: diff --git a/python/pyspark/pandas/tests/computation/test_binary_ops.py b/python/pyspark/pandas/tests/computation/test_binary_ops.py index 857a2d0da0ce3..861a158bb4a7c 100644 --- a/python/pyspark/pandas/tests/computation/test_binary_ops.py +++ b/python/pyspark/pandas/tests/computation/test_binary_ops.py @@ -111,7 +111,6 @@ def test_binary_operator_sub(self): psdf = ps.DataFrame({"a": ["x"], "b": ["y"]}) self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] - psdf["b"]) - @unittest.skipIf(is_ansi_mode_test, ansi_mode_not_supported_message) def test_divide_by_zero_behavior(self): pdf = pd.DataFrame( { @@ -127,6 +126,19 @@ def test_divide_by_zero_behavior(self): # b / a: 0 divided by .. self.assert_eq(psdf["b"] / psdf["a"], pdf["b"] / pdf["a"]) + pdf = pd.DataFrame( + { + "a": [1, -1, 0], + "b": [0, 0, 0], + } + ) + psdf = ps.from_pandas(pdf) + # a / b: .. divide by zero + self.assert_eq(psdf["a"] / psdf["b"], pdf["a"] / pdf["b"]) + + # b / a: 0 divided by .. + self.assert_eq(psdf["b"] / psdf["a"], pdf["b"] / pdf["a"]) + def test_binary_operator_truediv(self): # Positive pdf = pd.DataFrame({"a": [3], "b": [2]})