Add Python 3.11 support to Kedro (#2851)
* add 3.11
* Update setup.py
* limit number of python version tests
* fix setup.py
* delta-spark version
* try original setup
* remove 3.11 from circleci
* Update setup.py
* Update setup.py
* pin matplotlib to 3.7.2 for python 3.11
* Update test_import.py
* add noks test adjustments
* Update setup.py
* Update setup.py
* replace eq with is for multi_catalog
* minor fixes
* remove matplotlib 3.11 pin
* matplotlib pin for 3.11
* run original matplotlib pin with ubuntu only
* run circleci
* pin spark < 3.4 as it's breaking
* spark datasets: revert earlier changes back to original
* delta
* remove all windows tests from GHA
* skip coverage as not needed
* Checking only WIN tests (will revert)
* Update unit-tests.yml
* undo previous commit
* add msbuild
* add ubuntu tests back in, remove 3.11 tests from circleci, add msbuild for the other tests
* set hdf5 disable check and lint
* pin tables for 3.8
* pin 3.8 tables
* lint
* remove 3.11 completely from circleci and tidy up
* add 3.11 to pyarrow pin for docs
* undo previous commit
* update rtd to python 3.11
* try 3.10 on rtd
* fix outdated pylint to ruff
* changes based on review
* add mock for pyarrow and deltalake for docs
* undo previous commit
* Update setup.py
* Update .readthedocs.yml
* Update conf.py
* changes based on review
* fix comments
* test running 3.10 parallel
* Add to release notes and update badges on docs
* Add migration tips
---------

Signed-off-by: SajidAlamQB <[email protected]>
SajidAlamQB authored Aug 8, 2023
1 parent 9a067d1 commit 654ede7
Showing 23 changed files with 187 additions and 152 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/all-checks.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest, windows-latest ]
-        python-version: [ "3.7", "3.8", "3.9", "3.10" ]
+        python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
     uses: ./.github/workflows/unit-tests.yml
     with:
       os: ${{ matrix.os }}
@@ -31,7 +31,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest ]
-        python-version: [ "3.10" ]
+        python-version: [ "3.11" ]
     uses: ./.github/workflows/lint.yml
     with:
       os: ${{ matrix.os }}
@@ -41,7 +41,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest, windows-latest ]
-        python-version: [ "3.7", "3.8", "3.9", "3.10" ]
+        python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
     uses: ./.github/workflows/e2e-tests.yml
     with:
       os: ${{ matrix.os }}
@@ -51,7 +51,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest, windows-latest ]
-        python-version: [ "3.7", "3.8", "3.9", "3.10" ]
+        python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
     uses: ./.github/workflows/pip-compile.yml
     with:
       os: ${{ matrix.os }}
2 changes: 1 addition & 1 deletion .github/workflows/docs-only-checks.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest ]
-        python-version: [ "3.7", "3.8", "3.9", "3.10" ]
+        python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
     uses: ./.github/workflows/lint.yml
     with:
       os: ${{ matrix.os }}
3 changes: 3 additions & 0 deletions .github/workflows/e2e-tests.yml
@@ -35,6 +35,9 @@ jobs:
         with:
           path: ~\AppData\Local\pip\Cache
           key: ${{inputs.os}}-python-${{inputs.python-version}}
+      - name: Add MSBuild to PATH
+        if: inputs.os == 'windows-latest'
+        uses: microsoft/setup-msbuild@v1
       - name: Install dependencies
         run: |
           make install-test-requirements
3 changes: 3 additions & 0 deletions .github/workflows/pip-compile.yml
@@ -31,6 +31,9 @@ jobs:
         with:
           path: ~\AppData\Local\pip\Cache
           key: ${{inputs.os}}-python-${{inputs.python-version}}
+      - name: Add MSBuild to PATH
+        if: inputs.os == 'windows-latest'
+        uses: microsoft/setup-msbuild@v1
       - name: Install dependencies
         run: |
           make install-test-requirements
13 changes: 5 additions & 8 deletions .github/workflows/unit-tests.yml
@@ -30,6 +30,9 @@ jobs:
         with:
           path: ~\AppData\Local\pip\Cache
           key: ${{inputs.os}}-python-${{inputs.python-version}}
+      - name: Add MSBuild to PATH
+        if: inputs.os == 'windows-latest'
+        uses: microsoft/setup-msbuild@v1
       - name: Install dependencies
         run: |
           make install-test-requirements
@@ -39,15 +42,9 @@
         run: pip install tables
       - name: pip freeze
        run: pip freeze
-      - name: Run unit tests sequentially
-        if: inputs.os == 'ubuntu-latest' && inputs.python-version == '3.10'
-        run: make test-sequential
       - name: Run unit tests
-        if: inputs.os == 'ubuntu-latest' && inputs.python-version != '3.10'
+        if: inputs.os == 'ubuntu-latest'
         run: make test
-      - name: Run unit tests without spark sequentially (Windows)
-        if: inputs.os == 'windows-latest' && inputs.python-version == '3.10'
-        run: make test-no-spark-sequential
       - name: Run unit tests without spark (Windows)
-        if: inputs.os == 'windows-latest' && inputs.python-version != '3.10'
+        if: inputs.os == 'windows-latest'
         run: make test-no-spark
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
 ![Kedro Logo Banner - Light](.github/demo-dark.png#gh-dark-mode-only)
 ![Kedro Logo Banner - Dark](.github/demo-light.png#gh-light-mode-only)
-[![Python version](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-blue.svg)](https://pypi.org/project/kedro/)
+[![Python version](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg)](https://pypi.org/project/kedro/)
 [![PyPI version](https://badge.fury.io/py/kedro.svg)](https://pypi.org/project/kedro/)
 [![Conda version](https://img.shields.io/conda/vn/conda-forge/kedro.svg)](https://anaconda.org/conda-forge/kedro)
 [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/kedro-org/kedro/blob/main/LICENSE.md)
6 changes: 6 additions & 0 deletions RELEASE.md
@@ -12,13 +12,19 @@

 ## Major features and improvements
 * Allowed registering of custom resolvers to `OmegaConfigLoader` through `CONFIG_LOADER_ARGS`.
+* Added support for Python 3.11, including the dependency pins and test adjustments this required. Migration tips are provided below.

 ## Bug fixes and other changes
 * Updated `kedro pipeline create` and `kedro catalog create` to use new `/conf` file structure.

 ## Documentation changes
 * Added migration guide from the `ConfigLoader` to the `OmegaConfigLoader`. The `ConfigLoader` is deprecated and will be removed in the `0.19.0` release.

+## Migration tips for Python 3.11
+* PyTables on Windows: `tables` is pinned to `3.8.0` on Windows for Python >= 3.8 due to compatibility issues.
+* Spark: `pyspark` is capped at `<3.4` because 3.4 introduces breaking changes.
+* Testing on Python 3.10: the latest `moto` release supports parallel test execution on Python 3.10, resolving earlier issues.
+
 ## Breaking changes to the API

 ## Upcoming deprecations for Kedro 0.19.0
3 changes: 2 additions & 1 deletion docs/source/conf.py
@@ -226,7 +226,8 @@
     "https://github.com/kedro-org/kedro/blob/main/README.md#the-humans-behind-kedro",  # "anchor not found" but is valid
     "https://opensource.org/license/apache2-0-php/",
     "https://docs.github.com/en/rest/overview/other-authentication-methods#via-username-and-password",
-    "https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/api/snowflake.snowpark.DataFrameWriter.saveAsTable.html"
+    "https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/api/snowflake.snowpark.DataFrameWriter.saveAsTable.html",
+    "https://www.educative.io/blog/advanced-yaml-syntax-cheatsheet#anchors"
 ]

 # retry before render a link broken (fix for "too many requests")
4 changes: 2 additions & 2 deletions docs/source/index.rst
@@ -23,9 +23,9 @@ Welcome to Kedro's documentation!
    :target: https://opensource.org/license/apache2-0-php/
    :alt: License is Apache 2.0

-.. image:: https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-blue.svg
+.. image:: https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg
    :target: https://pypi.org/project/kedro/
-   :alt: Python version 3.7, 3.8, 3.9, 3.10
+   :alt: Python version 3.7, 3.8, 3.9, 3.10, 3.11

 .. image:: https://badge.fury.io/py/kedro.svg
    :target: https://pypi.org/project/kedro/
2 changes: 1 addition & 1 deletion kedro/__init__.py
@@ -18,7 +18,7 @@ class KedroPythonVersionWarning(UserWarning):
 if not sys.warnoptions:
     warnings.simplefilter("error", KedroPythonVersionWarning)

-if sys.version_info >= (3, 11):
+if sys.version_info >= (3, 12):
     warnings.warn(
         """Kedro is not yet fully compatible with this Python version.
 To proceed at your own risk and ignore this warning,
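A note on the gate above: `kedro/__init__.py` escalates `KedroPythonVersionWarning` to an error only when `sys.warnoptions` is empty, so a user on a newer, not-yet-supported interpreter can opt out by supplying any warning option before import. A minimal sketch of that escape hatch, assuming only the standard `-W`/`PYTHONWARNINGS` machinery:

```python
import os
import subprocess
import sys

# Setting PYTHONWARNINGS populates sys.warnoptions in the child process,
# so the `if not sys.warnoptions` guard shown above skips the
# simplefilter("error", KedroPythonVersionWarning) escalation and the
# version warning is printed instead of raised.
env = dict(os.environ, PYTHONWARNINGS="default")
subprocess.run([sys.executable, "-c", "import kedro"], env=env, check=True)
```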
7 changes: 6 additions & 1 deletion kedro/extras/datasets/spark/deltatable_dataset.py
@@ -100,7 +100,12 @@ def _exists(self) -> bool:
         try:
             self._get_spark().read.load(path=load_path, format="delta")
         except AnalysisException as exception:
-            if "is not a Delta table" in exception.desc:
+            # `AnalysisException.desc` is deprecated with pyspark >= 3.4
+            message = (
+                exception.desc if hasattr(exception, "desc") else exception.message
+            )
+
+            if "Path does not exist:" in message or "is not a Delta table" in message:
                 return False
             raise
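The same shim appears in `spark_dataset.py` below: pyspark 3.4 deprecated `AnalysisException.desc` in favour of `message`, so the code probes for whichever attribute the installed version provides. A standalone sketch of the pattern (a sketch, not the committed helper):

```python
from pyspark.sql.utils import AnalysisException


def analysis_message(exception: AnalysisException) -> str:
    # pyspark < 3.4 exposes the error text as `desc`;
    # pyspark >= 3.4 drops `desc` in favour of `message`.
    return exception.desc if hasattr(exception, "desc") else exception.message
```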
14 changes: 8 additions & 6 deletions kedro/extras/datasets/spark/spark_dataset.py
@@ -41,7 +41,8 @@ def _parse_glob_pattern(pattern: str) -> str:

 def _split_filepath(filepath: str) -> Tuple[str, str]:
     split_ = filepath.split("://", 1)
-    if len(split_) == 2:  # noqa: PLR2004
+    MIN_SPLIT_SIZE = 2
+    if len(split_) == MIN_SPLIT_SIZE:
         return split_[0] + "://", split_[1]
     return "", split_[0]

@@ -232,7 +233,7 @@ class SparkDataSet(AbstractVersionedDataSet[DataFrame, DataFrame]):
     DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
     DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]

-    def __init__(  # noqa: too-many-arguments
+    def __init__(  # ruff: noqa: PLR0913
         self,
         filepath: str,
         file_format: str = "parquet",
@@ -401,10 +402,11 @@ def _exists(self) -> bool:
         try:
             self._get_spark().read.load(load_path, self._file_format)
         except AnalysisException as exception:
-            if (
-                exception.desc.startswith("Path does not exist:")
-                or "is not a Delta table" in exception.desc
-            ):
+            # `AnalysisException.desc` is deprecated with pyspark >= 3.4
+            message = (
+                exception.desc if hasattr(exception, "desc") else exception.message
+            )
+            if "Path does not exist:" in message or "is not a Delta table" in message:
                 return False
             raise
         return True
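For context on the `MIN_SPLIT_SIZE` change above, the helper splits a URI-style filepath into a protocol/path pair. A quick sanity check, not part of the commit (values illustrative):

```python
from kedro.extras.datasets.spark.spark_dataset import _split_filepath

# A path with "://" yields (protocol, remainder); a bare local path
# has nothing to split on, so the protocol comes back empty.
assert _split_filepath("s3a://bucket/data.parquet") == ("s3a://", "bucket/data.parquet")
assert _split_filepath("/local/data.parquet") == ("", "/local/data.parquet")
```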
2 changes: 1 addition & 1 deletion kedro/extras/datasets/spark/spark_jdbc_dataset.py
@@ -168,7 +168,7 @@ def _describe(self) -> Dict[str, Any]:
     }

     @staticmethod
-    def _get_spark():
+    def _get_spark():  # pragma: no cover
         return SparkSession.builder.getOrCreate()

     def _load(self) -> DataFrame:
31 changes: 18 additions & 13 deletions setup.py
@@ -5,7 +5,7 @@

 # at least 1.3 to be able to use XMLDataSet and pandas integration with fsspec
 PANDAS = "pandas~=1.3"
-SPARK = "pyspark>=2.2, <4.0"
+SPARK = "pyspark>=2.2, <3.4"
 HDFS = "hdfs>=2.5.8, <3.0"
 S3FS = "s3fs>=0.3.0, <0.5"

@@ -30,7 +30,7 @@ def _collect_requirements(requires):
     "geopandas.GeoJSONDataSet": ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"]
 }
 matplotlib_require = {"matplotlib.MatplotlibWriter": ["matplotlib>=3.0.3, <4.0"]}
-holoviews_require = {"holoviews.HoloviewsWriter": ["holoviews~=1.13.0"]}
+holoviews_require = {"holoviews.HoloviewsWriter": ["holoviews>=1.13.0"]}
 networkx_require = {"networkx.NetworkXDataSet": ["networkx~=2.4"]}
 pandas_require = {
     "pandas.CSVDataSet": [PANDAS],
@@ -143,14 +143,15 @@ def _collect_requirements(requires):
     "compress-pickle[lz4]~=2.1.0",
     "coverage[toml]",
     "dask[complete]~=2021.10",  # pinned by Snyk to avoid a vulnerability
-    "delta-spark~=1.2.1",  # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070
+    "delta-spark>=1.2.1; python_version >= '3.11'",  # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070
+    "delta-spark~=1.2.1; python_version < '3.11'",
     "dill~=0.3.1",
     "filelock>=3.4.0, <4.0",
     "gcsfs>=2021.4, <=2023.1; python_version == '3.7'",
     "gcsfs>=2023.1, <2023.3; python_version >= '3.8'",
     "geopandas>=0.6.0, <1.0",
     "hdfs>=2.5.8, <3.0",
-    "holoviews~=1.13.0",
+    "holoviews>=1.13.0",
     "import-linter[toml]==1.8.0",
     "ipython>=7.31.1, <8.0; python_version < '3.8'",
     "ipython~=8.10; python_version >= '3.8'",
@@ -162,23 +163,25 @@ def _collect_requirements(requires):
     "jupyter~=1.0",
     "lxml~=4.6",
     "matplotlib>=3.0.3, <3.4; python_version < '3.10'",  # 3.4.0 breaks holoviews
-    "matplotlib>=3.5, <3.6; python_version == '3.10'",
+    "matplotlib>=3.5, <3.6; python_version >= '3.10'",
     "memory_profiler>=0.50.0, <1.0",
     "moto==1.3.7; python_version < '3.10'",
-    "moto==3.0.4; python_version == '3.10'",
+    "moto==4.1.12; python_version >= '3.10'",
     "networkx~=2.4",
     "opencv-python~=4.5.5.64",
     "openpyxl>=3.0.3, <4.0",
-    "pandas-gbq>=0.12.0, <0.18.0",
+    "pandas-gbq>=0.12.0, <0.18.0; python_version < '3.11'",
+    "pandas-gbq>=0.18.0; python_version >= '3.11'",
     "pandas~=1.3",  # 1.3 for read_xml/to_xml
     "Pillow~=9.0",
     "plotly>=4.8.0, <6.0",
     "pre-commit>=2.9.2, <3.0",  # The hook `mypy` requires pre-commit version 2.9.2.
     "psutil~=5.8",
-    "pyarrow>=6.0",
+    "pyarrow>=1.0; python_version < '3.11'",
+    "pyarrow>=7.0; python_version >= '3.11'",  # Adding to avoid numpy build errors
     "pylint>=2.17.0, <3.0",
     "pyproj~=3.0",
-    "pyspark>=2.2, <4.0",
+    "pyspark>=2.2, <3.4; python_version < '3.11'",
+    "pyspark>=3.4; python_version >= '3.11'",
     "pytest-cov~=3.0",
     "pytest-mock>=1.7.1, <2.0",
     "pytest-xdist[psutil]~=2.2.1",
@@ -187,10 +190,12 @@ def _collect_requirements(requires):
     "requests-mock~=1.6",
     "requests~=2.20",
     "s3fs>=0.3.0, <0.5",  # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem.
-    "scikit-learn~=1.0.2",
-    "scipy~=1.7.3",
+    "scikit-learn>=1.0.2,<2",
+    "scipy>=1.7.3",
     "semver",
     "SQLAlchemy~=1.2",
-    "tables~=3.6.0; platform_system == 'Windows' and python_version<'3.9'",
+    "tables~=3.6.0; platform_system == 'Windows' and python_version<'3.8'",
+    "tables~=3.8.0; platform_system == 'Windows' and python_version>='3.8'",  # Import issues with python 3.8 with pytables; pinning to 3.8.0 fixes this https://github.com/PyTables/PyTables/issues/933#issuecomment-1555917593
     "tables~=3.6; platform_system != 'Windows'",
     "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
     # https://developer.apple.com/metal/tensorflow-plugin/
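Most of the Python 3.11 work in `setup.py` leans on PEP 508 environment markers, which let a single requirements list branch on interpreter version and platform. A minimal sketch of how such markers evaluate, assuming the `packaging` library (pins illustrative):

```python
from packaging.markers import Marker

# A marker is evaluated against the installing interpreter's environment,
# so each pin applies only where its condition holds.
old_pin = Marker("python_version < '3.11'")   # e.g. pyspark>=2.2, <3.4
new_pin = Marker("python_version >= '3.11'")  # e.g. pyspark>=3.4

assert old_pin.evaluate({"python_version": "3.10"})
assert not old_pin.evaluate({"python_version": "3.11"})
assert new_pin.evaluate({"python_version": "3.11"})
```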
18 changes: 14 additions & 4 deletions tests/extras/datasets/spark/test_deltatable_dataset.py
@@ -1,15 +1,19 @@
 import pytest
 from delta import DeltaTable
+from pyspark import __version__
 from pyspark.sql import SparkSession
 from pyspark.sql.types import IntegerType, StringType, StructField, StructType
 from pyspark.sql.utils import AnalysisException
+from semver import VersionInfo

 from kedro.extras.datasets.spark import DeltaTableDataSet, SparkDataSet
 from kedro.io import DataCatalog, DatasetError
 from kedro.pipeline import node
 from kedro.pipeline.modular_pipeline import pipeline as modular_pipeline
 from kedro.runner import ParallelRunner

+SPARK_VERSION = VersionInfo.parse(__version__)
+

 @pytest.fixture
 def sample_spark_df():
@@ -65,10 +69,16 @@ def test_exists(self, tmp_path, sample_spark_df):

     def test_exists_raises_error(self, mocker):
         delta_ds = DeltaTableDataSet(filepath="")
-        mocker.patch.object(
-            delta_ds, "_get_spark", side_effect=AnalysisException("Other Exception", [])
-        )
-
+        if SPARK_VERSION.match(">=3.4.0"):
+            mocker.patch.object(
+                delta_ds, "_get_spark", side_effect=AnalysisException("Other Exception")
+            )
+        else:
+            mocker.patch.object(
+                delta_ds,
+                "_get_spark",
+                side_effect=AnalysisException("Other Exception", []),
+            )
         with pytest.raises(DatasetError, match="Other Exception"):
             delta_ds.exists()
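The version-gated mock is needed because `AnalysisException`'s constructor lost its second positional argument in pyspark 3.4. The `semver` check that drives the branch, as a standalone sketch (version strings illustrative):

```python
from semver import VersionInfo

# match() compares a parsed version against an operator expression,
# which is how the test picks the right AnalysisException signature.
assert VersionInfo.parse("3.4.1").match(">=3.4.0")
assert not VersionInfo.parse("3.3.2").match(">=3.4.0")
```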
21 changes: 16 additions & 5 deletions tests/extras/datasets/spark/test_spark_dataset.py
@@ -7,6 +7,7 @@
 import pandas as pd
 import pytest
 from moto import mock_s3
+from pyspark import __version__
 from pyspark.sql import SparkSession
 from pyspark.sql.functions import col
 from pyspark.sql.types import (
@@ -17,6 +18,7 @@
     StructType,
 )
 from pyspark.sql.utils import AnalysisException
+from semver import VersionInfo

 from kedro.extras.datasets.pandas import CSVDataSet, ParquetDataSet
 from kedro.extras.datasets.pickle import PickleDataSet
@@ -60,6 +62,8 @@
     (HDFS_PREFIX + "/2019-02-01T00.00.00.000Z", [], ["other_file"]),
 ]

+SPARK_VERSION = VersionInfo.parse(__version__)
+

 @pytest.fixture
 def sample_pandas_df() -> pd.DataFrame:
@@ -405,11 +409,18 @@ def test_exists_raises_error(self, mocker):
         # exists should raise all errors except for
         # AnalysisExceptions clearly indicating a missing file
         spark_data_set = SparkDataSet(filepath="")
-        mocker.patch.object(
-            spark_data_set,
-            "_get_spark",
-            side_effect=AnalysisException("Other Exception", []),
-        )
+        if SPARK_VERSION.match(">=3.4.0"):
+            mocker.patch.object(
+                spark_data_set,
+                "_get_spark",
+                side_effect=AnalysisException("Other Exception"),
+            )
+        else:
+            mocker.patch.object(  # pylint: disable=expression-not-assigned
+                spark_data_set,
+                "_get_spark",
+                side_effect=AnalysisException("Other Exception", []),
+            )

         with pytest.raises(DatasetError, match="Other Exception"):
             spark_data_set.exists()
9 changes: 3 additions & 6 deletions tests/extras/datasets/spark/test_spark_hive_dataset.py
@@ -293,12 +293,9 @@ def test_read_from_non_existent_table(self):
         )
         with pytest.raises(
             DatasetError,
-            match=r"Failed while loading data from data set "
-            r"SparkHiveDataSet\(database=default_1, format=hive, "
-            r"table=table_doesnt_exist, table_pk=\[\], write_mode=append\)\.\n"
-            r"Table or view not found: default_1.table_doesnt_exist;\n"
-            r"'UnresolvedRelation \[default_1, "
-            r"table_doesnt_exist\], \[\], false\n",
+            match=r"Failed while loading data from data set SparkHiveDataSet"
+            r"|table_doesnt_exist"
+            r"|UnresolvedRelation",
         ):
             dataset.load()
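The assertion above was loosened because the exact Hive error text varies across Spark versions. `pytest.raises(match=...)` applies `re.search` to the exception string, so an alternation accepts whichever fragment appears; a quick illustration with invented messages:

```python
import re

pattern = (
    r"Failed while loading data from data set SparkHiveDataSet"
    r"|table_doesnt_exist"
    r"|UnresolvedRelation"
)

# re.search mirrors pytest.raises(match=...): any one alternative suffices.
assert re.search(pattern, "Table or view not found: default_1.table_doesnt_exist")
assert re.search(pattern, "'UnresolvedRelation [default_1, table_doesnt_exist], [], false")
```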
(Diffs for the remaining changed files are not shown.)