diff --git a/.github/workflows/all-checks.yml b/.github/workflows/all-checks.yml index e3759460ac..51efe0e4db 100644 --- a/.github/workflows/all-checks.yml +++ b/.github/workflows/all-checks.yml @@ -21,7 +21,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest, windows-latest ] - python-version: [ "3.7", "3.8", "3.9", "3.10" ] + python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ] uses: ./.github/workflows/unit-tests.yml with: os: ${{ matrix.os }} @@ -31,7 +31,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - python-version: [ "3.10" ] + python-version: [ "3.11" ] uses: ./.github/workflows/lint.yml with: os: ${{ matrix.os }} @@ -41,7 +41,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest, windows-latest ] - python-version: [ "3.7", "3.8", "3.9", "3.10" ] + python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ] uses: ./.github/workflows/e2e-tests.yml with: os: ${{ matrix.os }} @@ -51,7 +51,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest, windows-latest ] - python-version: [ "3.7", "3.8", "3.9", "3.10" ] + python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ] uses: ./.github/workflows/pip-compile.yml with: os: ${{ matrix.os }} diff --git a/.github/workflows/docs-only-checks.yml b/.github/workflows/docs-only-checks.yml index 536fb515a4..b7940e85be 100644 --- a/.github/workflows/docs-only-checks.yml +++ b/.github/workflows/docs-only-checks.yml @@ -21,7 +21,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - python-version: [ "3.7", "3.8", "3.9", "3.10" ] + python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ] uses: ./.github/workflows/lint.yml with: os: ${{ matrix.os }} diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 593af27299..0369e4b134 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -35,6 +35,9 @@ jobs: with: path: ~\AppData\Local\pip\Cache key: ${{inputs.os}}-python-${{inputs.python-version}} + - name: Add MSBuild to PATH + if: inputs.os == 'windows-latest' + uses: microsoft/setup-msbuild@v1 - name: Install dependencies run: | make install-test-requirements diff --git a/.github/workflows/pip-compile.yml b/.github/workflows/pip-compile.yml index 796ba6049c..b5b1453782 100644 --- a/.github/workflows/pip-compile.yml +++ b/.github/workflows/pip-compile.yml @@ -31,6 +31,9 @@ jobs: with: path: ~\AppData\Local\pip\Cache key: ${{inputs.os}}-python-${{inputs.python-version}} + - name: Add MSBuild to PATH + if: inputs.os == 'windows-latest' + uses: microsoft/setup-msbuild@v1 - name: Install dependencies run: | make install-test-requirements diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 2434f7811f..c56a67c707 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -30,6 +30,9 @@ jobs: with: path: ~\AppData\Local\pip\Cache key: ${{inputs.os}}-python-${{inputs.python-version}} + - name: Add MSBuild to PATH + if: inputs.os == 'windows-latest' + uses: microsoft/setup-msbuild@v1 - name: Install dependencies run: | make install-test-requirements @@ -39,15 +42,9 @@ jobs: run: pip install tables - name: pip freeze run: pip freeze - - name: Run unit tests sequentially - if: inputs.os == 'ubuntu-latest' && inputs.python-version == '3.10' - run: make test-sequential - name: Run unit tests - if: inputs.os == 'ubuntu-latest' && inputs.python-version != '3.10' + if: inputs.os == 'ubuntu-latest' run: make test - - name: Run unit tests without spark sequentially (Windows) - if: inputs.os == 'windows-latest' && inputs.python-version == '3.10' - run: make 
test-no-spark-sequential - name: Run unit tests without spark (Windows) - if: inputs.os == 'windows-latest' && inputs.python-version != '3.10' + if: inputs.os == 'windows-latest' run: make test-no-spark diff --git a/README.md b/README.md index a909df2535..aed1d6894c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![Kedro Logo Banner - Light](.github/demo-dark.png#gh-dark-mode-only) ![Kedro Logo Banner - Dark](.github/demo-light.png#gh-light-mode-only) -[![Python version](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-blue.svg)](https://pypi.org/project/kedro/) +[![Python version](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg)](https://pypi.org/project/kedro/) [![PyPI version](https://badge.fury.io/py/kedro.svg)](https://pypi.org/project/kedro/) [![Conda version](https://img.shields.io/conda/vn/conda-forge/kedro.svg)](https://anaconda.org/conda-forge/kedro) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/kedro-org/kedro/blob/main/LICENSE.md) diff --git a/RELEASE.md b/RELEASE.md index 71b8a4748b..a9fe6bfcbf 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -12,6 +12,7 @@ ## Major features and improvements * Allowed registering of custom resolvers to `OmegaConfigLoader` through `CONFIG_LOADER_ARGS`. +* Added support for Python 3.11. See the Python 3.11 migration notes below for the dependency pins and test changes this required. ## Bug fixes and other changes * Updated `kedro pipeline create` and `kedro catalog create` to use new `/conf` file structure. @@ -19,6 +20,11 @@ ## Documentation changes * Added migration guide from the `ConfigLoader` to the `OmegaConfigLoader`. The `ConfigLoader` is deprecated and will be removed in the `0.19.0` release. +## Migration notes for Python 3.11 +* PyTables on Windows: `tables` is now pinned to `3.8.0` on Windows for Python >= 3.8 to fix import issues with PyTables. +* Spark: the `pyspark` requirement is now capped at `<3.4` because of breaking changes in `pyspark` 3.4. +* Testing on Python 3.10: `moto` has been updated so unit tests can once again run in parallel on Python 3.10; the sequential-test workaround has been removed from CI. + ## Breaking changes to the API ## Upcoming deprecations for Kedro 0.19.0 diff --git a/docs/source/conf.py b/docs/source/conf.py index 205f98416e..804bbbbfa9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -226,7 +226,8 @@ "https://github.com/kedro-org/kedro/blob/main/README.md#the-humans-behind-kedro", # "anchor not found" but is valid "https://opensource.org/license/apache2-0-php/", "https://docs.github.com/en/rest/overview/other-authentication-methods#via-username-and-password", - "https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/api/snowflake.snowpark.DataFrameWriter.saveAsTable.html" + "https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/api/snowflake.snowpark.DataFrameWriter.saveAsTable.html", + "https://www.educative.io/blog/advanced-yaml-syntax-cheatsheet#anchors" ] # retry before render a link broken (fix for "too many requests") diff --git a/docs/source/index.rst b/docs/source/index.rst index f9c78a2748..ac106f9c48 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -23,9 +23,9 @@ Welcome to Kedro's documentation! :target: https://opensource.org/license/apache2-0-php/ :alt: License is Apache 2.0 -..
image:: https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-blue.svg +.. image:: https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg :target: https://pypi.org/project/kedro/ - :alt: Python version 3.7, 3.8, 3.9, 3.10 + :alt: Python version 3.7, 3.8, 3.9, 3.10, 3.11 .. image:: https://badge.fury.io/py/kedro.svg :target: https://pypi.org/project/kedro/ diff --git a/kedro/__init__.py b/kedro/__init__.py index 39f84743b2..7a7db37ae2 100644 --- a/kedro/__init__.py +++ b/kedro/__init__.py @@ -18,7 +18,7 @@ class KedroPythonVersionWarning(UserWarning): if not sys.warnoptions: warnings.simplefilter("error", KedroPythonVersionWarning) -if sys.version_info >= (3, 11): +if sys.version_info >= (3, 12): warnings.warn( """Kedro is not yet fully compatible with this Python version. To proceed at your own risk and ignore this warning, diff --git a/kedro/extras/datasets/spark/deltatable_dataset.py b/kedro/extras/datasets/spark/deltatable_dataset.py index 3d56f81048..fc6c1d5d97 100644 --- a/kedro/extras/datasets/spark/deltatable_dataset.py +++ b/kedro/extras/datasets/spark/deltatable_dataset.py @@ -100,7 +100,12 @@ def _exists(self) -> bool: try: self._get_spark().read.load(path=load_path, format="delta") except AnalysisException as exception: - if "is not a Delta table" in exception.desc: + # `AnalysisException.desc` is deprecated with pyspark >= 3.4 + message = ( + exception.desc if hasattr(exception, "desc") else exception.message + ) + + if "Path does not exist:" in message or "is not a Delta table" in message: return False raise diff --git a/kedro/extras/datasets/spark/spark_dataset.py b/kedro/extras/datasets/spark/spark_dataset.py index b27147b7a6..0d60d943ac 100644 --- a/kedro/extras/datasets/spark/spark_dataset.py +++ b/kedro/extras/datasets/spark/spark_dataset.py @@ -41,7 +41,8 @@ def _parse_glob_pattern(pattern: str) -> str: def _split_filepath(filepath: str) -> Tuple[str, str]: split_ = filepath.split("://", 1) - if len(split_) == 2: # noqa: PLR2004 + MIN_SPLIT_SIZE = 2 + if len(split_) == MIN_SPLIT_SIZE: return split_[0] + "://", split_[1] return "", split_[0] @@ -232,7 +233,7 @@ class SparkDataSet(AbstractVersionedDataSet[DataFrame, DataFrame]): DEFAULT_LOAD_ARGS = {} # type: Dict[str, Any] DEFAULT_SAVE_ARGS = {} # type: Dict[str, Any] - def __init__( # noqa: too-many-arguments + def __init__( # ruff: noqa: PLR0913 self, filepath: str, file_format: str = "parquet", @@ -401,10 +402,11 @@ def _exists(self) -> bool: try: self._get_spark().read.load(load_path, self._file_format) except AnalysisException as exception: - if ( - exception.desc.startswith("Path does not exist:") - or "is not a Delta table" in exception.desc - ): + # `AnalysisException.desc` is deprecated with pyspark >= 3.4 + message = ( + exception.desc if hasattr(exception, "desc") else exception.message + ) + if "Path does not exist:" in message or "is not a Delta table" in message: return False raise return True diff --git a/kedro/extras/datasets/spark/spark_jdbc_dataset.py b/kedro/extras/datasets/spark/spark_jdbc_dataset.py index 26a1ed2481..15e01c4468 100644 --- a/kedro/extras/datasets/spark/spark_jdbc_dataset.py +++ b/kedro/extras/datasets/spark/spark_jdbc_dataset.py @@ -168,7 +168,7 @@ def _describe(self) -> Dict[str, Any]: } @staticmethod - def _get_spark(): + def _get_spark(): # pragma: no cover return SparkSession.builder.getOrCreate() def _load(self) -> DataFrame: diff --git a/setup.py b/setup.py index 8232f8a315..e78ea817a7 100644 --- a/setup.py +++ 
b/setup.py @@ -5,7 +5,7 @@ # at least 1.3 to be able to use XMLDataSet and pandas integration with fsspec PANDAS = "pandas~=1.3" -SPARK = "pyspark>=2.2, <4.0" +SPARK = "pyspark>=2.2, <3.4" HDFS = "hdfs>=2.5.8, <3.0" S3FS = "s3fs>=0.3.0, <0.5" @@ -30,7 +30,7 @@ def _collect_requirements(requires): "geopandas.GeoJSONDataSet": ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"] } matplotlib_require = {"matplotlib.MatplotlibWriter": ["matplotlib>=3.0.3, <4.0"]} -holoviews_require = {"holoviews.HoloviewsWriter": ["holoviews~=1.13.0"]} +holoviews_require = {"holoviews.HoloviewsWriter": ["holoviews>=1.13.0"]} networkx_require = {"networkx.NetworkXDataSet": ["networkx~=2.4"]} pandas_require = { "pandas.CSVDataSet": [PANDAS], @@ -143,14 +143,15 @@ def _collect_requirements(requires): "compress-pickle[lz4]~=2.1.0", "coverage[toml]", "dask[complete]~=2021.10", # pinned by Snyk to avoid a vulnerability - "delta-spark~=1.2.1", # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070 + "delta-spark>=1.2.1; python_version >= '3.11'", # 1.2.0 has a bug that breaks some of our tests: https://github.com/delta-io/delta/issues/1070 + "delta-spark~=1.2.1; python_version < '3.11'", "dill~=0.3.1", "filelock>=3.4.0, <4.0", "gcsfs>=2021.4, <=2023.1; python_version == '3.7'", "gcsfs>=2023.1, <2023.3; python_version >= '3.8'", "geopandas>=0.6.0, <1.0", "hdfs>=2.5.8, <3.0", - "holoviews~=1.13.0", + "holoviews>=1.13.0", "import-linter[toml]==1.8.0", "ipython>=7.31.1, <8.0; python_version < '3.8'", "ipython~=8.10; python_version >= '3.8'", @@ -162,23 +163,25 @@ def _collect_requirements(requires): "jupyter~=1.0", "lxml~=4.6", "matplotlib>=3.0.3, <3.4; python_version < '3.10'", # 3.4.0 breaks holoviews - "matplotlib>=3.5, <3.6; python_version == '3.10'", + "matplotlib>=3.5, <3.6; python_version >= '3.10'", "memory_profiler>=0.50.0, <1.0", "moto==1.3.7; python_version < '3.10'", - "moto==3.0.4; python_version == '3.10'", + "moto==4.1.12; python_version >= '3.10'", "networkx~=2.4", "opencv-python~=4.5.5.64", "openpyxl>=3.0.3, <4.0", - "pandas-gbq>=0.12.0, <0.18.0", + "pandas-gbq>=0.12.0, <0.18.0; python_version < '3.11'", + "pandas-gbq>=0.18.0; python_version >= '3.11'", "pandas~=1.3 # 1.3 for read_xml/to_xml", "Pillow~=9.0", "plotly>=4.8.0, <6.0", "pre-commit>=2.9.2, <3.0", # The hook `mypy` requires pre-commit version 2.9.2. - "psutil~=5.8", - "pyarrow>=6.0", + "pyarrow>=1.0; python_version < '3.11'", + "pyarrow>=7.0; python_version >= '3.11'", # Adding to avoid numpy build errors "pylint>=2.17.0, <3.0", "pyproj~=3.0", - "pyspark>=2.2, <4.0", + "pyspark>=2.2, <3.4; python_version < '3.11'", + "pyspark>=3.4; python_version >= '3.11'", "pytest-cov~=3.0", "pytest-mock>=1.7.1, <2.0", "pytest-xdist[psutil]~=2.2.1", @@ -187,10 +190,12 @@ def _collect_requirements(requires): "requests-mock~=1.6", "requests~=2.20", "s3fs>=0.3.0, <0.5", # Needs to be at least 0.3.0 to make use of `cachable` attribute on S3FileSystem. 
- "scikit-learn~=1.0.2", - "scipy~=1.7.3", + "scikit-learn>=1.0.2,<2", + "scipy>=1.7.3", + "semver", "SQLAlchemy~=1.2", - "tables~=3.6.0; platform_system == 'Windows' and python_version<'3.9'", + "tables~=3.6.0; platform_system == 'Windows' and python_version<'3.8'", + "tables~=3.8.0; platform_system == 'Windows' and python_version>='3.8'", # Import issues with python 3.8 with pytables pinning to 3.8.0 fixes this https://github.com/PyTables/PyTables/issues/933#issuecomment-1555917593 "tables~=3.6; platform_system != 'Windows'", "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'", # https://developer.apple.com/metal/tensorflow-plugin/ diff --git a/tests/extras/datasets/spark/test_deltatable_dataset.py b/tests/extras/datasets/spark/test_deltatable_dataset.py index 00eb313f6a..a0ad5bc9d9 100644 --- a/tests/extras/datasets/spark/test_deltatable_dataset.py +++ b/tests/extras/datasets/spark/test_deltatable_dataset.py @@ -1,8 +1,10 @@ import pytest from delta import DeltaTable +from pyspark import __version__ from pyspark.sql import SparkSession from pyspark.sql.types import IntegerType, StringType, StructField, StructType from pyspark.sql.utils import AnalysisException +from semver import VersionInfo from kedro.extras.datasets.spark import DeltaTableDataSet, SparkDataSet from kedro.io import DataCatalog, DatasetError @@ -10,6 +12,8 @@ from kedro.pipeline.modular_pipeline import pipeline as modular_pipeline from kedro.runner import ParallelRunner +SPARK_VERSION = VersionInfo.parse(__version__) + @pytest.fixture def sample_spark_df(): @@ -65,10 +69,16 @@ def test_exists(self, tmp_path, sample_spark_df): def test_exists_raises_error(self, mocker): delta_ds = DeltaTableDataSet(filepath="") - mocker.patch.object( - delta_ds, "_get_spark", side_effect=AnalysisException("Other Exception", []) - ) - + if SPARK_VERSION.match(">=3.4.0"): + mocker.patch.object( + delta_ds, "_get_spark", side_effect=AnalysisException("Other Exception") + ) + else: + mocker.patch.object( + delta_ds, + "_get_spark", + side_effect=AnalysisException("Other Exception", []), + ) with pytest.raises(DatasetError, match="Other Exception"): delta_ds.exists() diff --git a/tests/extras/datasets/spark/test_spark_dataset.py b/tests/extras/datasets/spark/test_spark_dataset.py index da979901ac..a491ef6aeb 100644 --- a/tests/extras/datasets/spark/test_spark_dataset.py +++ b/tests/extras/datasets/spark/test_spark_dataset.py @@ -7,6 +7,7 @@ import pandas as pd import pytest from moto import mock_s3 +from pyspark import __version__ from pyspark.sql import SparkSession from pyspark.sql.functions import col from pyspark.sql.types import ( @@ -17,6 +18,7 @@ StructType, ) from pyspark.sql.utils import AnalysisException +from semver import VersionInfo from kedro.extras.datasets.pandas import CSVDataSet, ParquetDataSet from kedro.extras.datasets.pickle import PickleDataSet @@ -60,6 +62,8 @@ (HDFS_PREFIX + "/2019-02-01T00.00.00.000Z", [], ["other_file"]), ] +SPARK_VERSION = VersionInfo.parse(__version__) + @pytest.fixture def sample_pandas_df() -> pd.DataFrame: @@ -405,11 +409,18 @@ def test_exists_raises_error(self, mocker): # exists should raise all errors except for # AnalysisExceptions clearly indicating a missing file spark_data_set = SparkDataSet(filepath="") - mocker.patch.object( - spark_data_set, - "_get_spark", - side_effect=AnalysisException("Other Exception", []), - ) + if SPARK_VERSION.match(">=3.4.0"): + mocker.patch.object( + spark_data_set, + "_get_spark", + side_effect=AnalysisException("Other Exception"), + 
) + else: + mocker.patch.object( # pylint: disable=expression-not-assigned + spark_data_set, + "_get_spark", + side_effect=AnalysisException("Other Exception", []), + ) with pytest.raises(DatasetError, match="Other Exception"): spark_data_set.exists() diff --git a/tests/extras/datasets/spark/test_spark_hive_dataset.py b/tests/extras/datasets/spark/test_spark_hive_dataset.py index ba7fc734a6..399ebc4169 100644 --- a/tests/extras/datasets/spark/test_spark_hive_dataset.py +++ b/tests/extras/datasets/spark/test_spark_hive_dataset.py @@ -293,12 +293,9 @@ def test_read_from_non_existent_table(self): ) with pytest.raises( DatasetError, - match=r"Failed while loading data from data set " - r"SparkHiveDataSet\(database=default_1, format=hive, " - r"table=table_doesnt_exist, table_pk=\[\], write_mode=append\)\.\n" - r"Table or view not found: default_1.table_doesnt_exist;\n" - r"'UnresolvedRelation \[default_1, " - r"table_doesnt_exist\], \[\], false\n", + match=r"Failed while loading data from data set SparkHiveDataSet" + r"|table_doesnt_exist" + r"|UnresolvedRelation", ): dataset.load() diff --git a/tests/extras/datasets/spark/test_spark_jdbc_dataset.py b/tests/extras/datasets/spark/test_spark_jdbc_dataset.py index fa7af0f966..6d89251fc5 100644 --- a/tests/extras/datasets/spark/test_spark_jdbc_dataset.py +++ b/tests/extras/datasets/spark/test_spark_jdbc_dataset.py @@ -1,5 +1,3 @@ -from unittest import mock - import pytest from kedro.extras.datasets.spark import SparkJDBCDataSet @@ -53,59 +51,52 @@ def test_missing_table(): SparkJDBCDataSet(url="dummy_url", table=None) -def mock_save(arg_dict): - mock_data = mock.Mock() - data_set = SparkJDBCDataSet(**arg_dict) +def test_save(mocker, spark_jdbc_args): + mock_data = mocker.Mock() + data_set = SparkJDBCDataSet(**spark_jdbc_args) data_set.save(mock_data) - return mock_data - - -def test_save(spark_jdbc_args): - data = mock_save(spark_jdbc_args) - data.write.jdbc.assert_called_with("dummy_url", "dummy_table") + mock_data.write.jdbc.assert_called_with("dummy_url", "dummy_table") -def test_save_credentials(spark_jdbc_args_credentials): - data = mock_save(spark_jdbc_args_credentials) - data.write.jdbc.assert_called_with( +def test_save_credentials(mocker, spark_jdbc_args_credentials): + mock_data = mocker.Mock() + data_set = SparkJDBCDataSet(**spark_jdbc_args_credentials) + data_set.save(mock_data) + mock_data.write.jdbc.assert_called_with( "dummy_url", "dummy_table", properties={"user": "dummy_user", "password": "dummy_pw"}, ) -def test_save_args(spark_jdbc_args_save_load): - data = mock_save(spark_jdbc_args_save_load) - data.write.jdbc.assert_called_with( +def test_save_args(mocker, spark_jdbc_args_save_load): + mock_data = mocker.Mock() + data_set = SparkJDBCDataSet(**spark_jdbc_args_save_load) + data_set.save(mock_data) + mock_data.write.jdbc.assert_called_with( "dummy_url", "dummy_table", properties={"driver": "dummy_driver"} ) -def test_except_bad_credentials(spark_jdbc_args_credentials_with_none_password): +def test_except_bad_credentials(mocker, spark_jdbc_args_credentials_with_none_password): pattern = r"Credential property 'password' cannot be None(.+)" with pytest.raises(DatasetError, match=pattern): - mock_save(spark_jdbc_args_credentials_with_none_password) + mock_data = mocker.Mock() + data_set = SparkJDBCDataSet(**spark_jdbc_args_credentials_with_none_password) + data_set.save(mock_data) -@mock.patch( - "kedro.extras.datasets.spark.spark_jdbc_dataset.SparkSession.builder.getOrCreate" -) -def mock_load(mock_get_or_create, arg_dict): - spark 
= mock_get_or_create.return_value - data_set = SparkJDBCDataSet(**arg_dict) +def test_load(mocker, spark_jdbc_args): + spark = mocker.patch.object(SparkJDBCDataSet, "_get_spark").return_value + data_set = SparkJDBCDataSet(**spark_jdbc_args) data_set.load() - return spark - - -def test_load(spark_jdbc_args): - # pylint: disable=no-value-for-parameter - spark = mock_load(arg_dict=spark_jdbc_args) spark.read.jdbc.assert_called_with("dummy_url", "dummy_table") -def test_load_credentials(spark_jdbc_args_credentials): - # pylint: disable=no-value-for-parameter - spark = mock_load(arg_dict=spark_jdbc_args_credentials) +def test_load_credentials(mocker, spark_jdbc_args_credentials): + spark = mocker.patch.object(SparkJDBCDataSet, "_get_spark").return_value + data_set = SparkJDBCDataSet(**spark_jdbc_args_credentials) + data_set.load() spark.read.jdbc.assert_called_with( "dummy_url", "dummy_table", @@ -113,9 +104,10 @@ def test_load_credentials(spark_jdbc_args_credentials): ) -def test_load_args(spark_jdbc_args_save_load): - # pylint: disable=no-value-for-parameter - spark = mock_load(arg_dict=spark_jdbc_args_save_load) +def test_load_args(mocker, spark_jdbc_args_save_load): + spark = mocker.patch.object(SparkJDBCDataSet, "_get_spark").return_value + data_set = SparkJDBCDataSet(**spark_jdbc_args_save_load) + data_set.load() spark.read.jdbc.assert_called_with( "dummy_url", "dummy_table", properties={"driver": "dummy_driver"} ) diff --git a/tests/framework/cli/test_cli.py b/tests/framework/cli/test_cli.py index 6788f349f0..8c33f4e2ae 100644 --- a/tests/framework/cli/test_cli.py +++ b/tests/framework/cli/test_cli.py @@ -329,14 +329,14 @@ def test_init_error_is_caught(self, entry_points, entry_point): class TestKedroCLI: def test_project_commands_no_clipy(self, mocker, fake_metadata): - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - side_effect=cycle([ModuleNotFoundError()]), - ) mocker.patch("kedro.framework.cli.cli._is_project", return_value=True) mocker.patch( "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata ) + mocker.patch( + "kedro.framework.cli.cli.importlib.import_module", + side_effect=cycle([ModuleNotFoundError()]), + ) kedro_cli = KedroCLI(fake_metadata.project_path) print(kedro_cli.project_groups) assert len(kedro_cli.project_groups) == 6 @@ -356,26 +356,26 @@ def test_project_commands_no_project(self, mocker, tmp_path): assert kedro_cli._metadata is None def test_project_commands_invalid_clipy(self, mocker, fake_metadata): - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", return_value=None - ) mocker.patch("kedro.framework.cli.cli._is_project", return_value=True) mocker.patch( "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata ) + mocker.patch( + "kedro.framework.cli.cli.importlib.import_module", return_value=None + ) with raises(KedroCliError, match="Cannot load commands from"): _ = KedroCLI(fake_metadata.project_path) def test_project_commands_valid_clipy(self, mocker, fake_metadata): Module = namedtuple("Module", ["cli"]) - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - return_value=Module(cli=cli), - ) mocker.patch("kedro.framework.cli.cli._is_project", return_value=True) mocker.patch( "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata ) + mocker.patch( + "kedro.framework.cli.cli.importlib.import_module", + return_value=Module(cli=cli), + ) kedro_cli = KedroCLI(fake_metadata.project_path) assert len(kedro_cli.project_groups) == 7 assert kedro_cli.project_groups 
== [ @@ -402,14 +402,14 @@ def test_kedro_cli_no_project(self, mocker, tmp_path): def test_kedro_cli_with_project(self, mocker, fake_metadata): Module = namedtuple("Module", ["cli"]) - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - return_value=Module(cli=cli), - ) mocker.patch("kedro.framework.cli.cli._is_project", return_value=True) mocker.patch( "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata ) + mocker.patch( + "kedro.framework.cli.cli.importlib.import_module", + return_value=Module(cli=cli), + ) kedro_cli = KedroCLI(fake_metadata.project_path) assert len(kedro_cli.global_groups) == 2 diff --git a/tests/framework/cli/test_cli_hooks.py b/tests/framework/cli/test_cli_hooks.py index 0f7866f45f..41fbdaa705 100644 --- a/tests/framework/cli/test_cli_hooks.py +++ b/tests/framework/cli/test_cli_hooks.py @@ -98,10 +98,6 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin( caplog.set_level(logging.DEBUG, logger="kedro") Module = namedtuple("Module", ["cli"]) - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - return_value=Module(cli=cli), - ) mocker.patch( "kedro.framework.cli.cli._is_project", return_value=True, @@ -110,6 +106,10 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin( "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata, ) + mocker.patch( + "kedro.framework.cli.cli.importlib.import_module", + return_value=Module(cli=cli), + ) kedro_cli = KedroCLI(fake_metadata.project_path) result = CliRunner().invoke(kedro_cli, [command]) assert ( diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index 574f349809..9c61a9d3ec 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -336,7 +336,6 @@ def test_multi_catalog_list_bad_regex(self, multi_catalog): multi_catalog.list("((") def test_eq(self, multi_catalog, data_catalog): - assert multi_catalog == multi_catalog # noqa: PLR0124 assert multi_catalog == multi_catalog.shallow_copy() assert multi_catalog != data_catalog @@ -377,13 +376,14 @@ def test_mutating_datasets_not_allowed(self, data_catalog_from_config): def test_confirm(self, mocker, caplog): """Confirm the dataset""" - mock_ds = mocker.Mock() - data_catalog = DataCatalog(data_sets={"mocked": mock_ds}) - data_catalog.confirm("mocked") - mock_ds.confirm.assert_called_once_with() - assert caplog.record_tuples == [ - ("kedro.io.data_catalog", logging.INFO, "Confirming dataset 'mocked'") - ] + with caplog.at_level(logging.INFO): + mock_ds = mocker.Mock() + data_catalog = DataCatalog(data_sets={"mocked": mock_ds}) + data_catalog.confirm("mocked") + mock_ds.confirm.assert_called_once_with() + assert caplog.record_tuples == [ + ("kedro.io.data_catalog", logging.INFO, "Confirming dataset 'mocked'") + ] @pytest.mark.parametrize( "dataset_name,error_pattern", @@ -567,24 +567,25 @@ def test_error_dataset_init(self, bad_config): def test_confirm(self, tmp_path, caplog, mocker): """Confirm the dataset""" - mock_confirm = mocker.patch("kedro.io.IncrementalDataset.confirm") - catalog = { - "ds_to_confirm": { - "type": "IncrementalDataset", - "dataset": "pandas.CSVDataSet", - "path": str(tmp_path), + with caplog.at_level(logging.INFO): + mock_confirm = mocker.patch("kedro.io.IncrementalDataset.confirm") + catalog = { + "ds_to_confirm": { + "type": "IncrementalDataset", + "dataset": "pandas.CSVDataSet", + "path": str(tmp_path), + } } - } - data_catalog = DataCatalog.from_config(catalog=catalog) - data_catalog.confirm("ds_to_confirm") - assert 
caplog.record_tuples == [ - ( - "kedro.io.data_catalog", - logging.INFO, - "Confirming dataset 'ds_to_confirm'", - ) - ] - mock_confirm.assert_called_once_with() + data_catalog = DataCatalog.from_config(catalog=catalog) + data_catalog.confirm("ds_to_confirm") + assert caplog.record_tuples == [ + ( + "kedro.io.data_catalog", + logging.INFO, + "Confirming dataset 'ds_to_confirm'", + ) + ] + mock_confirm.assert_called_once_with() @pytest.mark.parametrize( "dataset_name,pattern", @@ -735,8 +736,10 @@ def test_replacing_nonword_characters(self): assert "ds3__csv" in catalog.datasets.__dict__ assert "jalapeƱo" in catalog.datasets.__dict__ - def test_no_versions_with_cloud_protocol(self): + def test_no_versions_with_cloud_protocol(self, monkeypatch): """Check the error if no versions are available for load from cloud storage""" + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "dummmy") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "dummmy") version = Version(load=None, save=None) versioned_dataset = CSVDataSet("s3://bucket/file.csv", version=version) pattern = re.escape( diff --git a/tests/test_import.py b/tests/test_import.py index 81436ecfc6..a9aa72e21a 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -4,8 +4,8 @@ def test_import_kedro_with_no_official_support_raise_error(mocker): - """Test importing kedro with python>=3.11 should fail""" - mocker.patch("kedro.sys.version_info", (3, 11)) + """Test importing kedro with python>=3.12 should fail""" + mocker.patch("kedro.sys.version_info", (3, 12)) # We use the parent class to avoid issues with `exec_module` with pytest.raises(UserWarning) as excinfo: @@ -15,8 +15,8 @@ def test_import_kedro_with_no_official_support_raise_error(mocker): def test_import_kedro_with_no_official_support_emits_warning(mocker): - """Test importing kedro python>=3.11 and controlled warnings should work""" - mocker.patch("kedro.sys.version_info", (3, 11)) + """Test importing kedro python>=3.12 and controlled warnings should work""" + mocker.patch("kedro.sys.version_info", (3, 12)) mocker.patch("kedro.sys.warnoptions", ["default:Kedro is not yet fully compatible"]) # We use the parent class to avoid issues with `exec_module` diff --git a/tests/tools/test_cli.py b/tests/tools/test_cli.py index 1b80ad8064..cf3ce71d1c 100644 --- a/tests/tools/test_cli.py +++ b/tests/tools/test_cli.py @@ -56,10 +56,6 @@ def fake_metadata(fake_root_dir): class TestCLITools: def test_get_cli_structure_raw(self, mocker, fake_metadata): Module = namedtuple("Module", ["cli"]) - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - return_value=Module(cli=cli), - ) mocker.patch( "kedro.framework.cli.cli._is_project", return_value=True, @@ -68,6 +64,10 @@ def test_get_cli_structure_raw(self, mocker, fake_metadata): "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata, ) + mocker.patch( + "kedro.framework.cli.cli.importlib.import_module", + return_value=Module(cli=cli), + ) kedro_cli = KedroCLI(fake_metadata.project_path) raw_cli_structure = get_cli_structure(kedro_cli, get_help=False) @@ -85,10 +85,6 @@ def test_get_cli_structure_raw(self, mocker, fake_metadata): def test_get_cli_structure_depth(self, mocker, fake_metadata): Module = namedtuple("Module", ["cli"]) - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - return_value=Module(cli=cli), - ) mocker.patch( "kedro.framework.cli.cli._is_project", return_value=True, @@ -97,6 +93,10 @@ def test_get_cli_structure_depth(self, mocker, fake_metadata): "kedro.framework.cli.cli.bootstrap_project", 
return_value=fake_metadata, ) + mocker.patch( + "kedro.framework.cli.cli.importlib.import_module", + return_value=Module(cli=cli), + ) kedro_cli = KedroCLI(fake_metadata.project_path) raw_cli_structure = get_cli_structure(kedro_cli, get_help=False) assert isinstance(raw_cli_structure["kedro"]["new"], dict) @@ -121,10 +121,6 @@ def test_get_cli_structure_depth(self, mocker, fake_metadata): def test_get_cli_structure_help(self, mocker, fake_metadata): Module = namedtuple("Module", ["cli"]) - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - return_value=Module(cli=cli), - ) mocker.patch( "kedro.framework.cli.cli._is_project", return_value=True, @@ -133,6 +129,10 @@ def test_get_cli_structure_help(self, mocker, fake_metadata): "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata, ) + mocker.patch( + "kedro.framework.cli.cli.importlib.import_module", + return_value=Module(cli=cli), + ) kedro_cli = KedroCLI(fake_metadata.project_path) help_cli_structure = get_cli_structure(kedro_cli, get_help=True)
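Reviewer note: the key runtime change in this diff is the version-agnostic handling of `AnalysisException` in `SparkDataSet._exists` and `DeltaTableDataSet._exists`, needed because `AnalysisException.desc` is deprecated in pyspark >= 3.4. The sketch below condenses that pattern into a standalone helper for clarity; the helper name `_looks_like_missing_path` is illustrative only and does not appear in the PR.

```python
from pyspark.sql.utils import AnalysisException


def _looks_like_missing_path(exception: AnalysisException) -> bool:
    """Mirror of the check both `_exists` methods now perform."""
    # `AnalysisException.desc` is deprecated with pyspark >= 3.4, which exposes
    # the text via `message` instead, so read whichever attribute is present.
    message = exception.desc if hasattr(exception, "desc") else exception.message
    return "Path does not exist:" in message or "is not a Delta table" in message
```

Both `_exists` implementations return `False` when this check matches and re-raise the exception otherwise, so behaviour is unchanged for genuine analysis errors.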