From c750430e428bfb0ff40f619351bd8b34e2e8609f Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 8 Aug 2023 05:02:23 +1200 Subject: [PATCH 1/5] Configure isort and run on the whole codebase Signed-off-by: Nathan McDougall --- .pre-commit-config.yaml | 1 - pandera/__init__.py | 11 ++++------- pandera/api/base/schema.py | 4 ++-- pandera/api/pyspark/container.py | 2 +- pandera/backends/pandas/__init__.py | 10 ++++------ pandera/backends/pandas/array.py | 6 +++--- pandera/backends/pandas/base.py | 2 +- pandera/backends/pandas/components.py | 10 +++++----- pandera/backends/pandas/container.py | 6 +++--- pandera/backends/pyspark/__init__.py | 1 - pandera/backends/pyspark/column.py | 2 +- pandera/backends/pyspark/components.py | 2 +- pandera/config.py | 1 + pandera/engines/engine.py | 1 - pandera/engines/pyspark_engine.py | 4 ++-- pandera/error_handlers.py | 2 +- pandera/pyspark.py | 17 +++------------- pandera/typing/__init__.py | 1 - pandera/typing/pyspark_sql.py | 3 ++- pyproject.toml | 8 +++++++- setup.cfg | 3 --- tests/core/test_decorators.py | 2 +- tests/core/test_dtypes.py | 1 - tests/core/test_errors.py | 2 +- tests/core/test_extension_modules.py | 3 +-- tests/core/test_pydantic_dtype.py | 2 +- tests/pyspark/conftest.py | 4 +++- tests/pyspark/test_pyspark_accessor.py | 5 ++--- tests/pyspark/test_pyspark_check.py | 26 ++++++++++++------------- tests/pyspark/test_pyspark_config.py | 2 +- tests/pyspark/test_pyspark_container.py | 7 ++++--- tests/pyspark/test_pyspark_dtypes.py | 7 ++++--- tests/pyspark/test_pyspark_engine.py | 1 + tests/pyspark/test_pyspark_error.py | 11 +++++------ tests/pyspark/test_pyspark_model.py | 5 +++-- 35 files changed, 80 insertions(+), 95 deletions(-) delete mode 100644 setup.cfg diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 45ce90e35..3643b25e6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,6 @@ repos: rev: v5.10.1 hooks: - id: isort - args: ["--line-length=79", "--skip=docs/source/conf.py", "--diff"] - repo: https://github.com/ikamensh/flynt rev: '0.76' diff --git a/pandera/__init__.py b/pandera/__init__.py index 1ebee0126..6777a0404 100644 --- a/pandera/__init__.py +++ b/pandera/__init__.py @@ -2,14 +2,17 @@ import platform import pandera.backends +import pandera.backends.base.builtin_checks +import pandera.backends.base.builtin_hypotheses +import pandera.backends.pandas from pandera import errors, external_config, typing from pandera.accessors import pandas_accessor from pandera.api import extensions from pandera.api.checks import Check from pandera.api.hypotheses import Hypothesis from pandera.api.pandas.array import SeriesSchema -from pandera.api.pandas.container import DataFrameSchema from pandera.api.pandas.components import Column, Index, MultiIndex +from pandera.api.pandas.container import DataFrameSchema from pandera.api.pandas.model import DataFrameModel, SchemaModel from pandera.api.pandas.model_components import Field, check, dataframe_check from pandera.decorators import check_input, check_io, check_output, check_types @@ -57,15 +60,9 @@ UINT64, pandas_version, ) - -import pandera.backends.base.builtin_checks -import pandera.backends.base.builtin_hypotheses -import pandera.backends.pandas - from pandera.schema_inference.pandas import infer_schema from pandera.version import __version__ - if platform.system() != "Windows": # pylint: disable=ungrouped-imports from pandera.dtypes import Complex256, Float128 diff --git a/pandera/api/base/schema.py b/pandera/api/base/schema.py index ef872288b..50d2a85df 100644 --- a/pandera/api/base/schema.py +++ b/pandera/api/base/schema.py @@ -8,11 +8,11 @@ import inspect from abc import ABC from functools import wraps -from typing import Any, Dict, Tuple, Type, Optional, Union +from typing import Any, Dict, Optional, Tuple, Type, Union from pandera.backends.base import BaseSchemaBackend -from pandera.errors import BackendNotFoundError from pandera.dtypes import DataType +from pandera.errors import BackendNotFoundError DtypeInputTypes = Union[str, type, DataType, Type] diff --git a/pandera/api/pyspark/container.py b/pandera/api/pyspark/container.py index 6a272f7f1..76ad70523 100644 --- a/pandera/api/pyspark/container.py +++ b/pandera/api/pyspark/container.py @@ -11,7 +11,6 @@ from pyspark.sql import DataFrame from pandera import errors -from pandera.config import CONFIG from pandera.api.base.schema import BaseSchema from pandera.api.checks import Check from pandera.api.pyspark.error_handler import ErrorHandler @@ -20,6 +19,7 @@ PySparkDtypeInputTypes, StrictType, ) +from pandera.config import CONFIG from pandera.dtypes import DataType, UniqueSettings from pandera.engines import pyspark_engine diff --git a/pandera/backends/pandas/__init__.py b/pandera/backends/pandas/__init__.py index 141f1f93a..0e5f9445b 100644 --- a/pandera/backends/pandas/__init__.py +++ b/pandera/backends/pandas/__init__.py @@ -6,20 +6,18 @@ from pandera.api.checks import Check from pandera.api.hypotheses import Hypothesis from pandera.api.pandas.array import SeriesSchema -from pandera.api.pandas.container import DataFrameSchema from pandera.api.pandas.components import Column, Index, MultiIndex - +from pandera.api.pandas.container import DataFrameSchema from pandera.backends.pandas import builtin_checks, builtin_hypotheses -from pandera.backends.pandas.checks import PandasCheckBackend -from pandera.backends.pandas.hypotheses import PandasHypothesisBackend from pandera.backends.pandas.array import SeriesSchemaBackend -from pandera.backends.pandas.container import DataFrameSchemaBackend +from pandera.backends.pandas.checks import PandasCheckBackend from pandera.backends.pandas.components import ( ColumnBackend, IndexBackend, MultiIndexBackend, ) - +from pandera.backends.pandas.container import DataFrameSchemaBackend +from pandera.backends.pandas.hypotheses import PandasHypothesisBackend dataframe_datatypes = [pd.DataFrame] series_datatypes = [pd.Series] diff --git a/pandera/backends/pandas/array.py b/pandera/backends/pandas/array.py index 98c19962a..26e5bf8ac 100644 --- a/pandera/backends/pandas/array.py +++ b/pandera/backends/pandas/array.py @@ -1,12 +1,12 @@ """Pandera array backends.""" -from typing import cast, List, Optional +from typing import List, Optional, cast import pandas as pd from multimethod import DispatchError -from pandera.backends.base import CoreCheckResult from pandera.api.pandas.types import is_field +from pandera.backends.base import CoreCheckResult from pandera.backends.pandas.base import PandasSchemaBackend from pandera.backends.pandas.error_formatters import ( reshape_failure_cases, @@ -17,10 +17,10 @@ from pandera.error_handlers import SchemaErrorHandler from pandera.errors import ( ParserError, + SchemaDefinitionError, SchemaError, SchemaErrorReason, SchemaErrors, - SchemaDefinitionError, ) diff --git a/pandera/backends/pandas/base.py b/pandera/backends/pandas/base.py index 1582bd231..a28716747 100644 --- a/pandera/backends/pandas/base.py +++ b/pandera/backends/pandas/base.py @@ -23,8 +23,8 @@ scalar_failure_case, summarize_failure_cases, ) -from pandera.errors import FailureCaseMetadata, SchemaError, SchemaErrorReason from pandera.error_handlers import SchemaErrorHandler +from pandera.errors import FailureCaseMetadata, SchemaError, SchemaErrorReason class ColumnInfo(NamedTuple): diff --git a/pandera/backends/pandas/components.py b/pandera/backends/pandas/components.py index ea1b542fc..28bc2f64a 100644 --- a/pandera/backends/pandas/components.py +++ b/pandera/backends/pandas/components.py @@ -8,22 +8,22 @@ import numpy as np import pandas as pd -from pandera.backends.base import CoreCheckResult -from pandera.backends.pandas.array import ArraySchemaBackend -from pandera.backends.pandas.container import DataFrameSchemaBackend from pandera.api.pandas.types import ( is_field, is_index, is_multiindex, is_table, ) +from pandera.backends.base import CoreCheckResult +from pandera.backends.pandas.array import ArraySchemaBackend +from pandera.backends.pandas.container import DataFrameSchemaBackend from pandera.backends.pandas.error_formatters import scalar_failure_case from pandera.error_handlers import SchemaErrorHandler from pandera.errors import ( + SchemaDefinitionError, SchemaError, - SchemaErrors, SchemaErrorReason, - SchemaDefinitionError, + SchemaErrors, ) diff --git a/pandera/backends/pandas/container.py b/pandera/backends/pandas/container.py index 9f7946828..7a3b32b41 100644 --- a/pandera/backends/pandas/container.py +++ b/pandera/backends/pandas/container.py @@ -3,20 +3,20 @@ import copy import itertools import traceback -from typing import Any, Callable, List, Optional, Tuple, Dict +from typing import Any, Callable, Dict, List, Optional, Tuple import pandas as pd from pydantic import BaseModel -from pandera.backends.base import CoreCheckResult from pandera.api.pandas.types import is_table +from pandera.backends.base import CoreCheckResult from pandera.backends.pandas.base import ColumnInfo, PandasSchemaBackend from pandera.backends.pandas.error_formatters import ( reshape_failure_cases, scalar_failure_case, ) -from pandera.engines import pandas_engine from pandera.backends.pandas.utils import convert_uniquesettings +from pandera.engines import pandas_engine from pandera.error_handlers import SchemaErrorHandler from pandera.errors import ( ParserError, diff --git a/pandera/backends/pyspark/__init__.py b/pandera/backends/pyspark/__init__.py index 9f95a94c2..218bfb951 100644 --- a/pandera/backends/pyspark/__init__.py +++ b/pandera/backends/pyspark/__init__.py @@ -12,7 +12,6 @@ from pandera.backends.pyspark.components import ColumnBackend from pandera.backends.pyspark.container import DataFrameSchemaBackend - for t in [pst.DataFrame]: Check.register_backend(t, PySparkCheckBackend) ColumnSchema.register_backend(t, ColumnSchemaBackend) diff --git a/pandera/backends/pyspark/column.py b/pandera/backends/pyspark/column.py index 7c0ac168e..40a30998a 100644 --- a/pandera/backends/pyspark/column.py +++ b/pandera/backends/pyspark/column.py @@ -9,7 +9,7 @@ from pandera.api.pyspark.error_handler import ErrorCategory, ErrorHandler from pandera.backends.pyspark.base import PysparkSchemaBackend -from pandera.backends.pyspark.decorators import validate_scope, ValidationScope +from pandera.backends.pyspark.decorators import ValidationScope, validate_scope from pandera.backends.pyspark.error_formatters import scalar_failure_case from pandera.engines.pyspark_engine import Engine from pandera.errors import ParserError, SchemaError, SchemaErrorReason diff --git a/pandera/backends/pyspark/components.py b/pandera/backends/pyspark/components.py index 973a7abf0..e0bec98a3 100644 --- a/pandera/backends/pyspark/components.py +++ b/pandera/backends/pyspark/components.py @@ -10,7 +10,7 @@ from pandera.api.pyspark.error_handler import ErrorCategory, ErrorHandler from pandera.backends.pyspark.column import ColumnSchemaBackend -from pandera.backends.pyspark.decorators import validate_scope, ValidationScope +from pandera.backends.pyspark.decorators import ValidationScope, validate_scope from pandera.backends.pyspark.error_formatters import scalar_failure_case from pandera.errors import SchemaError, SchemaErrorReason diff --git a/pandera/config.py b/pandera/config.py index 2e694ca81..eb52f5d53 100644 --- a/pandera/config.py +++ b/pandera/config.py @@ -1,6 +1,7 @@ """Pandera configuration.""" from enum import Enum + from pydantic import BaseSettings diff --git a/pandera/engines/engine.py b/pandera/engines/engine.py index 75acadded..61187e4fd 100644 --- a/pandera/engines/engine.py +++ b/pandera/engines/engine.py @@ -25,7 +25,6 @@ from pandera.dtypes import DataType - # register different TypedDict type depending on python version if sys.version_info >= (3, 9): from typing import TypedDict diff --git a/pandera/engines/pyspark_engine.py b/pandera/engines/pyspark_engine.py index a9668402b..f573de078 100644 --- a/pandera/engines/pyspark_engine.py +++ b/pandera/engines/pyspark_engine.py @@ -10,9 +10,9 @@ import dataclasses import inspect import re -import warnings -from typing import Any, Iterable, Union, Optional import sys +import warnings +from typing import Any, Iterable, Optional, Union import pyspark.sql.types as pst diff --git a/pandera/error_handlers.py b/pandera/error_handlers.py index e05615dea..afc28ace6 100644 --- a/pandera/error_handlers.py +++ b/pandera/error_handlers.py @@ -2,7 +2,7 @@ from typing import List, Optional -from pandera.errors import SchemaError, SchemaErrors, SchemaErrorReason +from pandera.errors import SchemaError, SchemaErrorReason, SchemaErrors class SchemaErrorHandler: diff --git a/pandera/pyspark.py b/pandera/pyspark.py index 4573aa802..8a1f69a0f 100644 --- a/pandera/pyspark.py +++ b/pandera/pyspark.py @@ -1,21 +1,11 @@ """A flexible and expressive pyspark validation library.""" # pylint: disable=unused-import -from pandera.accessors import pyspark_sql_accessor +import pandera.backends.pyspark from pandera.api.checks import Check from pandera.api.pyspark import Column, DataFrameSchema from pandera.api.pyspark.model import DataFrameModel, SchemaModel -from pandera.api.pyspark.model_components import ( - Field, - check, - dataframe_check, -) -import pandera.backends.pyspark -from pandera.decorators import ( - check_input, - check_io, - check_output, - check_types, -) +from pandera.api.pyspark.model_components import Field, check, dataframe_check +from pandera.decorators import check_input, check_io, check_output, check_types from pandera.dtypes import ( Bool, Category, @@ -51,7 +41,6 @@ from pandera.typing import pyspark_sql from pandera.version import __version__ - __all__ = [ # dtypes "Bool", diff --git a/pandera/typing/__init__.py b/pandera/typing/__init__.py index 8de4e6ff4..d87c45730 100644 --- a/pandera/typing/__init__.py +++ b/pandera/typing/__init__.py @@ -50,7 +50,6 @@ ) from pandera.typing.pandas import DataFrame, Index, Series - DATAFRAME_TYPES: Set[Type] = {DataFrame} SERIES_TYPES: Set[Type] = {Series} INDEX_TYPES: Set[Type] = {Index} diff --git a/pandera/typing/pyspark_sql.py b/pandera/typing/pyspark_sql.py index b20603580..1b924cd03 100644 --- a/pandera/typing/pyspark_sql.py +++ b/pandera/typing/pyspark_sql.py @@ -1,5 +1,6 @@ """Pandera type annotations for Pyspark.""" -from typing import Union, TypeVar +from typing import TypeVar, Union + from pandera.typing.common import DataFrameBase from pandera.typing.pandas import DataFrameModel, _GenericAlias diff --git a/pyproject.toml b/pyproject.toml index 0a0b1d2a2..2d41229e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,13 @@ [tool.pyright] -include = [ "pandera", "tests" ] +include = ["pandera", "tests"] exclude = [".nox/**", ".nox-*/**"] [tool.pytest.ini_options] log_cli = true log_cli_level = 20 + +[tool.isort] +profile = "black" +line_length = 79 # sync with black +float_to_top = true +skip = ["docs/source/conf.py"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index bbc646b1c..000000000 --- a/setup.cfg +++ /dev/null @@ -1,3 +0,0 @@ -[isort] -float_to_top = true -profile = black diff --git a/tests/core/test_decorators.py b/tests/core/test_decorators.py index a1f023611..1fa425349 100644 --- a/tests/core/test_decorators.py +++ b/tests/core/test_decorators.py @@ -9,12 +9,12 @@ from pandera import ( Check, Column, + DataFrameModel, DataFrameSchema, DateTime, Field, Float, Int, - DataFrameModel, String, check_input, check_io, diff --git a/tests/core/test_dtypes.py b/tests/core/test_dtypes.py index 11f228548..6edff7af9 100644 --- a/tests/core/test_dtypes.py +++ b/tests/core/test_dtypes.py @@ -29,7 +29,6 @@ # instances. from pandera.typing.geopandas import GEOPANDAS_INSTALLED - # register different TypedDict type depending on python version if sys.version_info >= (3, 9): from typing import TypedDict diff --git a/tests/core/test_errors.py b/tests/core/test_errors.py index 28e476e89..8b7fcebff 100644 --- a/tests/core/test_errors.py +++ b/tests/core/test_errors.py @@ -19,7 +19,7 @@ import pytest from pandera import Check, Column, DataFrameSchema -from pandera.engines import pandas_engine, numpy_engine +from pandera.engines import numpy_engine, pandas_engine from pandera.errors import ( ParserError, ReducedPickleExceptionBase, diff --git a/tests/core/test_extension_modules.py b/tests/core/test_extension_modules.py index 212feba8e..f124a09fd 100644 --- a/tests/core/test_extension_modules.py +++ b/tests/core/test_extension_modules.py @@ -1,8 +1,7 @@ """Tests for extension module imports.""" -import pytest - import pandas as pd +import pytest from pandera.api.hypotheses import Hypothesis from pandera.backends.pandas.hypotheses import HAS_SCIPY diff --git a/tests/core/test_pydantic_dtype.py b/tests/core/test_pydantic_dtype.py index a15366207..a04937a9e 100644 --- a/tests/core/test_pydantic_dtype.py +++ b/tests/core/test_pydantic_dtype.py @@ -7,8 +7,8 @@ from pydantic import BaseModel import pandera as pa -from pandera.engines.pandas_engine import PydanticModel from pandera.api.pandas.array import ArraySchema +from pandera.engines.pandas_engine import PydanticModel class Record(BaseModel): diff --git a/tests/pyspark/conftest.py b/tests/pyspark/conftest.py index e3d4ad378..1253fcb0c 100644 --- a/tests/pyspark/conftest.py +++ b/tests/pyspark/conftest.py @@ -1,9 +1,11 @@ """ conftest """ # pylint:disable=redefined-outer-name import datetime + +import pyspark.sql.types as T import pytest from pyspark.sql import SparkSession -import pyspark.sql.types as T + from pandera.config import PanderaConfig diff --git a/tests/pyspark/test_pyspark_accessor.py b/tests/pyspark/test_pyspark_accessor.py index a24311d6f..a2e8b6284 100644 --- a/tests/pyspark/test_pyspark_accessor.py +++ b/tests/pyspark/test_pyspark_accessor.py @@ -1,16 +1,15 @@ """Unit tests for dask_accessor module.""" from typing import Union +import pytest from pyspark.sql import DataFrame, SparkSession from pyspark.sql.functions import col from pyspark.sql.types import FloatType, LongType -import pytest -from pandera.config import PanderaConfig, ValidationDepth import pandera.pyspark as pa +from pandera.config import PanderaConfig, ValidationDepth from pandera.pyspark import pyspark_sql_accessor - spark = SparkSession.builder.getOrCreate() diff --git a/tests/pyspark/test_pyspark_check.py b/tests/pyspark/test_pyspark_check.py index 1bcf6963d..690cb3705 100644 --- a/tests/pyspark/test_pyspark_check.py +++ b/tests/pyspark/test_pyspark_check.py @@ -3,33 +3,31 @@ import datetime import decimal +import pytest from pyspark.sql.functions import col from pyspark.sql.types import ( - LongType, - StringType, - StructField, - StructType, - IntegerType, + ArrayType, + BooleanType, ByteType, - ShortType, - TimestampType, DateType, DecimalType, DoubleType, - BooleanType, FloatType, - ArrayType, + IntegerType, + LongType, MapType, + ShortType, + StringType, + StructField, + StructType, + TimestampType, ) -import pytest - import pandera.extensions import pandera.pyspark as pa -from pandera.pyspark import DataFrameModel, Field -from pandera.backends.pyspark.decorators import validate_scope, ValidationScope -from pandera.pyspark import DataFrameSchema, Column +from pandera.backends.pyspark.decorators import ValidationScope, validate_scope from pandera.errors import PysparkSchemaError +from pandera.pyspark import Column, DataFrameModel, DataFrameSchema, Field class TestDecorator: diff --git a/tests/pyspark/test_pyspark_config.py b/tests/pyspark/test_pyspark_config.py index 60ebfe4d9..ae01d48e7 100644 --- a/tests/pyspark/test_pyspark_config.py +++ b/tests/pyspark/test_pyspark_config.py @@ -6,9 +6,9 @@ from pandera.config import CONFIG, ValidationDepth from pandera.pyspark import ( Check, - DataFrameSchema, Column, DataFrameModel, + DataFrameSchema, Field, ) from tests.pyspark.conftest import spark_df diff --git a/tests/pyspark/test_pyspark_container.py b/tests/pyspark/test_pyspark_container.py index e0306df40..f97b1eeea 100644 --- a/tests/pyspark/test_pyspark_container.py +++ b/tests/pyspark/test_pyspark_container.py @@ -1,12 +1,13 @@ """Unit tests for pyspark container.""" -from pyspark.sql import DataFrame, SparkSession import pyspark.sql.types as T import pytest -import pandera.pyspark as pa +from pyspark.sql import DataFrame, SparkSession + import pandera.errors +import pandera.pyspark as pa from pandera.config import PanderaConfig, ValidationDepth -from pandera.pyspark import DataFrameSchema, Column +from pandera.pyspark import Column, DataFrameSchema spark = SparkSession.builder.getOrCreate() diff --git a/tests/pyspark/test_pyspark_dtypes.py b/tests/pyspark/test_pyspark_dtypes.py index 0b00536cf..4349d9993 100644 --- a/tests/pyspark/test_pyspark_dtypes.py +++ b/tests/pyspark/test_pyspark_dtypes.py @@ -1,13 +1,14 @@ """Unit tests for pyspark container.""" from typing import Any + import pyspark.sql.types as T from pyspark.sql import DataFrame -from pandera.pyspark import DataFrameSchema, Column -from tests.pyspark.conftest import spark_df +from pandera.backends.pyspark.decorators import ValidationScope, validate_scope from pandera.config import PanderaConfig -from pandera.backends.pyspark.decorators import validate_scope, ValidationScope +from pandera.pyspark import Column, DataFrameSchema +from tests.pyspark.conftest import spark_df class BaseClass: diff --git a/tests/pyspark/test_pyspark_engine.py b/tests/pyspark/test_pyspark_engine.py index 5ebb00916..74ec50c04 100644 --- a/tests/pyspark/test_pyspark_engine.py +++ b/tests/pyspark/test_pyspark_engine.py @@ -2,6 +2,7 @@ # pylint:disable=redefined-outer-name,unused-argument import pytest + from pandera.engines import pyspark_engine diff --git a/tests/pyspark/test_pyspark_error.py b/tests/pyspark/test_pyspark_error.py index a8d75ecab..e4af0adbc 100644 --- a/tests/pyspark/test_pyspark_error.py +++ b/tests/pyspark/test_pyspark_error.py @@ -3,19 +3,18 @@ from typing import Union +import pyspark.sql.types as T +import pytest from pyspark.sql import DataFrame, SparkSession from pyspark.sql.functions import col -import pyspark.sql.types as T from pyspark.sql.types import StringType -import pytest -from pandera.errors import SchemaError, SchemaErrorReason -from pandera.api.pyspark import error_handler import pandera.pyspark as pa -from pandera.pyspark import DataFrameSchema, Column, DataFrameModel, Field +from pandera.api.pyspark import error_handler +from pandera.errors import SchemaError, SchemaErrorReason +from pandera.pyspark import Column, DataFrameModel, DataFrameSchema, Field from tests.pyspark.conftest import spark_df - spark = SparkSession.builder.getOrCreate() diff --git a/tests/pyspark/test_pyspark_model.py b/tests/pyspark/test_pyspark_model.py index d1766573d..c802e4e24 100644 --- a/tests/pyspark/test_pyspark_model.py +++ b/tests/pyspark/test_pyspark_model.py @@ -2,16 +2,17 @@ # pylint:disable=abstract-method from typing import Optional -from pyspark.sql import DataFrame + import pyspark.sql.types as T import pytest +from pyspark.sql import DataFrame import pandera import pandera.pyspark as pa +from pandera.api.pyspark.model import docstring_substitution from pandera.config import PanderaConfig, ValidationDepth from pandera.pyspark import DataFrameModel, DataFrameSchema, Field from tests.pyspark.conftest import spark_df -from pandera.api.pyspark.model import docstring_substitution def test_schema_with_bare_types(): From ebd29dc69f2ae132b19e9fc4eac2dd5fc1b87cf6 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 8 Aug 2023 05:20:11 +1200 Subject: [PATCH 2/5] isort on two stragglers not caught in prev commit Signed-off-by: Nathan McDougall --- pandera/backends/pyspark/container.py | 4 ++-- tests/core/test_schemas.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandera/backends/pyspark/container.py b/pandera/backends/pyspark/container.py index 4c5bf2174..fb04064d4 100644 --- a/pandera/backends/pyspark/container.py +++ b/pandera/backends/pyspark/container.py @@ -3,7 +3,7 @@ import copy import traceback import warnings -from typing import Any, List, Optional, Dict +from typing import Any, Dict, List, Optional from pyspark.sql import DataFrame from pyspark.sql.functions import col @@ -11,7 +11,7 @@ from pandera.api.pyspark.error_handler import ErrorCategory, ErrorHandler from pandera.api.pyspark.types import is_table from pandera.backends.pyspark.base import ColumnInfo, PysparkSchemaBackend -from pandera.backends.pyspark.decorators import validate_scope, ValidationScope +from pandera.backends.pyspark.decorators import ValidationScope, validate_scope from pandera.backends.pyspark.error_formatters import scalar_failure_case from pandera.config import CONFIG from pandera.errors import ( diff --git a/tests/core/test_schemas.py b/tests/core/test_schemas.py index 9e3282b8c..e317b2bfd 100644 --- a/tests/core/test_schemas.py +++ b/tests/core/test_schemas.py @@ -14,19 +14,19 @@ Category, Check, Column, + DataFrameModel, DataFrameSchema, + Field, Index, Int, MultiIndex, SeriesSchema, String, errors, - Field, - DataFrameModel, ) +from pandera.api.pandas.array import ArraySchema from pandera.dtypes import UniqueSettings from pandera.engines.pandas_engine import Engine -from pandera.api.pandas.array import ArraySchema def test_dataframe_schema() -> None: From 61eefad9d85aac877e0f03c4a3c3049bb782b86d Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 6 Dec 2023 10:06:19 +1300 Subject: [PATCH 3/5] Run isort on all files Signed-off-by: Nathan McDougall --- pandera/api/pandas/array.py | 5 +++-- pandera/api/pandas/container.py | 6 +++--- pandera/api/pandas/model.py | 2 +- pandera/backends/pyspark/container.py | 2 +- pandera/backends/pyspark/decorators.py | 1 + pandera/engines/__init__.py | 1 - pandera/engines/pandas_engine.py | 7 +++---- pandera/engines/pyspark_engine.py | 2 ++ pandera/typing/fastapi.py | 2 +- pandera/typing/geopandas.py | 10 +++------- pandera/typing/pandas.py | 13 +++++++------ tests/core/test_pydantic.py | 2 +- tests/geopandas/test_engine.py | 6 +++--- .../test_from_to_format_conversions.py | 3 +-- tests/geopandas/test_geopandas.py | 17 ++++++++--------- tests/geopandas/test_pydantic.py | 4 ++-- tests/pyspark/test_pyspark_container.py | 7 ++++--- tests/pyspark/test_pyspark_decorators.py | 11 ++++------- tests/pyspark/test_pyspark_dtypes.py | 4 ++-- tests/pyspark/test_pyspark_model.py | 12 +++++------- 20 files changed, 55 insertions(+), 62 deletions(-) diff --git a/pandera/api/pandas/array.py b/pandera/api/pandas/array.py index f9e86d503..975c98305 100644 --- a/pandera/api/pandas/array.py +++ b/pandera/api/pandas/array.py @@ -3,6 +3,7 @@ import copy import warnings from typing import Any, List, Optional, TypeVar, Union, cast + import pandas as pd from pandera import errors @@ -13,11 +14,11 @@ from pandera.api.pandas.types import CheckList, PandasDtypeInputTypes, is_field from pandera.config import CONFIG from pandera.dtypes import DataType, UniqueSettings -from pandera.engines import pandas_engine, PYDANTIC_V2 +from pandera.engines import PYDANTIC_V2, pandas_engine if PYDANTIC_V2: - from pydantic_core import core_schema from pydantic import GetCoreSchemaHandler + from pydantic_core import core_schema TArraySchemaBase = TypeVar("TArraySchemaBase", bound="ArraySchema") diff --git a/pandera/api/pandas/container.py b/pandera/api/pandas/container.py index 17b7d46ca..70bb9d741 100644 --- a/pandera/api/pandas/container.py +++ b/pandera/api/pandas/container.py @@ -11,7 +11,6 @@ import pandas as pd from pandera import errors -from pandera.config import CONFIG from pandera import strategies as st from pandera.api.base.schema import BaseSchema, inferred_schema_guard from pandera.api.checks import Check @@ -21,12 +20,13 @@ PandasDtypeInputTypes, StrictType, ) +from pandera.config import CONFIG from pandera.dtypes import DataType, UniqueSettings -from pandera.engines import pandas_engine, PYDANTIC_V2 +from pandera.engines import PYDANTIC_V2, pandas_engine if PYDANTIC_V2: - from pydantic_core import core_schema from pydantic import GetCoreSchemaHandler + from pydantic_core import core_schema N_INDENT_SPACES = 4 diff --git a/pandera/api/pandas/model.py b/pandera/api/pandas/model.py index cc79ccae5..e7bf5630c 100644 --- a/pandera/api/pandas/model.py +++ b/pandera/api/pandas/model.py @@ -44,8 +44,8 @@ from pandera.typing.common import DataFrameBase if PYDANTIC_V2: + from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler from pydantic_core import core_schema - from pydantic import GetJsonSchemaHandler, GetCoreSchemaHandler try: from typing_extensions import get_type_hints diff --git a/pandera/backends/pyspark/container.py b/pandera/backends/pyspark/container.py index e1fa9fac6..10930a8eb 100644 --- a/pandera/backends/pyspark/container.py +++ b/pandera/backends/pyspark/container.py @@ -13,8 +13,8 @@ from pandera.backends.pyspark.base import ColumnInfo, PysparkSchemaBackend from pandera.backends.pyspark.decorators import ( ValidationScope, - validate_scope, cache_check_obj, + validate_scope, ) from pandera.backends.pyspark.error_formatters import scalar_failure_case from pandera.config import CONFIG diff --git a/pandera/backends/pyspark/decorators.py b/pandera/backends/pyspark/decorators.py index 3dacc398b..ff445b126 100644 --- a/pandera/backends/pyspark/decorators.py +++ b/pandera/backends/pyspark/decorators.py @@ -8,6 +8,7 @@ from typing import List, Type from pyspark.sql import DataFrame + from pandera.api.pyspark.types import PysparkDefaultTypes from pandera.config import CONFIG, ValidationDepth from pandera.errors import SchemaError diff --git a/pandera/engines/__init__.py b/pandera/engines/__init__.py index 5129fae99..f59bc9705 100644 --- a/pandera/engines/__init__.py +++ b/pandera/engines/__init__.py @@ -2,5 +2,4 @@ from pandera.engines.utils import pydantic_version - PYDANTIC_V2 = pydantic_version().release >= (2, 0, 0) diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index c72b058b7..24fa13d07 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -34,14 +34,13 @@ from pandera import dtypes, errors from pandera.dtypes import immutable -from pandera.engines import engine, numpy_engine, utils +from pandera.engines import PYDANTIC_V2, engine, numpy_engine, utils from pandera.engines.type_aliases import ( PandasDataType, PandasExtensionType, PandasObject, ) from pandera.engines.utils import pandas_version -from pandera.engines import PYDANTIC_V2 from pandera.system import FLOAT_128_AVAILABLE if PYDANTIC_V2: @@ -1068,10 +1067,10 @@ def from_parametrized_dtype(cls, pd_dtype: pd.IntervalDtype): if GEOPANDAS_INSTALLED: - from geopandas.array import GeometryArray, GeometryDtype, from_shapely + import pyproj import shapely import shapely.geometry - import pyproj + from geopandas.array import GeometryArray, GeometryDtype, from_shapely GeoPandasObject = Union[ pd.Series, pd.DataFrame, gpd.GeoSeries, gpd.GeoDataFrame diff --git a/pandera/engines/pyspark_engine.py b/pandera/engines/pyspark_engine.py index 7eefa69be..d42911dac 100644 --- a/pandera/engines/pyspark_engine.py +++ b/pandera/engines/pyspark_engine.py @@ -11,6 +11,8 @@ import inspect import re import sys +import warnings +from typing import Any, Iterable, Optional, Union import pyspark import pyspark.sql.types as pst diff --git a/pandera/typing/fastapi.py b/pandera/typing/fastapi.py index aa5b558bb..92e5e2b9b 100644 --- a/pandera/typing/fastapi.py +++ b/pandera/typing/fastapi.py @@ -21,8 +21,8 @@ if PYDANTIC_V2: - from pydantic_core import core_schema from pydantic import GetCoreSchemaHandler + from pydantic_core import core_schema if FASTAPI_INSTALLED: diff --git a/pandera/typing/geopandas.py b/pandera/typing/geopandas.py index c937105f8..c45032272 100644 --- a/pandera/typing/geopandas.py +++ b/pandera/typing/geopandas.py @@ -8,24 +8,20 @@ Generic, TypeVar, Union, - get_args, _type_check, + get_args, ) import pandas as pd from pandera.engines import PYDANTIC_V2 from pandera.errors import SchemaError, SchemaInitError -from pandera.typing.common import ( - DataFrameBase, - DataFrameModel, - SeriesBase, -) +from pandera.typing.common import DataFrameBase, DataFrameModel, SeriesBase from pandera.typing.formats import Formats if PYDANTIC_V2: - from pydantic_core import core_schema from pydantic import GetCoreSchemaHandler + from pydantic_core import core_schema try: import geopandas as gpd diff --git a/pandera/typing/pandas.py b/pandera/typing/pandas.py index 4a70b52f9..b8d51cac2 100644 --- a/pandera/typing/pandas.py +++ b/pandera/typing/pandas.py @@ -15,11 +15,6 @@ _type_check, ) -try: - from typing import get_args -except ImportError: - from typing_extensions import get_args - import numpy as np import pandas as pd @@ -34,6 +29,12 @@ ) from pandera.typing.formats import Formats +try: + from typing import get_args +except ImportError: + from typing_extensions import get_args + + try: from typing import _GenericAlias # type: ignore[attr-defined] except ImportError: # pragma: no cover @@ -41,8 +42,8 @@ if PYDANTIC_V2: - from pydantic_core import core_schema from pydantic import GetCoreSchemaHandler + from pydantic_core import core_schema # pylint:disable=too-few-public-methods diff --git a/tests/core/test_pydantic.py b/tests/core/test_pydantic.py index 9123106c9..331bfd692 100644 --- a/tests/core/test_pydantic.py +++ b/tests/core/test_pydantic.py @@ -6,8 +6,8 @@ import pytest import pandera as pa -from pandera.typing import DataFrame, Series from pandera.engines.utils import pydantic_version +from pandera.typing import DataFrame, Series try: from pydantic import BaseModel, ValidationError diff --git a/tests/geopandas/test_engine.py b/tests/geopandas/test_engine.py index 49f8da1bf..bc9fb6b20 100644 --- a/tests/geopandas/test_engine.py +++ b/tests/geopandas/test_engine.py @@ -1,14 +1,14 @@ """Unit tests for the geopandas engine dtype Geometry.""" -import shapely +import geopandas as gpd import numpy as np import pandas as pd -import geopandas as gpd import pytest +import shapely from shapely.geometry import Point import pandera as pa -from pandera.engines.pandas_engine import Geometry, DateTime +from pandera.engines.pandas_engine import DateTime, Geometry def test_engine_geometry_simple(): diff --git a/tests/geopandas/test_from_to_format_conversions.py b/tests/geopandas/test_from_to_format_conversions.py index 23dbf8d1f..d275665fa 100644 --- a/tests/geopandas/test_from_to_format_conversions.py +++ b/tests/geopandas/test_from_to_format_conversions.py @@ -6,12 +6,11 @@ import tempfile from typing import Any -import pandas as pd import geopandas as gpd +import pandas as pd import pytest from shapely.geometry import Point - import pandera as pa from pandera.engines import pandas_engine from pandera.typing.geopandas import GeoDataFrame, GeoSeries diff --git a/tests/geopandas/test_geopandas.py b/tests/geopandas/test_geopandas.py index 868726e19..9f162be09 100644 --- a/tests/geopandas/test_geopandas.py +++ b/tests/geopandas/test_geopandas.py @@ -1,19 +1,18 @@ """Unit tests for the geopandas integration.""" - -try: # python 3.9+ - from typing import Annotated # type: ignore -except ImportError: - from typing_extensions import Annotated # type: ignore - -import pandas as pd import geopandas as gpd +import pandas as pd import pytest -from shapely.geometry import Polygon, Point +from shapely.geometry import Point, Polygon import pandera as pa +from pandera.engines.pandas_engine import Geometry from pandera.typing import Series from pandera.typing.geopandas import GeoDataFrame, GeoSeries -from pandera.engines.pandas_engine import Geometry + +try: # python 3.9+ + from typing import Annotated # type: ignore +except ImportError: + from typing_extensions import Annotated # type: ignore def test_dataframe_schema(): diff --git a/tests/geopandas/test_pydantic.py b/tests/geopandas/test_pydantic.py index 9b375286e..ba0597dcb 100644 --- a/tests/geopandas/test_pydantic.py +++ b/tests/geopandas/test_pydantic.py @@ -1,11 +1,11 @@ """Tests GeoPandas schema creation and validation from type annotations.""" # pylint:disable=missing-class-docstring,missing-function-docstring,too-few-public-methods -import pandas as pd import geopandas as gpd +import pandas as pd import pytest -from shapely.geometry import Point from pydantic import BaseModel, ValidationError +from shapely.geometry import Point import pandera as pa from pandera.typing.geopandas import GeoDataFrame, GeoSeries diff --git a/tests/pyspark/test_pyspark_container.py b/tests/pyspark/test_pyspark_container.py index 22f20693e..0832b0594 100644 --- a/tests/pyspark/test_pyspark_container.py +++ b/tests/pyspark/test_pyspark_container.py @@ -2,13 +2,14 @@ from contextlib import nullcontext as does_not_raise -import pandera.errors -import pandera.pyspark as pa import pyspark.sql.types as T import pytest +from pyspark.sql import DataFrame, SparkSession + +import pandera.errors +import pandera.pyspark as pa from pandera.config import PanderaConfig, ValidationDepth from pandera.pyspark import Column, DataFrameSchema -from pyspark.sql import DataFrame, SparkSession spark = SparkSession.builder.getOrCreate() diff --git a/tests/pyspark/test_pyspark_decorators.py b/tests/pyspark/test_pyspark_decorators.py index 67e5e2b2c..a0eeecdc9 100644 --- a/tests/pyspark/test_pyspark_decorators.py +++ b/tests/pyspark/test_pyspark_decorators.py @@ -1,19 +1,16 @@ """This module is to test the behaviour change based on defined config in pandera""" # pylint:disable=import-outside-toplevel,abstract-method -from contextlib import nullcontext as does_not_raise import logging +from contextlib import nullcontext as does_not_raise + import pyspark.sql.types as T -from pyspark.sql import DataFrame import pytest +from pyspark.sql import DataFrame from pandera.backends.pyspark.decorators import cache_check_obj from pandera.config import CONFIG -from pandera.pyspark import ( - Check, - DataFrameSchema, - Column, -) +from pandera.pyspark import Check, Column, DataFrameSchema from tests.pyspark.conftest import spark_df diff --git a/tests/pyspark/test_pyspark_dtypes.py b/tests/pyspark/test_pyspark_dtypes.py index a82bb760a..c67890d4a 100644 --- a/tests/pyspark/test_pyspark_dtypes.py +++ b/tests/pyspark/test_pyspark_dtypes.py @@ -4,11 +4,11 @@ import pyspark import pyspark.sql.types as T +from pyspark.sql import DataFrame + from pandera.backends.pyspark.decorators import ValidationScope, validate_scope from pandera.config import PanderaConfig from pandera.pyspark import Column, DataFrameSchema -from pyspark.sql import DataFrame - from tests.pyspark.conftest import spark_df diff --git a/tests/pyspark/test_pyspark_model.py b/tests/pyspark/test_pyspark_model.py index 0714f2c0e..ef26ed1e5 100644 --- a/tests/pyspark/test_pyspark_model.py +++ b/tests/pyspark/test_pyspark_model.py @@ -4,19 +4,17 @@ from contextlib import nullcontext as does_not_raise from typing import Optional +import pyspark.sql.types as T +import pytest +from pyspark.sql import DataFrame + import pandera import pandera.api.extensions as pax import pandera.pyspark as pa -import pyspark.sql.types as T -import pytest from pandera.api.pyspark.model import docstring_substitution from pandera.config import PanderaConfig, ValidationDepth -from pandera.errors import ( - SchemaDefinitionError, -) +from pandera.errors import SchemaDefinitionError from pandera.pyspark import DataFrameModel, DataFrameSchema, Field -from pyspark.sql import DataFrame - from tests.pyspark.conftest import spark_df From 730b5c6f9f3a03aa25946d8291850673f573d6ee Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 6 Dec 2023 10:35:03 +1300 Subject: [PATCH 4/5] Fix broken import Signed-off-by: Nathan McDougall --- tests/pyspark/test_pyspark_accessor.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/pyspark/test_pyspark_accessor.py b/tests/pyspark/test_pyspark_accessor.py index a2e8b6284..010b7ddfe 100644 --- a/tests/pyspark/test_pyspark_accessor.py +++ b/tests/pyspark/test_pyspark_accessor.py @@ -1,15 +1,14 @@ """Unit tests for dask_accessor module.""" from typing import Union +import pandera.pyspark as pa import pytest +from pandera.accessors import pyspark_sql_accessor +from pandera.config import PanderaConfig, ValidationDepth from pyspark.sql import DataFrame, SparkSession from pyspark.sql.functions import col from pyspark.sql.types import FloatType, LongType -import pandera.pyspark as pa -from pandera.config import PanderaConfig, ValidationDepth -from pandera.pyspark import pyspark_sql_accessor - spark = SparkSession.builder.getOrCreate() From 006dafadea154275bfe06122ba85725ccdb967c6 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Wed, 6 Dec 2023 10:37:38 +1300 Subject: [PATCH 5/5] Run isort Signed-off-by: Nathan McDougall --- tests/pyspark/test_pyspark_accessor.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/pyspark/test_pyspark_accessor.py b/tests/pyspark/test_pyspark_accessor.py index 010b7ddfe..00195410b 100644 --- a/tests/pyspark/test_pyspark_accessor.py +++ b/tests/pyspark/test_pyspark_accessor.py @@ -1,14 +1,15 @@ """Unit tests for dask_accessor module.""" from typing import Union -import pandera.pyspark as pa import pytest -from pandera.accessors import pyspark_sql_accessor -from pandera.config import PanderaConfig, ValidationDepth from pyspark.sql import DataFrame, SparkSession from pyspark.sql.functions import col from pyspark.sql.types import FloatType, LongType +import pandera.pyspark as pa +from pandera.accessors import pyspark_sql_accessor +from pandera.config import PanderaConfig, ValidationDepth + spark = SparkSession.builder.getOrCreate()