From 5f3aa611d4028747f9a49268a0666c69deeed2b7 Mon Sep 17 00:00:00 2001 From: Ivar Soares Urdalen Date: Tue, 16 Jan 2024 21:21:10 +0100 Subject: [PATCH] revise setup.py and add tqdm as dependency (#153) --- .github/workflows/build_test.yml | 58 +++++++++++++++++++ .github/workflows/publish.yml | 2 +- .github/workflows/test_installability.yml | 2 +- .python-version | 2 +- .../services/file_time_series_parser.py | 5 +- .../tests/services/test_feather_reader.py | 5 +- .../util/handle_missing_imports.py | 1 + setup.py | 20 +++++-- 8 files changed, 84 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/build_test.yml diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml new file mode 100644 index 0000000..507ebba --- /dev/null +++ b/.github/workflows/build_test.yml @@ -0,0 +1,58 @@ +name: Builds and tests the Python Package on pull requests + +on: + pull_request: + workflow_dispatch: + +jobs: + build-package: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + + - name: Install dependencies and build package + run: | + pip install pipenv wheel + pipenv sync --system + ./build.sh + + - name: Cache build + uses: actions/cache@v3 + with: + path: ./dist + key: build-cache-${{ github.sha }} + + test-package: + needs: build-package + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + fail-fast: true + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Recover cache + uses: actions/cache@v3 + with: + path: ./dist + key: build-cache-${{ github.sha }} + + - name: Install SDK from cache + run: | + PACKAGE=$(ls ./dist/ | grep -P .+\.whl$) + pip install ./dist/$PACKAGE --no-cache-dir + + - name: Run unit tests + run: | + mkdir -p tests/exabel_data_sdk + cp -r ./exabel_data_sdk/tests ./tests/exabel_data_sdk/ + cd ./tests/ + python -m unittest discover -s ./exabel_data_sdk/tests + diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e2ec029..27ca812 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -39,7 +39,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] fail-fast: true steps: diff --git a/.github/workflows/test_installability.yml b/.github/workflows/test_installability.yml index 2f7257f..a70861e 100644 --- a/.github/workflows/test_installability.yml +++ b/.github/workflows/test_installability.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] fail-fast: true steps: diff --git a/.python-version b/.python-version index cc1923a..9ad6380 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.8 +3.8.18 diff --git a/exabel_data_sdk/services/file_time_series_parser.py b/exabel_data_sdk/services/file_time_series_parser.py index 00b666c..5dfa132 100644 --- a/exabel_data_sdk/services/file_time_series_parser.py +++ b/exabel_data_sdk/services/file_time_series_parser.py @@ -32,7 +32,6 @@ ) from exabel_data_sdk.services import file_constants from exabel_data_sdk.services.csv_reader import CsvReader -from exabel_data_sdk.services.feather_reader import FeatherReader from exabel_data_sdk.services.file_loading_exception import FileLoadingException from exabel_data_sdk.util.handle_missing_imports import handle_missing_imports from exabel_data_sdk.util.resource_name_normalization import ( @@ -95,6 +94,8 @@ def from_file( return (TimeSeriesFileParser(filename, None, s, None) for s in workbook.sheetnames) if Path(filename).suffix.lower() in file_constants.FEATHER_EXTENSIONS: if batch_size is not None: + from exabel_data_sdk.services.feather_reader import FeatherReader + logger.info( "Reading in batches from Feather file. Batch size will be set to the " "batch size fixed in the Feather file." @@ -167,6 +168,8 @@ def parse_file( engine="openpyxl", ) elif extension in file_constants.FEATHER_EXTENSIONS: + from exabel_data_sdk.services.feather_reader import FeatherReader + df = FeatherReader.read_file(self.filename, string_columns=[0]) else: raise FileLoadingException(f"Unknown file extension '{extension}'") diff --git a/exabel_data_sdk/tests/services/test_feather_reader.py b/exabel_data_sdk/tests/services/test_feather_reader.py index 1a7fc8c..4cce9a5 100644 --- a/exabel_data_sdk/tests/services/test_feather_reader.py +++ b/exabel_data_sdk/tests/services/test_feather_reader.py @@ -4,7 +4,6 @@ import pandas as pd -from exabel_data_sdk.services.feather_reader import FeatherReader from exabel_data_sdk.tests.decorators import requires_modules @@ -16,6 +15,8 @@ def _read_feather( string_columns: Iterable[int], ): with tempfile.TemporaryDirectory() as tmp: + from exabel_data_sdk.services.feather_reader import FeatherReader + file = f"{tmp}/file.feather" pd.DataFrame(content[1:], columns=content[0]).to_feather(file) return FeatherReader.read_file(filename=file, string_columns=string_columns) @@ -47,6 +48,8 @@ def test_read_feather_with_empty_value(self): def test_read_feather_in_batches(self): df = pd.DataFrame({"A": range(100000), "B": range(100000)}) with tempfile.TemporaryDirectory() as tmp: + from exabel_data_sdk.services.feather_reader import FeatherReader + file = f"{tmp}/file_for_batch_reading.feather" df.to_feather(file) df = df.astype({"A": str}) diff --git a/exabel_data_sdk/util/handle_missing_imports.py b/exabel_data_sdk/util/handle_missing_imports.py index 9914e93..5e86760 100644 --- a/exabel_data_sdk/util/handle_missing_imports.py +++ b/exabel_data_sdk/util/handle_missing_imports.py @@ -10,6 +10,7 @@ "openpyxl": "openpyxl", "google.cloud.bigquery": "google-cloud-bigquery", "google.oauth2.service_account": "google-cloud-bigquery", + "pyarrow": "pyarrow", } diff --git a/setup.py b/setup.py index 166157a..be6bd48 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +import itertools + import setuptools with open("README.md", "r", encoding="utf-8") as fh: @@ -16,7 +18,7 @@ ] _SNOWFLAKE_REQUIREMENTS = _SQLALCHEMY_REQUIREMENTS + [ - "snowflake-connector-python", + "snowflake-connector-python[pandas]", "snowflake-sqlalchemy", ] @@ -25,6 +27,14 @@ "pyarrow", ] +extras = { + "snowflake": _SNOWFLAKE_REQUIREMENTS, + "bigquery": _BIGQUERY_REQUIREMENTS, + "athena": _ATHENA_REQUIREMENTS, +} +extras["all"] = list(itertools.chain.from_iterable(extras.values())) + + setuptools.setup( name="exabel-data-sdk", version=version, @@ -44,12 +54,9 @@ "pandas", "protobuf>=4", "requests", + "tqdm", ], - extras_require={ - "snowflake": _SNOWFLAKE_REQUIREMENTS, - "bigquery": _BIGQUERY_REQUIREMENTS, - "athena": _ATHENA_REQUIREMENTS, - }, + extras_require=extras, python_requires=">=3.8", classifiers=[ "Development Status :: 5 - Production/Stable", @@ -58,6 +65,7 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ],