Skip to content
This repository has been archived by the owner on Mar 11, 2024. It is now read-only.

Commit

Permalink
fix kwargs
Browse files Browse the repository at this point in the history
  • Loading branch information
danielgafni committed Oct 31, 2023
1 parent bce29ad commit 4b717f9
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ on:

jobs:
test:
name: test ${{ matrix.py }} - ${{ matrix.os }} - polars=${{ matrix.polars_version }} - dagster=${{ matrix.dagster_version }}
name: test polars=${{ matrix.polars_version }} dagster=${{ matrix.dagster_version }} py=${{ matrix.py }} ${{ matrix.os }}
runs-on: ${{ matrix.os }}-latest
strategy:
fail-fast: false
Expand Down Expand Up @@ -60,7 +60,7 @@ jobs:
run: pytest -v .

lint:
name: lint ${{ matrix.py }} - ${{ matrix.os }} - polars=${{ matrix.polars_version }} - dagster=${{ matrix.dagster_version }}
name: lint polars=${{ matrix.polars_version }} dagster=${{ matrix.dagster_version }} py=${{ matrix.py }} ${{ matrix.os }}
runs-on: ${{ matrix.os }}-latest
strategy:
fail-fast: false
Expand Down
12 changes: 7 additions & 5 deletions dagster_polars/io_managers/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ def scan_parquet(path: UPath, context: InputContext) -> pl.LazyFrame:
# TODO: explore removing this as universal-pathlib should always provide storage_options in newer versions
pass

return pl.scan_parquet(
str(path),
storage_options=storage_options,
kwargs = dict(
n_rows=context.metadata.get("n_rows", None),
cache=context.metadata.get("cache", True),
parallel=context.metadata.get("parallel", "auto"),
Expand All @@ -84,10 +82,14 @@ def scan_parquet(path: UPath, context: InputContext) -> pl.LazyFrame:
row_count_offset=context.metadata.get("row_count_offset", 0),
low_memory=context.metadata.get("low_memory", False),
use_statistics=context.metadata.get("use_statistics", True),
hive_partitioning=context.metadata.get("hive_partitioning", True),
retries=context.metadata.get("retries", 0),
)

if Version(pl.__version__) > Version("0.19.4"):
kwargs["hive_partitioning"] = context.metadata.get("hive_partitioning", True)
kwargs["retries"] = context.metadata.get("retries", 0)

return pl.scan_parquet(str(path), storage_options=storage_options, **kwargs) # type: ignore


class PolarsParquetIOManager(BasePolarsUPathIOManager):
extension: str = ".parquet"
Expand Down

0 comments on commit 4b717f9

Please sign in to comment.