Skip to content

Attempt to remove Arrow support from obspec.List #14

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 13 additions & 78 deletions src/obspec/_list.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,12 @@
from __future__ import annotations

from typing import Generic, Literal, Protocol, Self, TypedDict, TypeVar, overload
from typing import TYPE_CHECKING, Protocol, Self, TypedDict

from ._meta import ObjectMeta
from .arrow import ArrowArrayExportable, ArrowStreamExportable
if TYPE_CHECKING:
from ._meta import ObjectMeta

ListChunkType_co = TypeVar(
"ListChunkType_co",
list[ObjectMeta],
ArrowArrayExportable,
ArrowStreamExportable,
covariant=True,
)
"""The data structure used for holding list results.

By default, listing APIs return a `list` of [`ObjectMeta`][obspec.ObjectMeta]. However
for improved performance when listing large buckets, you can pass `return_arrow=True`.
Then an Arrow `RecordBatch` will be returned instead.
"""


class ListResult(TypedDict, Generic[ListChunkType_co]):
class ListResult(TypedDict):
"""Result of a list call.

Includes objects, prefixes (directories) and a token for the next set of results.
Expand All @@ -31,11 +17,11 @@ class ListResult(TypedDict, Generic[ListChunkType_co]):
common_prefixes: list[str]
"""Prefixes that are common (like directories)"""

objects: ListChunkType_co
objects: list[ObjectMeta]
"""Object metadata for the listing"""


class ListStream(Protocol[ListChunkType_co]):
class ListStream(Protocol):
"""A stream of [ObjectMeta][obspec.ObjectMeta] that can be polled in a sync or
async fashion.
""" # noqa: D205
Expand All @@ -48,58 +34,39 @@ def __iter__(self) -> Self:
"""Return `Self` as an async iterator."""
...

async def collect_async(self) -> ListChunkType_co:
async def collect_async(self) -> list[ObjectMeta]:
"""Collect all remaining ObjectMeta objects in the stream.

This ignores the `chunk_size` parameter from the `list` call and collects all
remaining data into a single chunk.
"""
...

def collect(self) -> ListChunkType_co:
def collect(self) -> list[ObjectMeta]:
"""Collect all remaining ObjectMeta objects in the stream.

This ignores the `chunk_size` parameter from the `list` call and collects all
remaining data into a single chunk.
"""
...

async def __anext__(self) -> ListChunkType_co:
async def __anext__(self) -> list[ObjectMeta]:
"""Return the next chunk of ObjectMeta in the stream."""
...

def __next__(self) -> ListChunkType_co:
def __next__(self) -> list[ObjectMeta]:
"""Return the next chunk of ObjectMeta in the stream."""
...


class List(Protocol):
@overload
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: Literal[True],
) -> ListStream[ArrowArrayExportable]: ...
@overload
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: Literal[False] = False,
) -> ListStream[list[ObjectMeta]]: ...
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: bool = False,
) -> ListStream[ArrowArrayExportable] | ListStream[list[ObjectMeta]]:
) -> ListStream:
"""List all the objects with the given prefix.

Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix of
Expand Down Expand Up @@ -193,26 +160,10 @@ def list(


class ListWithDelimiter(Protocol):
@overload
def list_with_delimiter(
self,
prefix: str | None = None,
*,
return_arrow: Literal[True],
) -> ListResult[ArrowStreamExportable]: ...
@overload
def list_with_delimiter(
self,
prefix: str | None = None,
*,
return_arrow: Literal[False] = False,
) -> ListResult[list[ObjectMeta]]: ...
def list_with_delimiter(
self,
prefix: str | None = None,
*,
return_arrow: bool = False,
) -> ListResult[ArrowStreamExportable] | ListResult[list[ObjectMeta]]:
) -> ListResult:
"""List objects with the given prefix and an implementation specific
delimiter.

Expand Down Expand Up @@ -246,26 +197,10 @@ def list_with_delimiter(


class ListWithDelimiterAsync(Protocol):
@overload
async def list_with_delimiter_async(
self,
prefix: str | None = None,
*,
return_arrow: Literal[True],
) -> ListResult[ArrowStreamExportable]: ...
@overload
async def list_with_delimiter_async(
self,
prefix: str | None = None,
*,
return_arrow: Literal[False] = False,
) -> ListResult[list[ObjectMeta]]: ...
async def list_with_delimiter_async(
self,
prefix: str | None = None,
*,
return_arrow: bool = False,
) -> ListResult[ArrowStreamExportable] | ListResult[list[ObjectMeta]]:
) -> ListResult:
"""Call `list_with_delimiter` asynchronously.

Refer to the documentation for
Expand Down
Empty file added tests/__init__.py
Empty file.
81 changes: 81 additions & 0 deletions tests/test_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Generic, Literal, Self, TypeVar, overload

from arro3.core import RecordBatch, Table

from obspec._meta import ObjectMeta

if TYPE_CHECKING:
import obspec


def test_list_arrow_compatible():

Check failure on line 13 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (C901)

tests/test_list.py:13:5: C901 `test_list_arrow_compatible` is too complex (11 > 10)
ListChunkType = TypeVar("ListChunkType", list[ObjectMeta], RecordBatch, Table)

class ListStream(Generic[ListChunkType]):
def __aiter__(self) -> Self:
"""Return `Self` as an async iterator."""
...

Check failure on line 19 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (PIE790)

tests/test_list.py:19:13: PIE790 Unnecessary `...` literal

def __iter__(self) -> Self:
"""Return `Self` as an async iterator."""
...

Check failure on line 23 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (PIE790)

tests/test_list.py:23:13: PIE790 Unnecessary `...` literal

async def collect_async(self) -> ListChunkType:
"""Collect all remaining ObjectMeta objects in the stream.

This ignores the `chunk_size` parameter from the `list` call and collects all

Check failure on line 28 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (E501)

tests/test_list.py:28:89: E501 Line too long (89 > 88)
remaining data into a single chunk.
"""
...

Check failure on line 31 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (PIE790)

tests/test_list.py:31:13: PIE790 Unnecessary `...` literal

def collect(self) -> ListChunkType:
"""Collect all remaining ObjectMeta objects in the stream.

This ignores the `chunk_size` parameter from the `list` call and collects all

Check failure on line 36 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (E501)

tests/test_list.py:36:89: E501 Line too long (89 > 88)
remaining data into a single chunk.
"""
...

Check failure on line 39 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (PIE790)

tests/test_list.py:39:13: PIE790 Unnecessary `...` literal

async def __anext__(self) -> ListChunkType:
"""Return the next chunk of ObjectMeta in the stream."""
...

Check failure on line 43 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (PIE790)

tests/test_list.py:43:13: PIE790 Unnecessary `...` literal

def __next__(self) -> ListChunkType:
"""Return the next chunk of ObjectMeta in the stream."""
...

Check failure on line 47 in tests/test_list.py

View workflow job for this annotation

GitHub Actions / Run pre-commit on Python code

Ruff (PIE790)

tests/test_list.py:47:13: PIE790 Unnecessary `...` literal

class ObstoreList:
@overload
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: Literal[True],
) -> ListStream[RecordBatch]: ...
@overload
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: Literal[False] = False,
) -> ListStream[list[ObjectMeta]]: ...
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: bool = False,
) -> ListStream[RecordBatch] | ListStream[list[ObjectMeta]]:
pass

def accepts_obspec_list(provider: obspec.List):
pass

accepts_obspec_list(ObstoreList())
Loading