
feat!: Remove Arrow from list protocol #26


Merged: 2 commits, May 21, 2025
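
For callers that previously passed `return_arrow=True`, the migration is to drop that argument and, if Arrow output is still wanted, convert the plain `ObjectMeta` chunks downstream. A minimal sketch, assuming a concrete implementation imported as `obs` and an already-constructed `store` (both illustrative, following the docstring examples in this diff), with the pyarrow conversion being a suggested workaround rather than part of this change:

```py
import pyarrow as pa

# list() now always yields chunks of ObjectMeta dicts; return_arrow is gone.
stream = obs.list(store, chunk_size=1000)

for chunk in stream:
    # Convert a chunk (a sequence of dicts) to Arrow yourself if needed.
    table = pa.Table.from_pylist(list(chunk))
    print(table.num_rows)
    break
```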
3 changes: 0 additions & 3 deletions docs/api/arrow.md

This file was deleted.

3 changes: 0 additions & 3 deletions mkdocs.yml
@@ -41,7 +41,6 @@ nav:
- api/put.md
- api/rename.md
- api/attributes.md
- api/arrow.md
- CHANGELOG.md

watch:
@@ -121,8 +120,6 @@ plugins:
signature_crossrefs: true

inventories:
- https://arrow.apache.org/docs/objects.inv
- https://docs.pola.rs/api/python/stable/objects.inv
- https://docs.python.org/3/objects.inv
- https://filesystem-spec.readthedocs.io/en/latest/objects.inv

147 changes: 16 additions & 131 deletions src/obspec/_list.py
@@ -1,30 +1,22 @@
from __future__ import annotations

import sys
from collections.abc import Sequence
from typing import Generic, Literal, Protocol, TypedDict, TypeVar, overload
from typing import TYPE_CHECKING, Generic, Protocol, TypedDict, TypeVar

from ._meta import ObjectMeta
from .arrow import ArrowArrayExportable, ArrowStreamExportable
if TYPE_CHECKING:
import sys
from collections.abc import Sequence

if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self
from ._meta import ObjectMeta

ListChunkType_co = TypeVar(
"ListChunkType_co",
Sequence[ObjectMeta],
ArrowArrayExportable,
ArrowStreamExportable,
covariant=True,
)
"""The data structure used for holding list results.
if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self

By default, listing APIs return a `list` of [`ObjectMeta`][obspec.ObjectMeta]. However
for improved performance when listing large buckets, you can pass `return_arrow=True`.
Then an Arrow `RecordBatch` will be returned instead.
"""

ListChunkType_co = TypeVar("ListChunkType_co", covariant=True)
"""The data structure used for holding list results."""


class ListResult(TypedDict, Generic[ListChunkType_co]):
@@ -83,32 +75,13 @@ async def __anext__(self) -> ListChunkType_co:


class List(Protocol):
@overload
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: Literal[True],
) -> ListIterator[ArrowArrayExportable]: ...
@overload
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: Literal[False] = False,
) -> ListIterator[Sequence[ObjectMeta]]: ...
def list(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: bool = False,
) -> ListIterator[ArrowArrayExportable] | ListIterator[Sequence[ObjectMeta]]:
) -> ListIterator[Sequence[ObjectMeta]]:
"""List all the objects with the given prefix.

Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix of
@@ -134,28 +107,6 @@ def list(
break
```

Return large list results as [Arrow](https://arrow.apache.org/). This is most
useful with large list operations. In this case you may want to increase the
`chunk_size` parameter.

```py
stream = obs.list(store, chunk_size=1000, return_arrow=True)
# Stream is now an iterable/async iterable of `RecordBatch`es
for batch in stream:
print(batch.num_rows) # 100

# If desired, convert to a pyarrow RecordBatch (zero-copy) with
# `pyarrow.record_batch(batch)`
break
```

Collect all list results into a single Arrow `RecordBatch`.

```py
stream = obs.list(store, return_arrow=True)
batch = stream.collect()
```

!!! note
The order of returned [`ObjectMeta`][obspec.ObjectMeta] is not
guaranteed
@@ -169,10 +120,6 @@ def list(
chunk_size: The number of items to collect per chunk in the returned
(async) iterator. All chunks except for the last one will have this many
items. This is ignored in [`collect`][obspec.ListIterator.collect].
return_arrow: If `True`, return each batch of list items as an Arrow
`RecordBatch`, not as a list of Python `dict`s. Arrow removes
serialization overhead between Rust and Python and so this can be
significantly faster for large list operations. Defaults to `False`.

Returns:
A ListStream, which you can iterate through to access list results.
@@ -182,32 +129,13 @@ def list(


class ListAsync(Protocol):
@overload
def list_async(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: Literal[True],
) -> ListStream[ArrowArrayExportable]: ...
@overload
def list_async(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: Literal[False] = False,
) -> ListStream[Sequence[ObjectMeta]]: ...
def list_async(
self,
prefix: str | None = None,
*,
offset: str | None = None,
chunk_size: int = 50,
return_arrow: bool = False,
) -> ListStream[ArrowArrayExportable] | ListStream[Sequence[ObjectMeta]]:
) -> ListStream[Sequence[ObjectMeta]]:
"""List all the objects with the given prefix.

Note that this method itself is **not async**. It's a synchronous method but
@@ -241,10 +169,6 @@ def list_async(
(async) iterator. All chunks except for the last one will have this many
items. This is ignored in
[`collect_async`][obspec.ListStream.collect_async].
return_arrow: If `True`, return each batch of list items as an Arrow
`RecordBatch`, not as a list of Python `dict`s. Arrow removes
serialization overhead between Rust and Python and so this can be
significantly faster for large list operations. Defaults to `False`.

Returns:
A ListStream, which you can iterate through to access list results.
@@ -254,26 +178,10 @@ def list_async(


class ListWithDelimiter(Protocol):
@overload
def list_with_delimiter(
self,
prefix: str | None = None,
*,
return_arrow: Literal[True],
) -> ListResult[ArrowStreamExportable]: ...
@overload
def list_with_delimiter(
self,
prefix: str | None = None,
*,
return_arrow: Literal[False] = False,
) -> ListResult[Sequence[ObjectMeta]]: ...
def list_with_delimiter(
self,
prefix: str | None = None,
*,
return_arrow: bool = False,
) -> ListResult[ArrowStreamExportable] | ListResult[Sequence[ObjectMeta]]:
) -> ListResult[Sequence[ObjectMeta]]:
"""List objects with the given prefix and an implementation specific
delimiter.

@@ -292,13 +200,6 @@ def list_with_delimiter(
Args:
prefix: The prefix within ObjectStore to use for listing. Defaults to None.

Keyword Args:
return_arrow: If `True`, return list results as an Arrow
`Table`, not as a list of Python `dict`s. Arrow removes serialization
overhead between Rust and Python and so this can be significantly faster
for large list operations. Defaults to `False`.


Returns:
ListResult

@@ -307,26 +208,10 @@ def list_with_delimiter(


class ListWithDelimiterAsync(Protocol):
@overload
async def list_with_delimiter_async(
self,
prefix: str | None = None,
*,
return_arrow: Literal[True],
) -> ListResult[ArrowStreamExportable]: ...
@overload
async def list_with_delimiter_async(
self,
prefix: str | None = None,
*,
return_arrow: Literal[False] = False,
) -> ListResult[Sequence[ObjectMeta]]: ...
async def list_with_delimiter_async(
self,
prefix: str | None = None,
*,
return_arrow: bool = False,
) -> ListResult[ArrowStreamExportable] | ListResult[Sequence[ObjectMeta]]:
) -> ListResult[Sequence[ObjectMeta]]:
"""Call `list_with_delimiter` asynchronously.

Refer to the documentation for
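
With the Arrow overloads removed, code written against the `List` protocol only has the `Sequence[ObjectMeta]` chunk shape to handle. A small sketch of a consumer typed against the protocol, assuming `List` is importable from the package root as the docstring cross-references (`obspec.ObjectMeta`, `obspec.ListIterator`) suggest:

```py
from __future__ import annotations

from obspec import List


def count_objects(client: List, prefix: str | None = None) -> int:
    """Count objects under a prefix using the simplified list protocol."""
    total = 0
    for chunk in client.list(prefix, chunk_size=500):
        # Each chunk is a Sequence[ObjectMeta] (plain Python dicts).
        total += len(chunk)
    return total
```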
55 changes: 0 additions & 55 deletions src/obspec/arrow.py

This file was deleted.
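
For reference, the deleted module defined the `ArrowArrayExportable` and `ArrowStreamExportable` protocols that the list methods previously returned (see the old import in `_list.py` above). A rough reconstruction based on the Arrow PyCapsule Interface, not the exact removed code:

```py
from typing import Protocol


class ArrowArrayExportable(Protocol):
    """An object exporting Arrow data via the Arrow PyCapsule Interface."""

    def __arrow_c_array__(self, requested_schema=None) -> tuple[object, object]:
        """Return (ArrowSchema, ArrowArray) PyCapsules."""
        ...


class ArrowStreamExportable(Protocol):
    """An object exporting an Arrow stream via the Arrow PyCapsule Interface."""

    def __arrow_c_stream__(self, requested_schema=None) -> object:
        """Return an ArrowArrayStream PyCapsule."""
        ...
```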
