Skip to content

Commit

Permalink
refactor: from_arrow (#917)
Browse files Browse the repository at this point in the history
  • Loading branch information
ion-elgreco authored Oct 15, 2024
1 parent b4b03fe commit 494b89a
Showing 1 changed file with 31 additions and 3 deletions.
34 changes: 31 additions & 3 deletions python/datafusion/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from datafusion.record_batch import RecordBatchStream
from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF

from typing import Any, TYPE_CHECKING
from typing import Any, TYPE_CHECKING, Protocol
from typing_extensions import deprecated

if TYPE_CHECKING:
Expand All @@ -41,6 +41,28 @@
from datafusion.plan import LogicalPlan, ExecutionPlan


class ArrowStreamExportable(Protocol):
    """Structural type for objects that export an Arrow C Stream.

    Any object that provides ``__arrow_c_stream__`` satisfies this protocol.
    The method is defined by the Arrow PyCapsule Interface:
    https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
    """

    def __arrow_c_stream__(
        self,
        requested_schema: object | None = None,
    ) -> object:
        """Export the underlying data as an Arrow C Stream object.

        Args:
            requested_schema: Optional schema hint passed by the consumer;
                ``None`` means no specific schema is requested.

        Returns:
            The exported stream object (see the linked interface
            specification for its exact form).
        """
        ...


class ArrowArrayExportable(Protocol):
    """Structural type for objects that export an Arrow C Array.

    Any object that provides ``__arrow_c_array__`` satisfies this protocol.
    The method is defined by the Arrow PyCapsule Interface:
    https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
    """

    def __arrow_c_array__(
        self,
        requested_schema: object | None = None,
    ) -> tuple[object, object]:
        """Export the underlying data as a pair of Arrow C objects.

        Args:
            requested_schema: Optional schema hint passed by the consumer;
                ``None`` means no specific schema is requested.

        Returns:
            A two-element tuple of exported objects (see the linked
            interface specification for what each element holds).
        """
        ...


class SessionConfig:
"""Session configuration options."""

Expand Down Expand Up @@ -592,12 +614,18 @@ def from_pydict(
"""
return DataFrame(self.ctx.from_pydict(data, name))

def from_arrow(self, data: Any, name: str | None = None) -> DataFrame:
def from_arrow(
self,
data: ArrowStreamExportable | ArrowArrayExportable,
name: str | None = None,
) -> DataFrame:
"""Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow source.
The Arrow data source can be any object that implements either
``__arrow_c_stream__`` or ``__arrow_c_array__``. For the latter, it must return
a struct array. Common examples of sources from pyarrow include
a struct array.
Arrow data can come from libraries such as Polars, pandas, or PyArrow.
Args:
data: Arrow data source.
Expand Down

0 comments on commit 494b89a

Please sign in to comment.