adding explode (#327)

Graciaaa3 · web-flow · commit 6158e2942dd7 · 2025-08-14T16:16:49.000-07:00
* adding explode * update docstring * adding explode unit tests * adding reference and example docstring * Revert "adding explode unit tests" recover describe function tests This reverts commit ac37d6f. * update explode tests * docstring typo * adding coverage tests * change naming convention and update docstring * docstring format * docstring format again * docstring format again x2 * docstring grammar
diff --git a/docs/reference/nestedframe.rst b/docs/reference/nestedframe.rst
@@ -42,6 +42,7 @@ Extended Pandas.DataFrame Interface
     NestedFrame.min
     NestedFrame.max
     NestedFrame.describe
+    NestedFrame.explode
 
 I/O
 ~~~~~~~~~
diff --git a/src/nested_pandas/nestedframe/core.py b/src/nested_pandas/nestedframe/core.py
@@ -974,6 +974,94 @@ def describe(self, exclude_nest: bool = False, percentiles=None, include=None, e
 
         return NestedFrame(pd.concat(result, axis=1))
 
+    def explode(self, column: IndexLabel, ignore_index: bool = False):
+        """
+
+        Transform each element of a list-like base column to a row, replicating index value.
+        Or unnest a specified nested column with the other columns being replicated as part
+        of the unnest. The exploded columns will be added to the right of the rest of the frame.
+
+        Parameters
+        ----------
+        column : IndexLabel
+            Base column(s) or nested column to explode.
+            For multiple base columns, specify a non-empty list with each element being a string or tuple.
+            For all specified base columns, their list-like data on same row of the frame
+            must have matching length.
+            Only a single nested column can be exploded at a time. Indicate the nested column as a string.
+        ignore_index : bool, default False
+            If True, the resulting index will be labeled 0, 1, ..., n - 1.
+
+        Returns
+        -------
+        NestedFrame
+            A new NestedFrame with the specified column(s) exploded.
+
+        Raises
+        ------
+        ValueError
+            If specified columns to explode have more than one nested column,
+            or contain a mix of nested and base columns.
+
+        See Also
+        --------
+        :meth:`pandas.DataFrame.explode`
+
+        Examples
+        --------
+        >>> from nested_pandas.datasets.generation import generate_data
+        >>> nf = generate_data(3,3, seed=1)
+
+        >>> nf_explode = nf.explode(column="nested")
+        >>> nf_explode
+                  a         b          t       flux band
+        0  0.417022  0.604665   3.725204  67.046751    g
+        0  0.417022  0.604665  10.776335  14.038694    g
+        0  0.417022  0.604665   4.089045  96.826158    g
+        1  0.720324  0.293512   6.911215   41.73048    r
+        1  0.720324  0.293512    8.38389  19.810149    r
+        1  0.720324  0.293512  17.562349  31.342418    g
+        2  0.000114  0.184677   7.935349  55.868983    r
+        2  0.000114  0.184677   13.70439  80.074457    r
+        2  0.000114  0.184677   0.547752  69.232262    g
+
+        """
+
+        if isinstance(column, list):
+            nested_in_list = [col for col in column if col in self.nested_columns]
+            # list contains more than 1 nested columns
+            if len(nested_in_list) > 1:
+                raise ValueError(
+                    f"Exploding multiple nested columns at once is not supported.\n"
+                    f"Nested columns: {nested_in_list}"
+                )
+
+            # list contains mixing nested & base columns
+            if len(nested_in_list) == 1 and len(column) > 1:
+                raise ValueError(
+                    f"Exploding nested column together with base columns is not supported.\n"
+                    f"Nested column: {nested_in_list[0]}"
+                )
+
+        # normalize a single-element list to string
+        if isinstance(column, list) and len(column) == 1:
+            column = column[0]
+
+        # handle single nested column explode
+        if isinstance(column, str) and column in self.nested_columns:
+            selected_nested_df = self[column].nest.to_flat()
+            other_col = [col for col in self.columns if col != column]
+            other_col_df = self[other_col]
+            result = other_col_df.join(selected_nested_df)
+
+            if ignore_index:
+                result = result.reset_index(drop=True)
+
+            return NestedFrame(result)
+
+        # otherwise just use pandas' explode
+        return NestedFrame(super().explode(column=column, ignore_index=ignore_index))
+
     def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
         """Evaluate a string describing operations on NestedFrame columns.
 
diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py
@@ -1403,7 +1403,7 @@ def test_min():
 
 
 def test_max():
-    """Test max function return correct result with an without the nested columns"""
+    """Test max function return correct result with and without the nested columns"""
     base = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6], "c": ["x", "y", "z"]}, index=[0, 1, 2])
     nested = pd.DataFrame(
         data={"d": [10, 11, 20, 21, 3, 31, 32], "y": [1, 10, 20, 30, 40, 50, 60]}, index=[0, 0, 1, 1, 1, 2, 2]
@@ -1638,6 +1638,61 @@ def test_describe():
     assert "top" not in r18.index
 
 
+def test_explode():
+    """Test NestedFrame.explode gives correct result for flattening specified nested columns"""
+    base = NestedFrame(
+        data={
+            "a": [[1, 2, 3], 4, [5, 6]],
+            "b": ["2", "4", "6"],
+            "c": [["x1", "x2", "x3"], "y", ["z1", "z2"]],
+        },
+        index=[0, 1, 2],
+    )
+
+    nested_num = pd.DataFrame(
+        data={"d": [10, 11, 20, 21, 30, 31, 32], "e": [1, 2, 3, 4, 5, 6, 7]}, index=[0, 0, 1, 1, 1, 2, 2]
+    )
+    nested_mix = pd.DataFrame(
+        data={"f": ["A", "B", "C", "D", "E", "A", "A", "B"], "g": [5, 4, 7, 5, 1, 9, 3, 4]},
+        index=[0, 0, 0, 1, 1, 2, 2, 2],
+    )
+    base = base.add_nested(nested_num, "nested_num").add_nested(nested_mix, "nested_mix")
+
+    # explode on base columns
+    r1 = base.explode(column=["a"])
+    assert r1.shape[0] == 6
+    assert r1.shape[1] == 5
+    expected1 = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 0, 0, 1, 2, 2])
+    assert (r1["a"] == expected1).all()
+
+    r2 = base.explode(column="c", ignore_index=True)
+    expected2 = pd.Series(["x1", "x2", "x3", "y", "z1", "z2"])
+    assert (r2["c"] == expected2).all()
+
+    r3 = base.explode(column=["a", "c"])
+    assert (r3["a"] == expected1).all()
+    expected3 = pd.Series(["x1", "x2", "x3", "y", "z1", "z2"], index=[0, 0, 0, 1, 2, 2])
+    assert (r3["c"] == expected3).all()
+
+    # explode on nested column error
+    with pytest.raises(ValueError):
+        base.explode(column=["nested_num", "nested_mix"])
+
+    with pytest.raises(ValueError):
+        base.explode(column=["nested_num", "a"])
+
+    # explode on nested column
+    r4 = base.explode(column="nested_num")
+    assert r4.shape[1] == 6
+    expected4 = pd.Series([1, 2, 3, 4, 5, 6, 7], index=[0, 0, 1, 1, 1, 2, 2])
+    assert (r4["e"] == expected4).all()
+
+    r5 = base.explode(column="nested_mix", ignore_index=True)
+    assert r5.shape[1] == 6
+    expected5 = pd.Series(["A", "B", "C", "D", "E", "A", "A", "B"])
+    assert (r5["f"] == expected5).all()
+
+
 def test_eval():
     """
     Test basic behavior of NestedFrame.eval, and that it can handle nested references