Making select work with a list parameter

datapythonista · datapythonista · commit bf2a9ea1c305 · 2025-06-12T12:07:59.000+02:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4489,9 +4489,11 @@ def select(self, *args):
 
         Parameters
         ----------
-        *args : hashable or tuple of hashable
-            The names or the columns to return. In general this will be strings,
+        *args : hashable or a single list of hashable
+            The names of the columns to return. In general this will be strings,
             but pandas supports other types of column names, if they are hashable.
+            If only one argument of type list is provided, the elements of the
+            list will be considered the names of the columns to be returned.
 
         Returns
         -------
@@ -4520,9 +4522,17 @@ def select(self, *args):
         1      Alice   22
         2        Bob   35
 
+        A list can also be used to specify the names of the columns to return:
+
+        >>> df.select(["last_name", "age"])
+          last_name  age
+        0     Smith   61
+        1    Cooper   22
+        2    Marley   35
+
         Selecting with a pattern can be done with Python expressions:
 
-        >>> df.select(*[col for col in df.columns if col.endswith("_name")])
+        >>> df.select([col for col in df.columns if col.endswith("_name")])
           first_name last_name
         0       John     Smith
         1      Alice    Cooper
@@ -4536,15 +4546,6 @@ def select(self, *args):
         1    Cooper      Alice   22
         2    Marley        Bob   35
 
-        In case the columns are in a list, Python unpacking with star can be used:
-
-        >>> columns = ["last_name", "age"]
-        >>> df.select(*columns)
-                  last_name  age
-        0     Smith   61
-        1    Cooper   22
-        2    Marley   35
-
         Note that a DataFrame is always returned. If a single column is requested, a
         DataFrame with a single column is returned, not a Series:
 
@@ -4563,8 +4564,8 @@ def select(self, *args):
         ...     ),
         ... )
 
-        If just column names are provided, they will select from the first level of the
-        ``MultiIndex``:
+        If column names are provided, they will select from the first level of
+        the ``MultiIndex``:
 
         >>> df.select("names")
               names
@@ -4573,7 +4574,7 @@ def select(self, *args):
         1      Alice    Cooper
         2        Bob    Marley
 
-        To select from multiple or all levels, tuples can be provided:
+        To select from multiple or all levels, tuples can be used:
 
         >>> df.select(("names", "last_name"), ("other", "age"))
               names other
@@ -4583,11 +4584,16 @@ def select(self, *args):
         2    Marley    35
         """
         if args and isinstance(args[0], list):
-            raise ValueError(
-                "`DataFrame.select` does not support a list. Please use "
-                "`df.select('col1', 'col2',...)` or `df.select(*['col1', 'col2',...])` "
-                "instead"
-            )
+            if len(args) == 1:
+                args = args[0]
+            else:
+                raise ValueError(
+                    "`DataFrame.select` supports individual columns "
+                    "`df.select('col1', 'col2',...)` or a list "
+                    "`df.select(['col1', 'col2',...])`, but not both. "
+                    "You can unpack the list if you have a mix: "
+                    "`df.select(*['col1', 'col2'], 'col3')`."
+                )
 
         indexer = self.columns._get_indexer_strict(list(args), "columns")[1]
         return self.take(indexer, axis=1)
diff --git a/pandas/tests/frame/methods/test_select.py b/pandas/tests/frame/methods/test_select.py
@@ -44,9 +44,14 @@ def test_select_duplicated(self, regular_df):
         result = regular_df.select("a", "d", "a")
         assert result.columns.tolist() == expected
 
-    def test_select_list(self, regular_df):
-        with pytest.raises(ValueError, match="does not support a list"):
-            regular_df.select(["a", "b"])
+    def test_select_single_list(self, regular_df):
+        expected = DataFrame({"a": [1, 2], "c": [5, 6]})
+        result = regular_df.select(["a", "c"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_select_list_and_string(self, regular_df):
+        with pytest.raises(ValueError, match="supports individual columns"):
+            regular_df.select(["a", "c"], "b")
 
     def test_select_missing(self, regular_df):
         with pytest.raises(KeyError, match=r"None of .* are in the \[columns\]"):
@@ -80,6 +85,14 @@ def test_select_multiindex_multiple_columns(self, multiindex_df):
         result = multiindex_df.select(("A", "c"), ("B", "e"))
         tm.assert_frame_equal(result, expected)
 
+    def test_select_multiindex_multiple_columns_as_list(self, multiindex_df):
+        expected = DataFrame(
+            [(0, 4), (1, 5)],
+            columns=pd.MultiIndex.from_tuples([("A", "c"), ("B", "e")]),
+        )
+        result = multiindex_df.select([("A", "c"), ("B", "e")])
+        tm.assert_frame_equal(result, expected)
+
     def test_select_multiindex_missing(self, multiindex_df):
         with pytest.raises(KeyError, match="not in index"):
             multiindex_df.select("Z")