Skip to content

Commit bf2a9ea

Browse files
Making select work with a list parameter
1 parent 0f64c13 commit bf2a9ea

File tree

2 files changed

+42
-23
lines changed

2 files changed

+42
-23
lines changed

pandas/core/frame.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4489,9 +4489,11 @@ def select(self, *args):
44894489
44904490
Parameters
44914491
----------
4492-
*args : hashable or tuple of hashable
4493-
The names or the columns to return. In general this will be strings,
4492+
*args : hashable or a single list of hashables
4493+
The names of the columns to return. In general this will be strings,
44944494
but pandas supports other types of column names, if they are hashable.
4495+
If only one argument of type list is provided, the elements of the
4496+
list will be considered the names of the columns to be returned.
44954497
44964498
Returns
44974499
-------
@@ -4520,9 +4522,17 @@ def select(self, *args):
45204522
1 Alice 22
45214523
2 Bob 35
45224524
4525+
A list can also be used to specify the names of the columns to return:
4526+
4527+
>>> df.select(["last_name", "age"])
4528+
last_name age
4529+
0 Smith 61
4530+
1 Cooper 22
4531+
2 Marley 35
4532+
45234533
Selecting with a pattern can be done with Python expressions:
45244534
4525-
>>> df.select(*[col for col in df.columns if col.endswith("_name")])
4535+
>>> df.select([col for col in df.columns if col.endswith("_name")])
45264536
first_name last_name
45274537
0 John Smith
45284538
1 Alice Cooper
@@ -4536,15 +4546,6 @@ def select(self, *args):
45364546
1 Cooper Alice 22
45374547
2 Marley Bob 35
45384548
4539-
In case the columns are in a list, Python unpacking with star can be used:
4540-
4541-
>>> columns = ["last_name", "age"]
4542-
>>> df.select(*columns)
4543-
last_name age
4544-
0 Smith 61
4545-
1 Cooper 22
4546-
2 Marley 35
4547-
45484549
Note that a DataFrame is always returned. If a single column is requested, a
45494550
DataFrame with a single column is returned, not a Series:
45504551
@@ -4563,8 +4564,8 @@ def select(self, *args):
45634564
... ),
45644565
... )
45654566
4566-
If just column names are provided, they will select from the first level of the
4567-
``MultiIndex``:
4567+
If column names are provided, they will select from the first level of
4568+
the ``MultiIndex``:
45684569
45694570
>>> df.select("names")
45704571
names
@@ -4573,7 +4574,7 @@ def select(self, *args):
45734574
1 Alice Cooper
45744575
2 Bob Marley
45754576
4576-
To select from multiple or all levels, tuples can be provided:
4577+
To select from multiple or all levels, tuples can be used:
45774578
45784579
>>> df.select(("names", "last_name"), ("other", "age"))
45794580
names other
@@ -4583,11 +4584,16 @@ def select(self, *args):
45834584
2 Marley 35
45844585
"""
45854586
if args and isinstance(args[0], list):
4586-
raise ValueError(
4587-
"`DataFrame.select` does not support a list. Please use "
4588-
"`df.select('col1', 'col2',...)` or `df.select(*['col1', 'col2',...])` "
4589-
"instead"
4590-
)
4587+
if len(args) == 1:
4588+
args = args[0]
4589+
else:
4590+
raise ValueError(
4591+
"`DataFrame.select` supports individual columns "
4592+
"`df.select('col1', 'col2',...)` or a list "
4593+
"`df.select(['col1', 'col2',...])`, but not both. "
4594+
"You can unpack the list if you have a mix: "
4595+
"`df.select(*['col1', 'col2'], 'col3')`."
4596+
)
45914597

45924598
indexer = self.columns._get_indexer_strict(list(args), "columns")[1]
45934599
return self.take(indexer, axis=1)

pandas/tests/frame/methods/test_select.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,14 @@ def test_select_duplicated(self, regular_df):
4444
result = regular_df.select("a", "d", "a")
4545
assert result.columns.tolist() == expected
4646

47-
def test_select_list(self, regular_df):
48-
with pytest.raises(ValueError, match="does not support a list"):
49-
regular_df.select(["a", "b"])
47+
def test_select_single_list(self, regular_df):
48+
expected = DataFrame({"a": [1, 2], "c": [5, 6]})
49+
result = regular_df.select(["a", "c"])
50+
tm.assert_frame_equal(result, expected)
51+
52+
def test_select_list_and_string(self, regular_df):
53+
with pytest.raises(ValueError, match="supports individual columns"):
54+
regular_df.select(["a", "c"], "b")
5055

5156
def test_select_missing(self, regular_df):
5257
with pytest.raises(KeyError, match=r"None of .* are in the \[columns\]"):
@@ -80,6 +85,14 @@ def test_select_multiindex_multiple_columns(self, multiindex_df):
8085
result = multiindex_df.select(("A", "c"), ("B", "e"))
8186
tm.assert_frame_equal(result, expected)
8287

88+
def test_select_multiindex_multiple_columns_as_list(self, multiindex_df):
89+
expected = DataFrame(
90+
[(0, 4), (1, 5)],
91+
columns=pd.MultiIndex.from_tuples([("A", "c"), ("B", "e")]),
92+
)
93+
result = multiindex_df.select([("A", "c"), ("B", "e")])
94+
tm.assert_frame_equal(result, expected)
95+
8396
def test_select_multiindex_missing(self, multiindex_df):
8497
with pytest.raises(KeyError, match="not in index"):
8598
multiindex_df.select("Z")

0 commit comments

Comments
 (0)