@@ -4489,9 +4489,11 @@ def select(self, *args):
4489
4489
4490
4490
Parameters
4491
4491
----------
4492
- *args : hashable or tuple of hashable
4493
- The names or the columns to return. In general this will be strings,
4492
+ *args : hashable or a single list of hashable
4493
+ The names of the columns to return. In general this will be strings,
4494
4494
but pandas supports other types of column names, if they are hashable.
4495
+ If only one argument of type list is provided, the elements of the
4496
+ list will be considered the names of the columns to be returned.
4495
4497
4496
4498
Returns
4497
4499
-------
@@ -4520,9 +4522,17 @@ def select(self, *args):
4520
4522
1 Alice 22
4521
4523
2 Bob 35
4522
4524
4525
+ A list can also be used to specify the names of the columns to return:
4526
+
4527
+ >>> df.select(["last_name", "age"])
4528
+ last_name age
4529
+ 0 Smith 61
4530
+ 1 Cooper 22
4531
+ 2 Marley 35
4532
+
4523
4533
Selecting with a pattern can be done with Python expressions:
4524
4534
4525
- >>> df.select(* [col for col in df.columns if col.endswith("_name")])
4535
+ >>> df.select([col for col in df.columns if col.endswith("_name")])
4526
4536
first_name last_name
4527
4537
0 John Smith
4528
4538
1 Alice Cooper
@@ -4536,15 +4546,6 @@ def select(self, *args):
4536
4546
1 Cooper Alice 22
4537
4547
2 Marley Bob 35
4538
4548
4539
- In case the columns are in a list, Python unpacking with star can be used:
4540
-
4541
- >>> columns = ["last_name", "age"]
4542
- >>> df.select(*columns)
4543
- last_name age
4544
- 0 Smith 61
4545
- 1 Cooper 22
4546
- 2 Marley 35
4547
-
4548
4549
Note that a DataFrame is always returned. If a single column is requested, a
4549
4550
DataFrame with a single column is returned, not a Series:
4550
4551
@@ -4563,8 +4564,8 @@ def select(self, *args):
4563
4564
... ),
4564
4565
... )
4565
4566
4566
- If just column names are provided, they will select from the first level of the
4567
- ``MultiIndex``:
4567
+ If column names are provided, they will select from the first level of
4568
+ the ``MultiIndex``:
4568
4569
4569
4570
>>> df.select("names")
4570
4571
names
@@ -4573,7 +4574,7 @@ def select(self, *args):
4573
4574
1 Alice Cooper
4574
4575
2 Bob Marley
4575
4576
4576
- To select from multiple or all levels, tuples can be provided :
4577
+ To select from multiple or all levels, tuples can be used:
4577
4578
4578
4579
>>> df.select(("names", "last_name"), ("other", "age"))
4579
4580
names other
@@ -4583,11 +4584,16 @@ def select(self, *args):
4583
4584
2 Marley 35
4584
4585
"""
4585
4586
if args and isinstance (args [0 ], list ):
4586
- raise ValueError (
4587
- "`DataFrame.select` does not support a list. Please use "
4588
- "`df.select('col1', 'col2',...)` or `df.select(*['col1', 'col2',...])` "
4589
- "instead"
4590
- )
4587
+ if len (args ) == 1 :
4588
+ args = args [0 ]
4589
+ else :
4590
+ raise ValueError (
4591
+ "`DataFrame.select` supports individual columns "
4592
+ "`df.select('col1', 'col2',...)` or a list "
4593
+ "`df.select(['col1', 'col2',...])`, but not both. "
4594
+ "You can unpack the list if you have a mix: "
4595
+ "`df.select(*['col1', 'col2'], 'col3')`."
4596
+ )
4591
4597
4592
4598
indexer = self .columns ._get_indexer_strict (list (args ), "columns" )[1 ]
4593
4599
return self .take (indexer , axis = 1 )
0 commit comments