Skip to content

feature #49580: support new-style float_format string in to_csv #61650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,25 @@ def time_frame(self, kind):
self.df.to_csv(self.fname)


class ToCSVFloatFormatVariants(BaseIO):
    """Benchmark ``DataFrame.to_csv`` for each accepted kind of ``float_format``.

    Every ``time_*`` method writes the same 1000 x 1000 frame of random
    floats to ``fname``; only the ``float_format`` argument differs.
    """

    fname = "__test__.csv"

    def setup(self):
        # A fixed seed keeps the written data identical from run to run.
        generator = np.random.default_rng(seed=42)
        values = generator.random((1000, 1000))
        self.df = DataFrame(values)

    def time_old_style_percent_format(self):
        # printf-style template.
        self.df.to_csv(path_or_buf=self.fname, float_format="%.6f")

    def time_new_style_brace_format(self):
        # str.format-style template with the same precision as above.
        self.df.to_csv(path_or_buf=self.fname, float_format="{:.6f}")

    def time_new_style_thousands_format(self):
        # str.format-style template using the "," grouping option.
        self.df.to_csv(path_or_buf=self.fname, float_format="{:,.2f}")

    def time_callable_format(self):
        # User-supplied callable applied to each float.
        def six_decimals(x):
            return f"{x:.6f}"

        self.df.to_csv(path_or_buf=self.fname, float_format=six_decimals)


class ToCSVMultiIndexUnusedLevels(BaseIO):
fname = "__test__.csv"

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Other enhancements
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
Expand Down
25 changes: 24 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def __init__(
self.na_rep = na_rep
self.formatters = self._initialize_formatters(formatters)
self.justify = self._initialize_justify(justify)
self.float_format = float_format
self.float_format = self._validate_float_format(float_format)
self.sparsify = self._initialize_sparsify(sparsify)
self.show_index_names = index_names
self.decimal = decimal
Expand Down Expand Up @@ -849,6 +849,29 @@ def _get_column_name_list(self) -> list[Hashable]:
names.append("" if columns.name is None else columns.name)
return names

def _validate_float_format(
    self, fmt: FloatFormatType | None
) -> FloatFormatType | None:
    """
    Validate ``float_format`` and convert new-style templates to callables.

    Parameters
    ----------
    fmt : str, callable or None
        ``None`` and callables are returned unchanged. A string is treated
        either as an old-style (``%``) template, which is returned as-is,
        or as a new-style (``str.format``) template, which is returned as
        the bound ``fmt.format`` callable.

    Returns
    -------
    str, callable or None
        The value to store as the formatter's ``float_format``.

    Raises
    ------
    ValueError
        If ``fmt`` is not ``None``, a string or a callable, or if it is a
        string without ``%`` that cannot format a float with
        ``str.format``.
    """
    if fmt is None or callable(fmt):
        return fmt

    if not isinstance(fmt, str):
        raise ValueError("float_format must be a string or callable")

    has_percent = "%" in fmt
    if has_percent:
        if "{" not in fmt:
            # No replacement field at all: unambiguously old-style.
            return fmt
        # Both "%" and "{" are present. A "%" alone does not prove the
        # template is old-style: "{:.2%}" uses the percent *presentation
        # type* of str.format. Old-style wins whenever it actually works,
        # so templates such as "{%.2f}" keep their existing meaning.
        try:
            _ = fmt % 1.0
        except (TypeError, ValueError):
            pass
        else:
            return fmt

    try:
        _ = fmt.format(1.0)  # Probe with an arbitrary float
    except (ValueError, KeyError, IndexError, AttributeError, TypeError) as err:
        # AttributeError/TypeError cover templates like "{.foo}" or "{[0]}",
        # which fail on attribute/item lookup rather than on parsing.
        if has_percent:
            # Neither style works; hand the string back untouched so it is
            # still treated as an old-style template, as it was before.
            return fmt
        raise ValueError(f"Invalid new-style format string {fmt!r}") from err
    return fmt.format


class DataFrameRenderer:
"""Class for creating dataframe output in multiple formats.
Expand Down
137 changes: 137 additions & 0 deletions pandas/tests/io/formats/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,3 +741,140 @@ def test_to_csv_iterative_compression_buffer(compression):
pd.read_csv(buffer, compression=compression, index_col=0), df
)
assert not buffer.closed


def test_new_style_float_format_basic():
    """A plain ``"{:.2f}"`` template rounds each float to two decimals."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"


def test_new_style_float_format_thousands():
    """Values containing the ``,`` grouping separator are quoted in the CSV."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:,.2f}", lineterminator="\n")
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'


def test_new_style_scientific_format():
    """The ``e`` presentation type is honoured for new-style templates."""
    frame = DataFrame({"A": [0.000123, 0.000456]})
    csv_text = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
    assert csv_text == ",A\n0,1.23e-04\n1,4.56e-04\n"


def test_new_style_with_nan():
    """Missing values are written as ``na_rep``, not run through the template."""
    frame = DataFrame({"A": [1.23, np.nan, 4.56]})
    csv_text = frame.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n")
    assert csv_text == ",A\n0,1.23\n1,NA\n2,4.56\n"


def test_new_style_with_mixed_types():
    """Only the float column is formatted; the string column is untouched."""
    frame = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A,B\n0,1.23,x\n1,4.56,y\n"


def test_new_style_with_mixed_types_in_column():
    """A column mixing floats and text writes the text entry unchanged."""
    frame = DataFrame({"A": [1.23, "text", 4.56]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,1.23\n1,text\n2,4.56\n"


def test_invalid_new_style_format_missing_brace():
    """An unterminated replacement field is rejected with ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    expected_msg = "Invalid new-style format string '{:.2f"
    with pytest.raises(ValueError, match=expected_msg):
        frame.to_csv(float_format="{:.2f")


def test_invalid_new_style_format_specifier():
    """An unknown presentation type (``z``) is rejected with ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    expected_msg = "Invalid new-style format string '{:.2z}'"
    with pytest.raises(ValueError, match=expected_msg):
        frame.to_csv(float_format="{:.2z}")


def test_old_style_format_compatibility():
    """Old-style ``%`` templates keep producing the same output."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="%.2f", lineterminator="\n")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"


def test_callable_float_format_compatibility():
    """A callable ``float_format`` is applied to each float as before."""

    def with_thousands(x):
        return f"{x:,.2f}"

    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format=with_thousands, lineterminator="\n")
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'


def test_no_float_format():
    """Passing ``float_format=None`` leaves floats in their default form."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(float_format=None, lineterminator="\n")
    assert csv_text == ",A\n0,1.23\n1,4.56\n"


def test_large_numbers():
    """Very large magnitudes format in scientific notation.

    The literal ``2e308`` exceeds the largest finite double, so the second
    value is infinity and is expected to be written as ``inf``.
    """
    frame = DataFrame({"A": [1e308, 2e308]})
    csv_text = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
    assert csv_text == ",A\n0,1.00e+308\n1,inf\n"


def test_zero_and_negative():
    """The ``+`` sign option is applied to zero and to negative values."""
    frame = DataFrame({"A": [0.0, -1.23456]})
    csv_text = frame.to_csv(float_format="{:+.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,+0.00\n1,-1.23\n"


def test_unicode_format():
    """Non-ASCII literal text in the template is carried into the output."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(
        float_format="{:.2f}€", encoding="utf-8", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23€\n1,4.56€\n"


def test_empty_dataframe():
    """A frame with no rows writes only the header line."""
    frame = DataFrame({"A": []})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n"


def test_multi_column_float():
    """The template is applied to every float column of the frame."""
    frame = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"


def test_invalid_float_format_type():
    """A ``float_format`` that is neither str nor callable raises ``ValueError``."""
    frame = DataFrame({"A": [1.23]})
    expected_msg = "float_format must be a string or callable"
    with pytest.raises(ValueError, match=expected_msg):
        frame.to_csv(float_format=123)


def test_new_style_with_inf():
    """Positive and negative infinity are written as ``inf`` / ``-inf``."""
    frame = DataFrame({"A": [1.23, np.inf, -np.inf]})
    csv_text = frame.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n")
    assert csv_text == ",A\n0,1.23\n1,inf\n2,-inf\n"


def test_new_style_with_precision_edge():
    """A precision beyond the value's digits pads the output with zeros."""
    frame = DataFrame({"A": [1.23456789]})
    csv_text = frame.to_csv(float_format="{:.10f}", lineterminator="\n")
    assert csv_text == ",A\n0,1.2345678900\n"


def test_new_style_with_template():
    """Literal text around the replacement field is kept in each value."""
    frame = DataFrame({"A": [1234.56789]})
    csv_text = frame.to_csv(float_format="Value: {:,.2f}", lineterminator="\n")
    assert csv_text == ',A\n0,"Value: 1,234.57"\n'
Loading