Skip to content

feat: starting serialization #997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jun 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,10 @@ source_group(
option(BOOST_HISTOGRAM_ERRORS "Make warnings errors (for CI mostly)")

# Adding warnings
# Boost.Histogram doesn't pass -Wsign-conversion
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
target_compile_options(
_core
PRIVATE -Wall
-Wextra
-pedantic-errors
-Wconversion
-Wsign-conversion
-Wsign-compare
-Wno-unused-value)
target_compile_options(_core PRIVATE -Wall -Wextra -pedantic-errors -Wconversion -Wsign-compare
-Wno-unused-value)
if(BOOST_HISTOGRAM_ERRORS)
target_compile_options(_core PRIVATE -Werror)
endif()
Expand Down
6 changes: 3 additions & 3 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ def tests(session: nox.Session) -> None:
opts = (
["--reinstall-package=boost-histogram"] if session.venv_backend == "uv" else []
)
args = session.posargs or ["-n", "auto"]
args = session.posargs or ["-n", "auto", "--benchmark-disable"]
pyproject = nox.project.load_toml("pyproject.toml")
session.install(*nox.project.dependency_groups(pyproject, "test"))
session.install("-v", ".", *opts, silent=False)
session.install("-v", "-e.", *opts, silent=False)
session.run("pytest", *args)


Expand Down Expand Up @@ -114,7 +114,7 @@ def pylint(session: nox.Session) -> None:
"""

session.install("pylint==3.3.*")
session.install("-e.")
session.install(".")
session.run("pylint", "boost_histogram", *session.posargs)


Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ environment.MACOSX_DEPLOYMENT_TARGET = "14.0"
[tool.pylint]
py-version = "3.9"
ignore-patterns = ['.*\.pyi']
ignore = "version.py"
extension-pkg-allow-list = ["boost_histogram._core"]
reports.output-format = "colorized"
similarities.ignore-imports = "yes"
Expand Down
14 changes: 14 additions & 0 deletions src/boost_histogram/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import boost_histogram
from boost_histogram import _core

from . import serialization
from ._compat.typing import Self
from ._utils import cast, register
from .axis import AxesTuple, Axis, Variable
Expand Down Expand Up @@ -408,6 +409,19 @@ def _generate_axes_(self) -> AxesTuple:

return AxesTuple(self._axis(i) for i in range(self.ndim))

def _to_uhi_(self) -> dict[str, Any]:
    """
    Return this histogram as a UHI dictionary.

    The conversion itself is delegated to ``serialization.to_uhi``.
    """
    uhi_dict = serialization.to_uhi(self)
    return uhi_dict

@classmethod
def _from_uhi_(cls, inp: dict[str, Any], /) -> Self:
    """
    Build an instance of this class from a UHI dictionary.

    ``serialization.from_uhi`` produces the histogram, which is then passed
    to ``cls`` so the result has the type of the class this is called on.
    """
    restored = serialization.from_uhi(inp)
    return cls(restored)

@property
def ndim(self) -> int:
"""
Expand Down
41 changes: 41 additions & 0 deletions src/boost_histogram/serialization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import annotations

from typing import Any

# pylint: disable-next=import-error
from .. import histogram, version
from ._axis import _axis_from_dict, _axis_to_dict
from ._storage import _data_from_dict, _storage_from_dict, _storage_to_dict

__all__ = ["from_uhi", "to_uhi"]


def __dir__() -> list[str]:
    """Return the names listed in this module's ``__all__``."""
    exported = __all__
    return exported


def to_uhi(h: histogram.Histogram, /) -> dict[str, Any]:
    """Serialize a Histogram into a UHI dictionary.

    The result always carries ``"writer_info"`` (the boost-histogram version),
    ``"axes"`` (one dictionary per axis) and ``"storage"`` (built from the
    storage type and the view taken with ``flow=True``). ``"metadata"`` is
    only present when the histogram's metadata is not ``None``.
    """
    writer_info = {"boost-histogram": {"version": version.version}}
    axes = [_axis_to_dict(ax) for ax in h.axes]
    storage = _storage_to_dict(h.storage_type(), h.view(flow=True))

    result: dict[str, Any] = {
        "writer_info": writer_info,
        "axes": axes,
        "storage": storage,
    }

    metadata = h.metadata
    if metadata is not None:
        result["metadata"] = metadata

    return result


def from_uhi(data: dict[str, Any], /) -> histogram.Histogram:
    """Rebuild a Histogram from a UHI dictionary.

    The axes and the (empty) storage are constructed first; the bin contents
    from ``data["storage"]`` are then assigned into the whole histogram.
    ``"metadata"`` is optional and defaults to ``None``.
    """
    # "axes" is read before "storage", matching the order a missing key is reported in.
    axes = [_axis_from_dict(entry) for entry in data["axes"]]
    storage_dict = data["storage"]

    hist = histogram.Histogram(
        *axes,
        storage=_storage_from_dict(storage_dict),
        metadata=data.get("metadata"),
    )
    hist[...] = _data_from_dict(storage_dict)
    return hist
144 changes: 144 additions & 0 deletions src/boost_histogram/serialization/_axis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from __future__ import annotations

import functools
from typing import Any

from .. import axis

__all__ = ["_axis_from_dict", "_axis_to_dict"]


def __dir__() -> list[str]:
    """Return the names listed in this module's ``__all__``."""
    exported = __all__
    return exported


@functools.singledispatch
def _axis_to_dict(ax: Any, /) -> dict[str, Any]:
    """Fallback axis serializer, reached when no handler is registered.

    Raises:
        TypeError: always; the message names the type of ``ax``.
    """
    unsupported = type(ax)
    raise TypeError(f"Unsupported axis type: {unsupported}")


@_axis_to_dict.register(axis.Regular)
@_axis_to_dict.register(axis.Integer)
def _(ax: axis.Regular | axis.Integer, /) -> dict[str, Any]:
    """Convert a Regular or Integer axis to a dictionary.

    A Regular axis with a transform is written as ``"variable"`` with its
    explicit edges; everything else is written as ``"regular"`` with
    lower/upper/bins. Integer axes additionally record their original type
    under ``"writer_info"``. ``"metadata"`` is included only when not ``None``.
    """
    traits = ax.traits
    flow_flags = {
        "underflow": traits.underflow,
        "overflow": traits.overflow,
        "circular": traits.circular,
    }

    has_transform = isinstance(ax, axis.Regular) and ax.transform is not None

    data: dict[str, Any]
    if has_transform:
        data = {"type": "variable", "edges": ax.edges, **flow_flags}
    else:
        edges = ax.edges
        data = {
            "type": "regular",
            "lower": edges[0],
            "upper": edges[-1],
            "bins": ax.size,
            **flow_flags,
        }

    if isinstance(ax, axis.Integer):
        data["writer_info"] = {"boost-histogram": {"orig_type": "Integer"}}

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


@_axis_to_dict.register
def _(ax: axis.Variable, /) -> dict[str, Any]:
    """Convert a Variable axis to a dictionary.

    Emits the edges plus the underflow/overflow/circular traits, and adds
    ``"metadata"`` only when the axis metadata is not ``None``.
    """
    traits = ax.traits
    data: dict[str, Any] = {"type": "variable", "edges": ax.edges}
    data["underflow"] = traits.underflow
    data["overflow"] = traits.overflow
    data["circular"] = traits.circular

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


@_axis_to_dict.register
def _(ax: axis.IntCategory, /) -> dict[str, Any]:
    """Convert an IntCategory axis to a dictionary.

    ``"categories"`` is ``list(ax)`` and ``"flow"`` is the axis' overflow
    trait; ``"metadata"`` is added only when it is not ``None``.
    """
    data: dict[str, Any] = dict(
        type="category_int",
        categories=list(ax),
        flow=ax.traits.overflow,
    )

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


@_axis_to_dict.register
def _(ax: axis.StrCategory, /) -> dict[str, Any]:
    """Convert a StrCategory axis to a dictionary.

    ``"categories"`` is ``list(ax)`` and ``"flow"`` is the axis' overflow
    trait; ``"metadata"`` is added only when it is not ``None``.
    """
    categories = list(ax)
    has_flow = ax.traits.overflow
    data: dict[str, Any] = {
        "type": "category_str",
        "categories": categories,
        "flow": has_flow,
    }

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


@_axis_to_dict.register
def _(ax: axis.Boolean, /) -> dict[str, Any]:
    """Convert a Boolean axis to a dictionary.

    Only the type tag is written, plus ``"metadata"`` when it is not ``None``.
    """
    data: dict[str, Any] = {"type": "boolean"}

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


def _axis_from_dict(data: dict[str, Any], /) -> axis.Axis:
    """Build an axis from its dictionary description.

    ``data["type"]`` selects the axis class: ``"regular"``, ``"variable"``,
    ``"category_int"``, ``"category_str"`` or ``"boolean"``. ``"metadata"``
    is optional for every type and defaults to ``None``.

    Raises:
        KeyError: if ``"type"`` or a key required by that type is missing.
        TypeError: if ``data["type"]`` is not one of the supported values.
    """
    kind = data["type"]
    flow_keys = ("underflow", "overflow", "circular")

    if kind == "regular":
        # Positional values are read before the flow options, as in the call order.
        positional = (data["bins"], data["lower"], data["upper"])
        options = {key: data[key] for key in flow_keys}
        return axis.Regular(*positional, **options, metadata=data.get("metadata"))

    if kind == "variable":
        edges = data["edges"]
        options = {key: data[key] for key in flow_keys}
        return axis.Variable(edges, **options, metadata=data.get("metadata"))

    if kind in ("category_int", "category_str"):
        # Both category flavours share the same dictionary layout.
        factory = axis.IntCategory if kind == "category_int" else axis.StrCategory
        return factory(
            data["categories"],
            overflow=data["flow"],
            metadata=data.get("metadata"),
        )

    if kind == "boolean":
        return axis.Boolean(metadata=data.get("metadata"))

    raise TypeError(f"Unsupported axis type: {kind}")
110 changes: 110 additions & 0 deletions src/boost_histogram/serialization/_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from __future__ import annotations

import functools
from typing import Any

import numpy as np

from .. import storage

__all__ = ["_data_from_dict", "_storage_from_dict", "_storage_to_dict"]


def __dir__() -> list[str]:
    """Return the names listed in this module's ``__all__``."""
    exported = __all__
    return exported


@functools.singledispatch
def _storage_to_dict(_storage: Any, /, data: Any) -> dict[str, Any]:  # noqa: ARG001
    """Fallback storage serializer, reached when no handler is registered.

    Raises:
        TypeError: always; the message includes the storage object itself.
    """
    raise TypeError(f"Unsupported storage type: {_storage}")


@_storage_to_dict.register(storage.AtomicInt64)
@_storage_to_dict.register(storage.Double)
@_storage_to_dict.register(storage.Int64)
@_storage_to_dict.register(storage.Unlimited)
def _(
    storage_: storage.AtomicInt64 | storage.Double | storage.Int64 | storage.Unlimited,
    /,
    data: Any,
) -> dict[str, Any]:
    """Convert a single-valued storage and its data to a dictionary.

    The original storage class name is kept under ``"writer_info"``. The
    ``"type"`` is ``"int"`` when ``data.dtype`` is an integer dtype and
    ``"double"`` otherwise; ``data`` is stored unchanged under ``"values"``.
    """
    original_name = type(storage_).__name__

    if np.issubdtype(data.dtype, np.integer):
        uhi_type = "int"
    else:
        uhi_type = "double"

    return {
        "writer_info": {"boost-histogram": {"orig_type": original_name}},
        "type": uhi_type,
        "values": data,
    }


@_storage_to_dict.register(storage.Weight)
def _(_storage: storage.Weight, /, data: Any) -> dict[str, Any]:
    """Convert Weight storage data to a dictionary.

    Reads ``data.value`` and ``data.variance``; the storage object itself is
    only used for dispatch.
    """
    payload: dict[str, Any] = {"type": "weighted"}
    payload["values"] = data.value
    payload["variances"] = data.variance
    return payload


@_storage_to_dict.register(storage.Mean)
def _(_storage: storage.Mean, /, data: Any) -> dict[str, Any]:
    """Convert Mean storage data to a dictionary.

    Reads ``data.count``, ``data.value`` and ``data.variance``; the storage
    object itself is only used for dispatch.
    """
    counts = data.count
    values = data.value
    variances = data.variance
    return dict(type="mean", counts=counts, values=values, variances=variances)


@_storage_to_dict.register(storage.WeightedMean)
def _(_storage: storage.WeightedMean, /, data: Any) -> dict[str, Any]:
    """Convert WeightedMean storage data to a dictionary.

    Each output key is filled from the attribute of ``data`` paired with it
    below; the storage object itself is only used for dispatch.
    """
    # (output key, attribute on ``data``), in output order.
    field_map = (
        ("sum_of_weights", "sum_of_weights"),
        ("sum_of_weights_squared", "sum_of_weights_squared"),
        ("values", "value"),
        ("variances", "variance"),
    )
    payload: dict[str, Any] = {"type": "weighted_mean"}
    for key, attribute in field_map:
        payload[key] = getattr(data, attribute)
    return payload


def _storage_from_dict(data: dict[str, Any], /) -> storage.Storage:
    """Create an empty storage object matching ``data["type"]``.

    Supported types are ``"int"``, ``"double"``, ``"weighted"``, ``"mean"``
    and ``"weighted_mean"``.

    Raises:
        KeyError: if ``"type"`` is missing.
        TypeError: if the type is not one of the supported values.
    """
    kind = data["type"]

    # (UHI type name, storage class name), checked in order by equality.
    known = (
        ("int", "Int64"),
        ("double", "Double"),
        ("weighted", "Weight"),
        ("mean", "Mean"),
        ("weighted_mean", "WeightedMean"),
    )
    for uhi_name, class_name in known:
        if kind == uhi_name:
            return getattr(storage, class_name)()

    raise TypeError(f"Unsupported storage type: {kind}")


def _data_from_dict(data: dict[str, Any], /) -> np.typing.NDArray[Any]:
    """Convert a storage dictionary into the data assigned to a histogram.

    For ``"int"`` and ``"double"`` the ``"values"`` entry is returned as-is.
    For the multi-field storages the per-field arrays are stacked along a new
    *last* axis, so an input of shape ``S`` yields ``S + (n_fields,)``:

    * ``"weighted"``: values, variances
    * ``"mean"``: counts, values, variances
    * ``"weighted_mean"``: sum_of_weights, sum_of_weights_squared, values,
      variances

    Raises:
        KeyError: if ``"type"`` or a field required by that type is missing.
        TypeError: if the storage type is not supported.
    """
    storage_type = data["type"]

    if storage_type in {"int", "double"}:
        return data["values"]

    # NOTE(review): these lists mirror the "variances" etc. keys written on
    # output; whether assignment into a Mean/WeightedMean histogram expects
    # exactly these quantities is not visible from here - confirm.
    field_names = {
        "weighted": ("values", "variances"),
        "mean": ("counts", "values", "variances"),
        "weighted_mean": (
            "sum_of_weights",
            "sum_of_weights_squared",
            "values",
            "variances",
        ),
    }.get(storage_type)

    if field_names is None:
        raise TypeError(f"Unsupported storage type: {storage_type}")

    # Stack on the last axis rather than stacking first and transposing:
    # ``.T`` reverses *all* axes, which scrambles the bin axes as soon as the
    # histogram has more than one dimension.
    return np.stack([data[name] for name in field_names], axis=-1)
Loading