Skip to content

feat: starting serialization #997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 12 commits into
base: develop
Choose a base branch
from
7 changes: 3 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,13 @@ jobs:
create-symlink: true

- name: Install python tools
run: |
uv pip install --system --python=python --group dev --only-binary numpy pytest-github-actions-annotate-failures
run: uv pip install --system --python=python --group github

- name: Configure
run: cmake --preset default -DCMAKE_BUILD_TYPE=Debug ${{ matrix.cmake-extras }}
run: cmake --preset default ${{ matrix.cmake-extras }}

- name: Build
run: cmake --build --preset default -j 4
run: cmake --build --preset default

- name: Test
run: ctest --preset default -j 4
Expand Down
3 changes: 2 additions & 1 deletion CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"binaryDir": "${sourceDir}/build/default",
"warnings": { "dev": true },
"cacheVariables": {
"BOOST_HISTOGRAM_ERRORS": "ON"
"BOOST_HISTOGRAM_ERRORS": "ON",
"CMAKE_BUILD_TYPE": "Debug"
}
}
],
Expand Down
12 changes: 12 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Discussions = "https://github.com/scikit-hep/boost-histogram/discussions"
Documentation = "https://boost-histogram.readthedocs.io/"
Homepage = "https://github.com/scikit-hep/boost-histogram"


[dependency-groups]
dev = [
{ include-group = "test" },
Expand Down Expand Up @@ -94,6 +95,11 @@ test = [
"pytest-xdist",
"numpy",
]
# Dependency group installed by the "Install python tools" step of
# .github/workflows/tests.yml (`uv pip install ... --group github`).
github = [
{ include-group = "test" },
"setuptools_scm",
"pytest-github-actions-annotate-failures"
]


[tool.scikit-build]
Expand Down Expand Up @@ -124,6 +130,10 @@ __version__ = version = "$version"
[tool.setuptools_scm]


# Project-level equivalent of the `--only-binary numpy` flag that the CI
# install command used to pass on the command line.
[tool.uv.pip]
only-binary = ["numpy"]


[tool.pytest.ini_options]
minversion = "6.0"
junit_family = "xunit2"
Expand Down Expand Up @@ -214,6 +224,8 @@ messages_control.disable = [
"too-many-statements",
"too-many-positional-arguments",
"wrong-import-position",
"unused-argument", # Covered by ruff
"unsubscriptable-object", # Wrongly triggered
]

[tool.ruff.lint]
Expand Down
Empty file.
142 changes: 142 additions & 0 deletions src/boost_histogram/serialization/_axis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from __future__ import annotations

import functools
from typing import Any

from .. import axis

__all__ = ["_axis_from_dict", "_axis_to_dict"]


def __dir__() -> list[str]:
    """Return the names listed in this module's ``__all__``."""
    exported = __all__
    return exported


@functools.singledispatch
def _axis_to_dict(ax: Any, /) -> dict[str, Any]:
    """Serialize an axis into a plain dictionary.

    This is the ``functools.singledispatch`` fallback: it only runs when no
    implementation has been registered for the type of ``ax``.

    Raises:
        TypeError: Always; the message names the unsupported type of ``ax``.
    """
    msg = f"Unsupported axis type: {type(ax)}"
    raise TypeError(msg)


@_axis_to_dict.register(axis.Regular)
@_axis_to_dict.register(axis.Integer)
def _(ax: axis.Regular | axis.Integer, /) -> dict[str, Any]:
    """Serialize a Regular or Integer axis into a dictionary.

    A Regular axis that carries a transform is written as a ``"variable"``
    entry holding its explicit edges. Every other axis handled here is
    written as a ``"regular"`` entry described by its first edge, last edge
    and number of bins. The underflow/overflow/circular traits are always
    included; ``"metadata"`` is included only when it is not ``None``.
    """
    # Only Regular axes are inspected for a transform.
    transformed = isinstance(ax, axis.Regular) and ax.transform is not None

    data: dict[str, Any]
    if transformed:
        data = {"type": "variable", "edges": ax.edges}
    else:
        data = {
            "type": "regular",
            "lower": ax.edges[0],
            "upper": ax.edges[-1],
            "bins": ax.size,
        }

    # The trait keys are shared by both layouts and follow the layout keys.
    traits = ax.traits
    data.update(
        underflow=traits.underflow,
        overflow=traits.overflow,
        circular=traits.circular,
    )

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


@_axis_to_dict.register
def _(ax: axis.Variable, /) -> dict[str, Any]:
    """Serialize a Variable axis into a dictionary.

    The entry has type ``"variable"`` and stores the axis edges together with
    the underflow/overflow/circular traits. ``"metadata"`` is added only when
    it is not ``None``.
    """
    traits = ax.traits
    data: dict[str, Any] = dict(
        type="variable",
        edges=ax.edges,
        underflow=traits.underflow,
        overflow=traits.overflow,
        circular=traits.circular,
    )

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


@_axis_to_dict.register
def _(ax: axis.IntCategory, /) -> dict[str, Any]:
    """Serialize an IntCategory axis into a dictionary.

    The entry has type ``"category_int"``, lists the categories obtained by
    iterating the axis, and records the overflow trait under ``"flow"``.
    ``"metadata"`` is added only when it is not ``None``.
    """
    data: dict[str, Any] = dict(
        type="category_int",
        categories=list(ax),
        flow=ax.traits.overflow,
    )

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


@_axis_to_dict.register
def _(ax: axis.StrCategory, /) -> dict[str, Any]:
    """Serialize a StrCategory axis into a dictionary.

    The entry has type ``"category_str"``, lists the categories obtained by
    iterating the axis, and records the overflow trait under ``"flow"``.
    ``"metadata"`` is added only when it is not ``None``.
    """
    data: dict[str, Any] = dict(
        type="category_str",
        categories=list(ax),
        flow=ax.traits.overflow,
    )

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


@_axis_to_dict.register
def _(ax: axis.Boolean, /) -> dict[str, Any]:
    """Serialize a Boolean axis into a dictionary.

    The entry carries only ``"type": "boolean"`` plus ``"metadata"`` when the
    axis metadata is not ``None``.
    """
    data: dict[str, Any] = {"type": "boolean"}

    metadata = ax.metadata
    if metadata is not None:
        data["metadata"] = metadata

    return data


def _axis_from_dict(data: dict[str, Any], /) -> axis.Axis:
    """Build an axis from its dictionary representation.

    ``data["type"]`` selects the axis class:

    * ``"regular"`` -> ``axis.Regular`` from ``bins``, ``lower``, ``upper``
    * ``"variable"`` -> ``axis.Variable`` from ``edges``
    * ``"category_int"`` / ``"category_str"`` -> ``axis.IntCategory`` /
      ``axis.StrCategory`` from ``categories`` (``flow`` becomes ``overflow``)
    * ``"boolean"`` -> ``axis.Boolean``

    ``metadata`` is optional in every case and defaults to ``None``.

    Raises:
        KeyError: If ``"type"`` or a key required by the selected type is
            missing.
        TypeError: If ``data["type"]`` is not one of the names above.
    """
    kind = data["type"]

    # Continuous axes share the underflow/overflow/circular keyword set.
    if kind in ("regular", "variable"):
        if kind == "regular":
            make_axis = axis.Regular
            positional = (data["bins"], data["lower"], data["upper"])
        else:
            make_axis = axis.Variable
            positional = (data["edges"],)
        return make_axis(
            *positional,
            underflow=data["underflow"],
            overflow=data["overflow"],
            circular=data["circular"],
            metadata=data.get("metadata"),
        )

    # Category axes differ only in the class that is instantiated.
    if kind in ("category_int", "category_str"):
        make_axis = axis.IntCategory if kind == "category_int" else axis.StrCategory
        return make_axis(
            data["categories"],
            overflow=data["flow"],
            metadata=data.get("metadata"),
        )

    if kind == "boolean":
        return axis.Boolean(metadata=data.get("metadata"))

    raise TypeError(f"Unsupported axis type: {kind}")
109 changes: 109 additions & 0 deletions src/boost_histogram/serialization/_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from __future__ import annotations

import functools
from typing import Any

import numpy as np

from .. import storage

__all__ = ["_data_from_dict", "_storage_from_dict", "_storage_to_dict"]


def __dir__() -> list[str]:
    """Return the names listed in this module's ``__all__``."""
    exported = __all__
    return exported


@functools.singledispatch
def _storage_to_dict(_storage: Any, /, data: Any) -> dict[str, Any]:  # noqa: ARG001
    """Serialize a storage and its contents into a plain dictionary.

    This is the ``functools.singledispatch`` fallback: it only runs when no
    implementation has been registered for the type of ``_storage``. ``data``
    is accepted for signature compatibility and is not used here.

    Raises:
        TypeError: Always; the message includes ``_storage`` itself.
    """
    message = f"Unsupported storage type: {_storage}"
    raise TypeError(message)


@_storage_to_dict.register(storage.AtomicInt64)
@_storage_to_dict.register(storage.Double)
@_storage_to_dict.register(storage.Int64)
@_storage_to_dict.register(storage.Unlimited)
def _(
    _storage: storage.AtomicInt64 | storage.Double | storage.Int64 | storage.Unlimited,
    /,
    data: Any,
) -> dict[str, Any]:
    """Serialize a plain-valued storage.

    The ``"type"`` tag is derived from the dtype of ``data`` rather than from
    the storage class: integer dtypes give ``"int"``, anything else gives
    ``"double"``. ``data`` itself is stored under ``"values"``.
    """
    if np.issubdtype(data.dtype, np.integer):
        tag = "int"
    else:
        tag = "double"
    return {"type": tag, "values": data}


@_storage_to_dict.register(storage.Weight)
def _(_storage: storage.Weight, /, data: Any) -> dict[str, Any]:
    """Serialize a Weight storage from the ``value`` and ``variance`` of ``data``."""
    return dict(
        type="weighted",
        values=data.value,
        variances=data.variance,
    )


@_storage_to_dict.register(storage.Mean)
def _(_storage: storage.Mean, /, data: Any) -> dict[str, Any]:
    """Serialize a Mean storage from the ``count``, ``value`` and ``variance`` of ``data``."""
    return dict(
        type="mean",
        counts=data.count,
        values=data.value,
        variances=data.variance,
    )


@_storage_to_dict.register(storage.WeightedMean)
def _(_storage: storage.WeightedMean, /, data: Any) -> dict[str, Any]:
    """Serialize a WeightedMean storage.

    Copies ``sum_of_weights``, ``sum_of_weights_squared``, ``value`` and
    ``variance`` from ``data`` into a ``"weighted_mean"`` entry.
    """
    return dict(
        type="weighted_mean",
        sum_of_weights=data.sum_of_weights,
        sum_of_weights_squared=data.sum_of_weights_squared,
        values=data.value,
        variances=data.variance,
    )


def _storage_from_dict(data: dict[str, Any], /) -> storage.Storage:
    """Create an empty storage object matching ``data["type"]``.

    Supported tags are ``"int"``, ``"double"``, ``"weighted"``, ``"mean"`` and
    ``"weighted_mean"``. Only the tag is read; the stored values are not.

    Raises:
        KeyError: If ``"type"`` is missing.
        TypeError: If the tag is not one of the supported names.
    """
    tag = data["type"]

    if tag == "int":
        storage_cls = storage.Int64
    elif tag == "double":
        storage_cls = storage.Double
    elif tag == "weighted":
        storage_cls = storage.Weight
    elif tag == "mean":
        storage_cls = storage.Mean
    elif tag == "weighted_mean":
        storage_cls = storage.WeightedMean
    else:
        raise TypeError(f"Unsupported storage type: {tag}")

    return storage_cls()


def _data_from_dict(data: dict[str, Any], /) -> np.typing.NDArray[Any]:
    """Rebuild the fill array for a histogram from a serialized storage dict.

    For ``"int"`` and ``"double"`` the stored ``"values"`` object is returned
    unchanged. For the accumulator storages the per-field arrays are combined
    along a new *trailing* axis, so an input of shape ``S`` yields an array of
    shape ``S + (n_fields,)``:

    * ``"weighted"``: ``values``, ``variances``
    * ``"mean"``: ``counts``, ``values``, ``variances``
    * ``"weighted_mean"``: ``sum_of_weights``, ``sum_of_weights_squared``,
      ``values``, ``variances``

    Raises:
        KeyError: If ``"type"`` or a field required by the type is missing.
        TypeError: If ``data["type"]`` is not a supported storage name.
    """
    storage_type = data["type"]

    if storage_type in {"int", "double"}:
        return data["values"]

    if storage_type == "weighted":
        fields = ("values", "variances")
    elif storage_type == "mean":
        fields = ("counts", "values", "variances")
    elif storage_type == "weighted_mean":
        fields = (
            "sum_of_weights",
            "sum_of_weights_squared",
            "values",
            "variances",
        )
    else:
        raise TypeError(f"Unsupported storage type: {storage_type}")

    # Stack on the last axis instead of stacking on axis 0 and transposing:
    # ``.T`` reverses *every* axis, which also swaps the bin axes of any
    # histogram with two or more dimensions. The result is unchanged for 1-D.
    # NOTE(review): the last column holds the serialized variances; confirm
    # that the histogram setter expects variances there for the mean-type
    # storages rather than a raw internal accumulator.
    return np.stack([data[name] for name in fields], axis=-1)
39 changes: 39 additions & 0 deletions src/boost_histogram/serialization/generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from typing import Any

from .. import Histogram
from ._axis import _axis_from_dict, _axis_to_dict
from ._storage import _data_from_dict, _storage_from_dict, _storage_to_dict

__all__ = ["from_dict", "to_dict"]


def __dir__() -> list[str]:
    """Return the names listed in this module's ``__all__``."""
    exported = __all__
    return exported


def to_dict(h: Histogram, /) -> dict[str, Any]:
    """Convert a Histogram to a dictionary.

    The result has an ``"axes"`` list (one serialized entry per axis) and a
    ``"storage"`` entry built from a fresh instance of the histogram's storage
    type and its flow-inclusive view. ``"metadata"`` is added only when the
    histogram metadata is not ``None``.
    """
    serialized_axes = [_axis_to_dict(ax) for ax in h.axes]
    # The view includes the flow bins so that they are serialized as well.
    serialized_storage = _storage_to_dict(h.storage_type(), h.view(flow=True))

    result: dict[str, Any] = {
        "axes": serialized_axes,
        "storage": serialized_storage,
    }

    metadata = h.metadata
    if metadata is not None:
        result["metadata"] = metadata

    return result


def from_dict(data: dict[str, Any], /) -> Histogram:
    """Convert a dictionary to a Histogram.

    The axes and an empty storage are rebuilt from ``data["axes"]`` and
    ``data["storage"]``; the histogram is then filled in one assignment from
    the array reconstructed out of the same storage entry. ``"metadata"`` is
    optional and defaults to ``None``.
    """
    axes = [_axis_from_dict(entry) for entry in data["axes"]]
    storage_entry = data["storage"]

    hist = Histogram(
        *axes,
        storage=_storage_from_dict(storage_entry),
        metadata=data.get("metadata"),
    )
    # Fill every bin at once from the deserialized contents.
    hist[...] = _data_from_dict(storage_entry)
    return hist
Loading
Loading