Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change default shard size to 1GB #357

Merged
merged 3 commits on Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions benchmarks/exp_shard.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
from model_signing.signing import in_toto


KB: Final[int] = 1024
MB: Final[int] = 1024 * KB
GB: Final[int] = 1024 * MB
KB: Final[int] = 1000
MB: Final[int] = 1000 * KB
GB: Final[int] = 1000 * MB


def build_parser() -> argparse.ArgumentParser:
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@ def build_parser() -> argparse.ArgumentParser:
)
param_groups.add_argument(
"--shard",
help="shard size (default: 1000000)",
help="shard size (default: 1000000000)",
type=int,
default=1000000,
default=1_000_000_000,
)

shard_group = parser.add_argument_group("Serialization modes")
Expand Down
12 changes: 6 additions & 6 deletions src/model_signing/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self

Check warning on line 42 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following line was not covered in your tests: 42.


def hash(model_path: os.PathLike) -> manifest.Manifest:
Expand All @@ -60,7 +60,7 @@
Returns:
A manifest of the hashed model.
"""
return HashingConfig().hash(model_path)

Check warning on line 63 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following line was not covered in your tests: 63.


def sign(model_path: os.PathLike, signature_path: os.PathLike):
Expand All @@ -70,7 +70,7 @@
model_path: the path to the model to sign.
signature_path: the path of the resulting signature.
"""
SigningConfig().sign(model_path, signature_path)

Check warning on line 73 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following line was not covered in your tests: 73.


def verify(
Expand All @@ -95,7 +95,7 @@
use_staging: Use staging configurations, instead of production. This
is supposed to be set to True only when testing. Default is False.
"""
VerificationConfig().set_sigstore_dsse_verifier(

Check warning on line 98 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following line was not covered in your tests: 98.
identity=identity, oidc_issuer=oidc_issuer, use_staging=use_staging
).verify(model_path, signature_path)

Expand Down Expand Up @@ -127,14 +127,14 @@
paired with their hashes. By default, no file is ignored and any
symbolic link in the model directory results in an error.
"""
self._ignored_paths = frozenset()
self._serializer = serialize_by_file.ManifestSerializer(

Check warning on line 131 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following lines were not covered in your tests: 130 to 131.
self._build_file_hasher_factory(), allow_symlinks=False
)

def hash(self, model_path: os.PathLike) -> manifest.Manifest:
"""Hashes a model using the current configuration."""
return self._serializer.serialize(

Check warning on line 137 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following line was not covered in your tests: 137.
pathlib.Path(model_path), ignore_paths=self._ignored_paths
)

Expand All @@ -150,12 +150,12 @@
An instance of the requested hasher.
"""
# TODO: Once Python 3.9 support is deprecated revert to using `match`
if hashing_algorithm == "sha256":
return memory.SHA256()
if hashing_algorithm == "blake2":
return memory.BLAKE2()

raise ValueError(f"Unsupported hashing method {hashing_algorithm}")

Check warning on line 158 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following lines were not covered in your tests: 153 to 158.

def _build_file_hasher_factory(
self,
Expand All @@ -175,17 +175,17 @@
method.
"""

def factory(path: pathlib.Path) -> file.SimpleFileHasher:
hasher = self._build_stream_hasher(hashing_algorithm)
return file.SimpleFileHasher(path, hasher, chunk_size=chunk_size)

return factory

Check warning on line 182 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following lines were not covered in your tests: 178 to 182.

def _build_sharded_file_hasher_factory(
self,
hashing_algorithm: Literal["sha256", "blake2"] = "sha256",
chunk_size: int = 1048576,
shard_size: int = 1000000,
shard_size: int = 1_000_000_000,
) -> Callable[[pathlib.Path, int, int], file.ShardedFileHasher]:
"""Builds the hasher factory for a serialization by file shards.

Expand All @@ -194,27 +194,27 @@
chunk_size: The amount of file to read at once. Default is 1MB. A
special value of 0 signals to attempt to read everything in a
single call.
shard_size: The size of a file shard. Default is 1,000,000 bytes.
shard_size: The size of a file shard. Default is 1 GB.

Returns:
The hasher factory that should be used by the active serialization
method.
"""
algorithm = self._build_stream_hasher(hashing_algorithm)

def factory(
path: pathlib.Path, start: int, end: int
) -> file.ShardedFileHasher:
return file.ShardedFileHasher(
path,
algorithm,
start=start,
end=end,
chunk_size=chunk_size,
shard_size=shard_size,
)

return factory

Check warning on line 217 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following lines were not covered in your tests: 203 to 217.

def set_serialize_by_file_to_manifest(
self,
Expand Down Expand Up @@ -244,12 +244,12 @@
Returns:
The new hashing configuration with the new serialization method.
"""
self._serializer = serialize_by_file.ManifestSerializer(
self._build_file_hasher_factory(hashing_algorithm, chunk_size),
max_workers=max_workers,
allow_symlinks=allow_symlinks,
)
return self

Check warning on line 252 in src/model_signing/api.py

View workflow job for this annotation

GitHub Actions / Signing with Python 3.11 on Linux

The following lines were not covered in your tests: 247 to 252.

def set_serialize_by_file_to_digest(
self,
Expand Down Expand Up @@ -299,7 +299,7 @@
*,
hashing_algorithm: Literal["sha256", "blake2"] = "sha256",
chunk_size: int = 1048576,
shard_size: int = 1000000,
shard_size: int = 1_000_000_000,
max_workers: Optional[int] = None,
allow_symlinks: bool = False,
) -> Self:
Expand All @@ -315,7 +315,7 @@
chunk_size: The amount of file to read at once. Default is 1MB. A
special value of 0 signals to attempt to read everything in a
single call.
shard_size: The size of a file shard. Default is 1,000,000 bytes.
shard_size: The size of a file shard. Default is 1 GB.
max_workers: Maximum number of workers to use in parallel. Default
is to defer to the `concurrent.futures` library.
allow_symlinks: Controls whether symbolic links are included. If a
Expand All @@ -340,7 +340,7 @@
hashing_algorithm: Literal["sha256", "blake2"] = "sha256",
merge_algorithm: Literal["sha256", "blake2"] = "sha256",
chunk_size: int = 1048576,
shard_size: int = 1000000,
shard_size: int = 1_000_000_000,
max_workers: Optional[int] = None,
allow_symlinks: bool = False,
) -> Self:
Expand All @@ -357,7 +357,7 @@
chunk_size: The amount of file to read at once. Default is 1MB. A
special value of 0 signals to attempt to read everything in a
single call.
shard_size: The size of a file shard. Default is 1,000,000 bytes.
shard_size: The size of a file shard. Default is 1 GB.
max_workers: Maximum number of workers to use in parallel. Default
is to defer to the `concurrent.futures` library.
allow_symlinks: Controls whether symbolic links are included. If a
Expand Down
4 changes: 2 additions & 2 deletions src/model_signing/hashing/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def __init__(
start: int,
end: int,
chunk_size: int = 1048576,
shard_size: int = 1000000,
shard_size: int = 1_000_000_000,
digest_name_override: Optional[str] = None,
):
"""Initializes an instance to hash a file with a specific `HashEngine`.
Expand All @@ -253,7 +253,7 @@ def __init__(
chunk_size: The amount of file to read at once. Default is 1MB. A
special value of 0 signals to attempt to read everything in a
single call.
shard_size: The size of a file shard. Default is 1,000,000 bytes.
shard_size: The size of a file shard. Default is 1 GB.
digest_name_override: Optional string to allow overriding the
`digest_name` property to support shorter, standardized names.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@
"shards": [
{
"digest": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d0/d1/d2/d3/d4/f0:0:16"
},
{
"digest": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d0/d1/d2/d3/d4/f1:0:16"
},
{
"digest": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d0/d1/d2/d3/d4/f2:0:16"
},
{
"digest": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d0/d1/d2/d3/d4/f3:0:16"
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"shards": [
{
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": ".:0:22"
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,52 +13,52 @@
"shards": [
{
"digest": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d0/f00:0:23"
},
{
"digest": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d0/f01:0:23"
},
{
"digest": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d0/f02:0:23"
},
{
"digest": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d1/f10:0:23"
},
{
"digest": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d1/f11:0:23"
},
{
"digest": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "d1/f12:0:23"
},
{
"digest": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "f0:0:24"
},
{
"digest": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "f1:0:24"
},
{
"digest": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "f2:0:24"
},
{
"digest": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "f3:0:24"
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"shards": [
{
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
"algorithm": "file-sha256-1000000",
"algorithm": "file-sha256-1000000000",
"name": "symlink_file:0:22"
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"sha256": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -16,7 +16,7 @@
"sha256": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -25,7 +25,7 @@
"sha256": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -34,7 +34,7 @@
"sha256": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"sha256": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -16,7 +16,7 @@
"sha256": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -25,7 +25,7 @@
"sha256": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -34,7 +34,7 @@
"sha256": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -43,7 +43,7 @@
"sha256": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -52,7 +52,7 @@
"sha256": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -61,7 +61,7 @@
"sha256": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -70,7 +70,7 @@
"sha256": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -79,7 +79,7 @@
"sha256": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
},
{
Expand All @@ -88,7 +88,7 @@
"sha256": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
},
"annotations": {
"actual_hash_algorithm": "file-sha256-1000000"
"actual_hash_algorithm": "file-sha256-1000000000"
}
}
],
Expand Down