From fc19342b2ee669a8d8ca6739e7eda8168fb79ddc Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Sat, 20 Jul 2024 16:46:16 -0700 Subject: [PATCH] Refactor sharded serialization to remove code duplication Similar to https://github.com/sigstore/model-transparency/pull/241, there is a duplication in the directory traversal between serializing to a digest and serializing to a manifest. This time, both supported parallelism, so there is really no need for the duplication. We make an abstract `ShardedFilesSerializer` class to contain the logic for the directory traversal and then create the better named `DigestSerializer` and `ManifestSerializer` for the two serializing classes. This time, instead of trying extremely hard to match the old behavior for digest serialization, we just update the goldens. We still had to update some other tests: since the hashes are computed only for files, we no longer differentiate between a model with an empty directory and a model where that empty directory is completely removed. This is a corner case and it is ok to do this. In fact, ignoring empty directories is part of the optimization hinted at in https://github.com/sigstore/model-transparency/issues/197. 
Signed-off-by: Mihai Maruseac --- .../serialization/serialize_by_file_shard.py | 266 +++++++----------- .../serialize_by_file_shard_test.py | 98 +++---- .../TestDigestSerializer/deep_model_folder | 1 + .../deep_model_folder_small_shards | 1 + .../empty_model_file} | 0 .../empty_model_file_small_shards} | 0 .../TestDigestSerializer/empty_model_folder | 1 + .../empty_model_folder_small_shards | 1 + .../model_folder_with_empty_file | 1 + .../model_folder_with_empty_file_small_shards | 1 + .../TestDigestSerializer/sample_model_file | 1 + .../sample_model_file_small_shards | 1 + .../TestDigestSerializer/sample_model_folder | 1 + .../sample_model_folder_small_shards | 1 + .../deep_model_folder | 0 .../deep_model_folder_small_shards | 0 .../empty_model_file | 0 .../empty_model_file_small_shards | 0 .../empty_model_folder | 0 .../empty_model_folder_small_shards | 0 .../model_folder_with_empty_file | 0 .../model_folder_with_empty_file_small_shards | 0 .../sample_model_file | 0 .../sample_model_file_small_shards | 0 .../sample_model_folder | 0 .../sample_model_folder_small_shards | 0 .../deep_model_folder | 1 - .../deep_model_folder_small_shards | 1 - .../TestShardedDFSSerializer/empty_model_file | 1 - .../empty_model_file_small_shards | 1 - .../model_folder_with_empty_file | 1 - .../model_folder_with_empty_file_small_shards | 1 - .../sample_model_file | 1 - .../sample_model_file_small_shards | 1 - .../sample_model_folder | 1 - .../sample_model_folder_small_shards | 1 - 36 files changed, 155 insertions(+), 229 deletions(-) create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/deep_model_folder create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/deep_model_folder_small_shards rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedDFSSerializer/empty_model_folder => TestDigestSerializer/empty_model_file} (100%) rename 
model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedDFSSerializer/empty_model_folder_small_shards => TestDigestSerializer/empty_model_file_small_shards} (100%) create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_folder create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_folder_small_shards create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/model_folder_with_empty_file create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/model_folder_with_empty_file_small_shards create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_file create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_file_small_shards create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_folder create mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_folder_small_shards rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/deep_model_folder (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/deep_model_folder_small_shards (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/empty_model_file (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/empty_model_file_small_shards (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/empty_model_folder (100%) rename 
model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/empty_model_folder_small_shards (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/model_folder_with_empty_file (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/model_folder_with_empty_file_small_shards (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/sample_model_file (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/sample_model_file_small_shards (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/sample_model_folder (100%) rename model_signing/serialization/testdata/serialize_by_file_shard/{TestShardedFilesSerializer => TestManifestSerializer}/sample_model_folder_small_shards (100%) delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder_small_shards delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file_small_shards delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file_small_shards delete mode 100644 
model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file_small_shards delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder delete mode 100644 model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder_small_shards diff --git a/model_signing/serialization/serialize_by_file_shard.py b/model_signing/serialization/serialize_by_file_shard.py index 59aa07e8..c16c1fdd 100644 --- a/model_signing/serialization/serialize_by_file_shard.py +++ b/model_signing/serialization/serialize_by_file_shard.py @@ -14,10 +14,11 @@ """Model serializers that operated at file shard level of granularity.""" +import abc import base64 import concurrent.futures import pathlib -from typing import Callable, Iterable, TypeAlias +from typing import Callable, Iterable, cast from typing_extensions import override from model_signing.hashing import file @@ -27,21 +28,16 @@ from model_signing.serialization import serialize_by_file -_ShardSignTask: TypeAlias = tuple[pathlib.PurePath, str, int, int] - - def _build_header( *, - entry_name: str, - entry_type: str, + name: str, start: int, end: int, ) -> bytes: - """Builds a header to encode a path with given name and type. + """Builds a header to encode a path with given name and shard range. Args: entry_name: The name of the entry to build the header for. - entry_type: The type of the entry (file or directory). start: Offset for the start of the path shard. end: Offset for the end of the path shard. @@ -50,14 +46,11 @@ def _build_header( bytes. Each argument is separated by dots and the last byte is also a dot (so the file digest can be appended unambiguously). """ - # Note: This will get replaced in subsequent change, right now we're just - # moving existing code around. 
- encoded_type = entry_type.encode("utf-8") # Prevent confusion if name has a "." inside by encoding to base64. - encoded_name = base64.b64encode(entry_name.encode("utf-8")) + encoded_name = base64.b64encode(name.encode("utf-8")) encoded_range = f"{start}-{end}".encode("utf-8") # Note: empty string at the end, to terminate header with a "." - return b".".join([encoded_type, encoded_name, encoded_range, b""]) + return b".".join([encoded_name, encoded_range, b""]) def _endpoints(step: int, end: int) -> Iterable[int]: @@ -83,164 +76,15 @@ def _endpoints(step: int, end: int) -> Iterable[int]: yield end -class ShardedDFSSerializer(serialization.Serializer): - """DFSSerializer that uses a sharded hash engine to exploit parallelism.""" - - def __init__( - self, - file_hasher_factory: Callable[ - [pathlib.Path, int, int], file.ShardedFileHasher - ], - merge_hasher: hashing.StreamingHashEngine, - max_workers: int | None = None, - ): - """Initializes an instance to serialize a model with this serializer. - - Args: - hasher_factory: A callable to build the hash engine used to hash - every shard of the files in the model. Because each shard is - processed in parallel, every thread needs to call the factory to - start hashing. The arguments are the file, and the endpoints of - the shard. - merge_hasher: A `hashing.StreamingHashEngine` instance used to merge - individual file digests to compute an aggregate digest. - max_workers: Maximum number of workers to use in parallel. Default - is to defer to the `concurent.futures` library. - """ - self._file_hasher_factory = file_hasher_factory - self._merge_hasher = merge_hasher - self._max_workers = max_workers - - # Precompute some private values only once by using a mock file hasher. - # None of the arguments used to build the hasher are used. 
- hasher = file_hasher_factory(pathlib.Path(), 0, 1) - self._shard_size = hasher.shard_size - - @override - def serialize(self, model_path: pathlib.Path) -> manifest.DigestManifest: - # Note: This function currently uses `pathlib.Path.glob` so the DFS - # expansion relies on the `glob` implementation performing a DFS. We - # will be truthful again when switching to `pathlib.Path.walk`, after - # Python 3.12 is the minimum version we support. - - # TODO: github.com/sigstore/model-transparency/issues/196 - Add checks - # to exclude symlinks if desired. - serialize_by_file.check_file_or_directory(model_path) - - if model_path.is_file(): - entries = [model_path] - else: - # TODO: github.com/sigstore/model-transparency/issues/200 - When - # Python3.12 is the minimum supported version, this can be replaced - # with `pathlib.Path.walk` for a clearer interface, and some speed - # improvement. - entries = sorted(model_path.glob("**/*")) - - tasks = self._convert_paths_to_tasks(entries, model_path) - - digest_len = self._merge_hasher.digest_size - digests_buffer = bytearray(len(tasks) * digest_len) - - with concurrent.futures.ThreadPoolExecutor( - max_workers=self._max_workers - ) as tpe: - futures_dict = { - tpe.submit(self._perform_hash_task, model_path, task): i - for i, task in enumerate(tasks) - } - for future in concurrent.futures.as_completed(futures_dict): - i = futures_dict[future] - task_digest = future.result() - - task_path, task_type, task_start, task_end = tasks[i] - header = _build_header( - entry_name=task_path.name, - entry_type=task_type, - start=task_start, - end=task_end, - ) - self._merge_hasher.reset(header) - self._merge_hasher.update(task_digest) - digest = self._merge_hasher.compute().digest_value - - start = i * digest_len - end = start + digest_len - digests_buffer[start:end] = digest - - self._merge_hasher.reset(digests_buffer) - return manifest.DigestManifest(self._merge_hasher.compute()) - - def _convert_paths_to_tasks( - self, paths: 
Iterable[pathlib.Path], root_path: pathlib.Path - ) -> list[_ShardSignTask]: - """Returns the tasks that would hash shards of files in parallel. - - Every file in `paths` is replaced by a set of tasks. Each task computes - the digest over a shard of the file. Directories result in a single - task, just to compute a digest over a header. - - To differentiate between (empty) files and directories with the same - name, every task needs to also include a header. The header needs to - include relative path to the model root, as we want to obtain the same - digest if the model is moved. - - We don't construct an enum for the type of the entry, because these will - never escape this class. - - Note that the path component of the tasks is a `pathlib.PurePath`, so - operations on it cannot touch the filesystem. - """ - # TODO: github.com/sigstore/model-transparency/issues/196 - Add support - # for excluded files. - - tasks = [] - for path in paths: - serialize_by_file.check_file_or_directory(path) - relative_path = path.relative_to(root_path) - - if path.is_file(): - path_size = path.stat().st_size - start = 0 - for end in _endpoints(self._shard_size, path_size): - tasks.append((relative_path, "file", start, end)) - start = end - else: - tasks.append((relative_path, "dir", 0, 0)) - - return tasks - - def _perform_hash_task( - self, model_path: pathlib.Path, task: _ShardSignTask - ) -> bytes: - """Produces the hash of the file shard included in `task`.""" - task_path, task_type, task_start, task_end = task - - # TODO: github.com/sigstore/model-transparency/issues/197 - Directories - # don't need to use the file hasher. Rather than starting a process - # just for them, we should filter these ahead of time, and only use - # threading for file shards. For now, just return an empty result. 
- if task_type == "dir": - return b"" - - # TODO: github.com/sigstore/model-transparency/issues/197 - Similarly, - # empty files should be hashed outside of a parallel task, to not waste - # resources. - if task_start == task_end: - return b"" - - full_path = model_path.joinpath(task_path) - hasher = self._file_hasher_factory(full_path, task_start, task_end) - return hasher.compute().digest_value - - class ShardedFilesSerializer(serialization.Serializer): - """Model serializers that produces an itemized manifest, at shard level. + """Generic file shard serializer. Traverses the model directory and creates digests for every file found, sharding the file in equal shards and computing the digests in parallel. - Since the manifest lists each item individually, this will also enable - support for incremental updates (to be added later). + Subclasses can then create a manifest with these digests, either listing + them item by item, combining them into file digests, or combining all of + them into a single digest. """ def __init__( @@ -270,9 +114,7 @@ def __init__( self._shard_size = hasher.shard_size @override - def serialize( - self, model_path: pathlib.Path - ) -> manifest.ShardLevelManifest: + def serialize(self, model_path: pathlib.Path) -> manifest.Manifest: # TODO: github.com/sigstore/model-transparency/issues/196 - Add checks # to exclude symlinks if desired. serialize_by_file.check_file_or_directory(model_path) @@ -337,12 +179,96 @@ def _compute_hash( path=relative_path, digest=digest, start=start, end=end ) + @abc.abstractmethod def _build_manifest( self, items: Iterable[manifest.ShardedFileManifestItem] - ) -> manifest.ShardLevelManifest: + ) -> manifest.Manifest: """Builds an itemized manifest from a given list of items. Every subclass needs to implement this method to determine the format of the manifest. """ + pass + + +class ManifestSerializer(ShardedFilesSerializer): + """Model serializers that produces an itemized manifest, at shard level. 
+ + Since the manifest lists each item individually, this will also enable + support for incremental updates (to be added later). + """ + + @override + def serialize( + self, model_path: pathlib.Path + ) -> manifest.ShardLevelManifest: + """Serializes the model given by the `model_path` argument. + + The only reason for the override is to change the return type, to be + more restrictive. This is to signal that the only manifests that can be + returned are `manifest.ShardLevelManifest` instances. + """ + return cast(manifest.ShardLevelManifest, super().serialize(model_path)) + + @override + def _build_manifest( + self, items: Iterable[manifest.ShardedFileManifestItem] + ) -> manifest.ShardLevelManifest: return manifest.ShardLevelManifest(items) + + +class DigestSerializer(ShardedFilesSerializer): + """Serializer for a model that performs a traversal of the model directory. + + This serializer produces a single hash for the entire model. + """ + + def __init__( + self, + file_hasher_factory: Callable[ + [pathlib.Path, int, int], file.ShardedFileHasher + ], + merge_hasher: hashing.StreamingHashEngine, + max_workers: int | None = None, + ): + """Initializes an instance to serialize a model with this serializer. + + Args: + hasher_factory: A callable to build the hash engine used to hash + every shard of the files in the model. Because each shard is + processed in parallel, every thread needs to call the factory to + start hashing. The arguments are the file, and the endpoints of + the shard. + merge_hasher: A `hashing.StreamingHashEngine` instance used to merge + individual file shard digests to compute an aggregate digest. + max_workers: Maximum number of workers to use in parallel. Default + is to defer to the `concurrent.futures` library. 
+ """ + super().__init__(file_hasher_factory, max_workers) + self._merge_hasher = merge_hasher + + @override + def serialize(self, model_path: pathlib.Path) -> manifest.DigestManifest: + """Serializes the model given by the `model_path` argument. + + The only reason for the override is to change the return type, to be + more restrictive. This is to signal that the only manifests that can be + returned are `manifest.DigestManifest` instances. + """ + return cast(manifest.DigestManifest, super().serialize(model_path)) + + @override + def _build_manifest( + self, items: Iterable[manifest.ShardedFileManifestItem] + ) -> manifest.DigestManifest: + self._merge_hasher.reset() + + for item in sorted(items, key=lambda i: (i.path, i.start, i.end)): + header = _build_header( + name=item.path.name, start=item.start, end=item.end + ) + self._merge_hasher.update(header) + self._merge_hasher.update(item.digest.digest_value) + + digest = self._merge_hasher.compute() + return manifest.DigestManifest(digest) diff --git a/model_signing/serialization/serialize_by_file_shard_test.py b/model_signing/serialization/serialize_by_file_shard_test.py index 59f94047..33510ef0 100644 --- a/model_signing/serialization/serialize_by_file_shard_test.py +++ b/model_signing/serialization/serialize_by_file_shard_test.py @@ -29,7 +29,7 @@ # pytest model_signing/serialization/ --update_goldens -class TestShardedDFSSerializer: +class TestDigestSerializer: def _hasher_factory( self, path: pathlib.Path, start: int, end: int @@ -50,13 +50,13 @@ def test_known_models(self, request, model_fixture_name): # Set up variables (arrange) testdata_path = request.path.parent / "testdata" test_path = testdata_path / "serialize_by_file_shard" - test_class_path = test_path / "TestShardedDFSSerializer" + test_class_path = test_path / "TestDigestSerializer" golden_path = test_class_path / model_fixture_name should_update = request.config.getoption("update_goldens") model = request.getfixturevalue(model_fixture_name) # 
Compute model manifest (act) - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(model) @@ -76,13 +76,13 @@ def test_known_models_small_shards(self, request, model_fixture_name): # Set up variables (arrange) testdata_path = request.path.parent / "testdata" test_path = testdata_path / "serialize_by_file_shard" - test_class_path = test_path / "TestShardedDFSSerializer" + test_class_path = test_path / "TestDigestSerializer" golden_path = test_class_path / f"{model_fixture_name}_small_shards" should_update = request.config.getoption("update_goldens") model = request.getfixturevalue(model_fixture_name) # Compute model manifest (act) - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory_small_shards, memory.SHA256() ) manifest = serializer.serialize(model) @@ -98,7 +98,7 @@ def test_known_models_small_shards(self, request, model_fixture_name): assert manifest.digest.digest_hex == expected_digest def test_file_hash_is_not_same_as_hash_of_content(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) @@ -108,7 +108,7 @@ def test_file_hash_is_not_same_as_hash_of_content(self, sample_model_file): assert manifest.digest.digest_hex != digest.digest_hex def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_file) @@ -122,7 +122,7 @@ def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): def test_file_model_hash_changes_if_content_changes( self, sample_model_file ): - serializer = 
serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_file) @@ -134,7 +134,7 @@ def test_file_model_hash_changes_if_content_changes( assert manifest.digest.digest_value != new_manifest.digest.digest_value def test_directory_model_with_only_known_file(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest_file = serializer.serialize(sample_model_file) @@ -148,7 +148,7 @@ def test_directory_model_with_only_known_file(self, sample_model_file): def test_folder_model_hash_is_same_if_model_is_moved( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -159,8 +159,8 @@ def test_folder_model_hash_is_same_if_model_is_moved( assert manifest == new_manifest - def test_folder_model_empty_folder_gets_included(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + def test_folder_model_empty_folder_not_included(self, sample_model_folder): + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -170,10 +170,10 @@ def test_folder_model_empty_folder_gets_included(self, sample_model_folder): new_empty_dir.mkdir() new_manifest = serializer.serialize(sample_model_folder) - assert manifest != new_manifest + assert manifest == new_manifest - def test_folder_model_empty_file_gets_included(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + def test_folder_model_empty_file_not_included(self, sample_model_folder): + serializer = 
serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -183,10 +183,10 @@ def test_folder_model_empty_file_gets_included(self, sample_model_folder): new_empty_file.write_text("") new_manifest = serializer.serialize(sample_model_folder) - assert manifest != new_manifest + assert manifest == new_manifest def test_folder_model_rename_file(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -200,7 +200,7 @@ def test_folder_model_rename_file(self, sample_model_folder): assert manifest != new_manifest def test_folder_model_rename_dir(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -213,7 +213,7 @@ def test_folder_model_rename_dir(self, sample_model_folder): assert manifest != new_manifest def test_folder_model_replace_file_empty_folder(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -227,7 +227,7 @@ def test_folder_model_replace_file_empty_folder(self, sample_model_folder): assert manifest != new_manifest def test_folder_model_change_file(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest = serializer.serialize(sample_model_folder) @@ -239,22 +239,22 @@ def test_folder_model_change_file(self, sample_model_folder): assert manifest != new_manifest - def 
test_empty_folder_hashes_differently_than_empty_file( + def test_empty_folder_hashes_same_as_empty_file( self, empty_model_file, empty_model_folder ): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) folder_manifest = serializer.serialize(empty_model_folder) file_manifest = serializer.serialize(empty_model_file) - assert folder_manifest != file_manifest + assert folder_manifest == file_manifest - def test_model_with_empty_folder_hashes_differently_than_with_empty_file( + def test_model_with_empty_folder_hashes_same_as_with_empty_file( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedDFSSerializer( + serializer = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) @@ -270,15 +270,15 @@ def test_model_with_empty_folder_hashes_differently_than_with_empty_file( new_empty_file.write_text("") file_manifest = serializer.serialize(sample_model_folder) - assert folder_manifest != file_manifest + assert folder_manifest == file_manifest def test_max_workers_does_not_change_digest(self, sample_model_folder): - serializer1 = serialize_by_file_shard.ShardedDFSSerializer( + serializer1 = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest1 = serializer1.serialize(sample_model_folder) - serializer2 = serialize_by_file_shard.ShardedDFSSerializer( + serializer2 = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256(), max_workers=2 ) manifest2 = serializer2.serialize(sample_model_folder) @@ -286,12 +286,12 @@ def test_max_workers_does_not_change_digest(self, sample_model_folder): assert manifest1 == manifest2 def test_shard_size_changes_digests(self, sample_model_folder): - serializer1 = serialize_by_file_shard.ShardedDFSSerializer( + serializer1 = serialize_by_file_shard.DigestSerializer( self._hasher_factory, memory.SHA256() ) manifest1 = 
serializer1.serialize(sample_model_folder) - serializer2 = serialize_by_file_shard.ShardedDFSSerializer( + serializer2 = serialize_by_file_shard.DigestSerializer( self._hasher_factory_small_shards, memory.SHA256() ) manifest2 = serializer2.serialize(sample_model_folder) @@ -299,7 +299,7 @@ def test_shard_size_changes_digests(self, sample_model_folder): assert manifest1.digest.digest_value != manifest2.digest.digest_value -@dataclasses.dataclass(frozen=True) +@dataclasses.dataclass(frozen=True, order=True) class _Shard: """A shard of a file from a manifest.""" @@ -338,7 +338,7 @@ def _parse_shard_and_digest(line: str) -> tuple[_Shard, str]: return shard, digest -class TestShardedFilesSerializer: +class TestManifestSerializer: def _hasher_factory( self, path: pathlib.Path, start: int, end: int @@ -359,13 +359,13 @@ def test_known_models(self, request, model_fixture_name): # Set up variables (arrange) testdata_path = request.path.parent / "testdata" test_path = testdata_path / "serialize_by_file_shard" - test_class_path = test_path / "TestShardedFilesSerializer" + test_class_path = test_path / "TestManifestSerializer" golden_path = test_class_path / model_fixture_name should_update = request.config.getoption("update_goldens") model = request.getfixturevalue(model_fixture_name) # Compute model manifest (act) - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(model) @@ -392,13 +392,13 @@ def test_known_models_small_shards(self, request, model_fixture_name): # Set up variables (arrange) testdata_path = request.path.parent / "testdata" test_path = testdata_path / "serialize_by_file_shard" - test_class_path = test_path / "TestShardedFilesSerializer" + test_class_path = test_path / "TestManifestSerializer" golden_path = test_class_path / f"{model_fixture_name}_small_shards" should_update = request.config.getoption("update_goldens") model = 
request.getfixturevalue(model_fixture_name) # Compute model manifest (act) - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory_small_shards ) manifest = serializer.serialize(model) @@ -421,7 +421,7 @@ def test_known_models_small_shards(self, request, model_fixture_name): assert items == found_items def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_file) @@ -433,7 +433,7 @@ def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): assert manifest == new_manifest def test_file_manifest_changes_if_content_changes(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_file) @@ -450,7 +450,7 @@ def test_file_manifest_changes_if_content_changes(self, sample_model_file): assert digests != new_digests def test_directory_model_with_only_known_file(self, sample_model_file): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest_file = serializer.serialize(sample_model_file) @@ -467,7 +467,7 @@ def test_directory_model_with_only_known_file(self, sample_model_file): def test_folder_model_hash_is_same_if_model_is_moved( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -479,7 +479,7 @@ def test_folder_model_hash_is_same_if_model_is_moved( assert manifest == new_manifest def test_folder_model_empty_folder_not_included(self, 
sample_model_folder): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -492,7 +492,7 @@ def test_folder_model_empty_folder_not_included(self, sample_model_folder): assert manifest == new_manifest def test_folder_model_empty_file_not_included(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -530,7 +530,7 @@ def _check_manifests_match_except_on_renamed_file( def test_folder_model_rename_file_only_changes_path_part( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -578,7 +578,7 @@ def _check_manifests_match_except_on_renamed_dir( def test_folder_model_rename_dir_only_changes_path_part( self, sample_model_folder ): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -594,7 +594,7 @@ def test_folder_model_rename_dir_only_changes_path_part( ) def test_folder_model_replace_file_empty_folder(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -633,7 +633,7 @@ def _check_manifests_match_except_on_entry( assert old_manifest._item_to_digest[shard] == digest def test_folder_model_change_file(self, sample_model_folder): - serializer = serialize_by_file_shard.ShardedFilesSerializer( + serializer = serialize_by_file_shard.ManifestSerializer( 
self._hasher_factory ) manifest = serializer.serialize(sample_model_folder) @@ -650,13 +650,13 @@ def test_folder_model_change_file(self, sample_model_folder): ) def test_max_workers_does_not_change_digest(self, sample_model_folder): - serializer1 = serialize_by_file_shard.ShardedFilesSerializer( + serializer1 = serialize_by_file_shard.ManifestSerializer( self._hasher_factory ) - serializer2 = serialize_by_file_shard.ShardedFilesSerializer( + serializer2 = serialize_by_file_shard.ManifestSerializer( self._hasher_factory, max_workers=1 ) - serializer3 = serialize_by_file_shard.ShardedFilesSerializer( + serializer3 = serialize_by_file_shard.ManifestSerializer( self._hasher_factory, max_workers=3 ) diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/deep_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/deep_model_folder new file mode 100644 index 00000000..b3a94824 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/deep_model_folder @@ -0,0 +1 @@ +6deb22c4330a8a9eb5a2d5faa73bf56c64a5c2888961f0f0df51912798fc4954 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/deep_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/deep_model_folder_small_shards new file mode 100644 index 00000000..f826b95f --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/deep_model_folder_small_shards @@ -0,0 +1 @@ +f5203504bea9ec90a7b7453a53c0aaab98a5db5d038dc1fac3613b47f6018959 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_file similarity index 100% rename from 
model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_folder rename to model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_file diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_file_small_shards similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_folder_small_shards rename to model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_file_small_shards diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_folder new file mode 100644 index 00000000..c3068040 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_folder @@ -0,0 +1 @@ +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_folder_small_shards new file mode 100644 index 00000000..c3068040 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/empty_model_folder_small_shards @@ -0,0 +1 @@ +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/model_folder_with_empty_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/model_folder_with_empty_file new file mode 100644 index 00000000..c3068040 --- /dev/null 
+++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/model_folder_with_empty_file @@ -0,0 +1 @@ +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/model_folder_with_empty_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/model_folder_with_empty_file_small_shards new file mode 100644 index 00000000..c3068040 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/model_folder_with_empty_file_small_shards @@ -0,0 +1 @@ +e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_file new file mode 100644 index 00000000..8ec1d11f --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_file @@ -0,0 +1 @@ +14aebf2e466ad30ef59ea6fce67de44dc133c673784bd543b45f75b8efc3d821 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_file_small_shards new file mode 100644 index 00000000..7b4ad705 --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_file_small_shards @@ -0,0 +1 @@ +beb3cbbd9d73133e85a102a3cbda2ef1dc2bc61e9323e32e576e4adb0571bf86 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_folder new file mode 100644 index 00000000..c94ba5d0 --- /dev/null +++ 
b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_folder @@ -0,0 +1 @@ +865a7da87d90b261ce99086bfc61986a6230e6914ad885912b4d22464a9fda13 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_folder_small_shards new file mode 100644 index 00000000..6e6fd67f --- /dev/null +++ b/model_signing/serialization/testdata/serialize_by_file_shard/TestDigestSerializer/sample_model_folder_small_shards @@ -0,0 +1 @@ +02be357fc0015ab3d15dbbd363a172f35d2cbd1a854b8e0a6c67fad2e2c3390f diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/deep_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/deep_model_folder similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/deep_model_folder rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/deep_model_folder diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/deep_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/deep_model_folder_small_shards similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/deep_model_folder_small_shards rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/deep_model_folder_small_shards diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/empty_model_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/empty_model_file similarity index 100% rename from 
model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/empty_model_file rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/empty_model_file diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/empty_model_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/empty_model_file_small_shards similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/empty_model_file_small_shards rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/empty_model_file_small_shards diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/empty_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/empty_model_folder similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/empty_model_folder rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/empty_model_folder diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/empty_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/empty_model_folder_small_shards similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/empty_model_folder_small_shards rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/empty_model_folder_small_shards diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/model_folder_with_empty_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/model_folder_with_empty_file 
similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/model_folder_with_empty_file rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/model_folder_with_empty_file diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/model_folder_with_empty_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/model_folder_with_empty_file_small_shards similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/model_folder_with_empty_file_small_shards rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/model_folder_with_empty_file_small_shards diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/sample_model_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/sample_model_file similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/sample_model_file rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/sample_model_file diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/sample_model_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/sample_model_file_small_shards similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/sample_model_file_small_shards rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/sample_model_file_small_shards diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/sample_model_folder 
b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/sample_model_folder similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/sample_model_folder rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/sample_model_folder diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/sample_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/sample_model_folder_small_shards similarity index 100% rename from model_signing/serialization/testdata/serialize_by_file_shard/TestShardedFilesSerializer/sample_model_folder_small_shards rename to model_signing/serialization/testdata/serialize_by_file_shard/TestManifestSerializer/sample_model_folder_small_shards diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder deleted file mode 100644 index 528ab87c..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder +++ /dev/null @@ -1 +0,0 @@ -52fa3c459aec58bc5f9702c73cb3c6b8fd19e9342aa3e4db851e1bde69ab1727 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder_small_shards deleted file mode 100644 index a4f2f81e..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/deep_model_folder_small_shards +++ /dev/null @@ -1 +0,0 @@ -abd66cd0d8a01f3f552ac5af717f49dc6e6575f0849ec3bfb3c9051962314ce6 diff --git 
a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file deleted file mode 100644 index 9ac3ea65..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file +++ /dev/null @@ -1 +0,0 @@ -5f2d126b0d3540c17481fdf724e31cf03b4436a2ebabaa1d2e94fe09831be64d diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file_small_shards deleted file mode 100644 index 9ac3ea65..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/empty_model_file_small_shards +++ /dev/null @@ -1 +0,0 @@ -5f2d126b0d3540c17481fdf724e31cf03b4436a2ebabaa1d2e94fe09831be64d diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file deleted file mode 100644 index b6d24eaf..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file +++ /dev/null @@ -1 +0,0 @@ -230d217d5f4f388f5087ac4174dbc9b0ff358e3122a1267b0a56669a44f11ea1 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file_small_shards deleted file mode 100644 index b6d24eaf..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/model_folder_with_empty_file_small_shards +++ /dev/null @@ -1 +0,0 @@ 
-230d217d5f4f388f5087ac4174dbc9b0ff358e3122a1267b0a56669a44f11ea1 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file deleted file mode 100644 index a94a0fa0..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file +++ /dev/null @@ -1 +0,0 @@ -2ca48c47d5311a9b2f9305519cd5f927dcef09404fc32ef7886abe8f11450eff diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file_small_shards deleted file mode 100644 index 5b6697c8..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_file_small_shards +++ /dev/null @@ -1 +0,0 @@ -284b613e2e1576d87e5e1c912c82da8d87b6350276f36940516404b2a35f1a74 diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder deleted file mode 100644 index 7fa49a73..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder +++ /dev/null @@ -1 +0,0 @@ -d22e0441cfa5ac2bc09715ddd88c802a7f97e29c93dc50f5498bab2954958ebb diff --git a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder_small_shards b/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder_small_shards deleted file mode 100644 index 161cafdf..00000000 --- a/model_signing/serialization/testdata/serialize_by_file_shard/TestShardedDFSSerializer/sample_model_folder_small_shards +++ /dev/null @@ -1 +0,0 @@ 
-82bb608d88cf741730c5bcb75a7630f560643acafdd8fa02ad24be20f51c1250