diff --git a/model_signing/serialization/serialize_by_file.py b/model_signing/serialization/serialize_by_file.py
index d182047a..19d54083 100644
--- a/model_signing/serialization/serialize_by_file.py
+++ b/model_signing/serialization/serialize_by_file.py
@@ -14,10 +14,11 @@
 """Model serializers that operated at file level granularity."""

+import abc
 import base64
 import concurrent.futures
 import pathlib
-from typing import Callable, Iterable
+from typing import Callable, Iterable, cast

 from typing_extensions import override

 from model_signing.hashing import file
@@ -65,8 +66,6 @@ def _build_header(
         bytes. Each argument is separated by dots and the last byte is also a
         dot (so the file digest can be appended unambiguously).
     """
-    # Note: This will get replaced in subsequent change, right now we're just
-    # moving existing code around.
     encoded_type = entry_type.encode("utf-8")
     # Prevent confusion if name has a "." inside by encoding to base64.
     encoded_name = base64.b64encode(entry_name.encode("utf-8"))
@@ -74,75 +73,14 @@
     return b".".join([encoded_type, encoded_name, b""])


-class DFSSerializer(serialization.Serializer):
-    """Serializer for a model that performs a traversal of the model directory.
-
-    This serializer produces a single hash for the entire model. If the model is
-    a file, the hash is the digest of the file. If the model is a directory, we
-    perform a depth-first traversal of the directory, hash each individual files
-    and aggregate the hashes together.
-    """
-
-    def __init__(
-        self,
-        file_hasher: file.SimpleFileHasher,
-        merge_hasher_factory: Callable[[], hashing.StreamingHashEngine],
-    ):
-        """Initializes an instance to serialize a model with this serializer.
-
-        Args:
-            hasher: The hash engine used to hash the individual files.
-            merge_hasher_factory: A callable that returns a
-              `hashing.StreamingHashEngine` instance used to merge individual
-              file digests to compute an aggregate digest.
-        """
-        self._file_hasher = file_hasher
-        self._merge_hasher_factory = merge_hasher_factory
-
-    @override
-    def serialize(self, model_path: pathlib.Path) -> manifest.DigestManifest:
-        # TODO: github.com/sigstore/model-transparency/issues/196 - Add checks
-        # to exclude symlinks if desired.
-        check_file_or_directory(model_path)
-
-        if model_path.is_file():
-            self._file_hasher.set_file(model_path)
-            return manifest.DigestManifest(self._file_hasher.compute())
-
-        return manifest.DigestManifest(self._dfs(model_path))
-
-    def _dfs(self, directory: pathlib.Path) -> hashing.Digest:
-        # TODO: github.com/sigstore/model-transparency/issues/196 - Add support
-        # for excluded files.
-        children = sorted([x for x in directory.iterdir()])
-
-        hasher = self._merge_hasher_factory()
-        for child in children:
-            check_file_or_directory(child)
-
-            if child.is_file():
-                header = _build_header(entry_name=child.name, entry_type="file")
-                hasher.update(header)
-                self._file_hasher.set_file(child)
-                digest = self._file_hasher.compute()
-                hasher.update(digest.digest_value)
-            else:
-                header = _build_header(entry_name=child.name, entry_type="dir")
-                hasher.update(header)
-                digest = self._dfs(child)
-                hasher.update(digest.digest_value)
-
-        return hasher.compute()
-
-
 class FilesSerializer(serialization.Serializer):
-    """Model serializers that produces an itemized manifest, at file level.
+    """Generic file serializer.

     Traverses the model directory and creates digests for every file found,
     possibly in parallel.

-    Since the manifest lists each item individually, this will also enable
-    support for incremental updates (to be added later).
+    Subclasses can then create a manifest with these digests, either listing
+    them item by item, or combining everything into a single digest.
     """

     def __init__(
@@ -162,7 +100,7 @@ def __init__(
         self._max_workers = max_workers

     @override
-    def serialize(self, model_path: pathlib.Path) -> manifest.FileLevelManifest:
+    def serialize(self, model_path: pathlib.Path) -> manifest.Manifest:
         # TODO: github.com/sigstore/model-transparency/issues/196 - Add checks
         # to exclude symlinks if desired.
         check_file_or_directory(model_path)
@@ -210,12 +148,176 @@ def _compute_hash(
         digest = self._hasher_factory(path).compute()
         return manifest.FileManifestItem(path=relative_path, digest=digest)

+    @abc.abstractmethod
     def _build_manifest(
         self, items: Iterable[manifest.FileManifestItem]
-    ) -> manifest.FileLevelManifest:
-        """Builds an itemized manifest from a given list of items.
+    ) -> manifest.Manifest:
+        """Builds the manifest representing the serialization of the model."""
+        pass
+
+
+class ManifestSerializer(FilesSerializer):
+    """Model serializer that produces an itemized manifest, at file level.
+
+    Since the manifest lists each item individually, this will also enable
+    support for incremental updates (to be added later).
+    """

-        Every subclass needs to implement this method to determine the format of
-        the manifest.
+    @override
+    def serialize(self, model_path: pathlib.Path) -> manifest.FileLevelManifest:
+        """Serializes the model given by the `model_path` argument.
+
+        The only reason for the override is to change the return type, to be
+        more restrictive. This is to signal that the only manifests that can be
+        returned are `manifest.FileLevelManifest` instances.
         """
+        return cast(manifest.FileLevelManifest, super().serialize(model_path))
+
+    @override
+    def _build_manifest(
+        self, items: Iterable[manifest.FileManifestItem]
+    ) -> manifest.FileLevelManifest:
         return manifest.FileLevelManifest(items)
+
+
+class _FileDigestTree:
+    """A tree of files with their digests.
+
+    Every leaf in the tree is a file, paired with its digest. Every intermediate
+    node represents a directory. We need to pair every directory with a digest,
+    in a bottom-up fashion.
+    """
+
+    def __init__(
+        self, path: pathlib.PurePath, digest: hashing.Digest | None = None
+    ):
+        """Builds a node in the digest tree.
+
+        Don't call this from outside of the class. Instead, use `build_tree`.
+
+        Args:
+            path: Path included in the node.
+            digest: Optional hash of the path. Files must have a digest,
+              directories never have one.
+ """ + self._path = path + self._digest = digest + self._children: list[_FileDigestTree] = [] + + @classmethod + def build_tree( + cls, items: Iterable[manifest.FileManifestItem] + ) -> "_FileDigestTree": + """Builds a tree out of the sequence of manifest items.""" + path_to_node: dict[pathlib.PurePath, _FileDigestTree] = {} + + for file_item in items: + file = file_item.path + node = cls(file, file_item.digest) + for parent in file.parents: + if parent in path_to_node: + parent_node = path_to_node[parent] + parent_node._children.append(node) + break # everything else already exists + + parent_node = cls(parent) # no digest for directories + parent_node._children.append(node) + path_to_node[parent] = parent_node + node = parent_node + + # Handle empty model + if not path_to_node: + return cls(pathlib.PurePosixPath()) + + return path_to_node[pathlib.PurePosixPath()] + + def get_digest( + self, + hasher_factory: Callable[[], hashing.StreamingHashEngine], + ) -> hashing.Digest: + """Returns the digest of this tree of files. + + Args: + hasher_factory: A callable that returns a + `hashing.StreamingHashEngine` instance used to merge individual + digests to compute an aggregate digest. + """ + hasher = hasher_factory() + + for child in sorted(self._children, key=lambda c: c._path): + name = child._path.name + if child._digest is not None: + header = _build_header(entry_name=name, entry_type="file") + hasher.update(header) + hasher.update(child._digest.digest_value) + else: + header = _build_header(entry_name=name, entry_type="dir") + hasher.update(header) + digest = child.get_digest(hasher_factory) + hasher.update(digest.digest_value) + + return hasher.compute() + + +class DigestSerializer(FilesSerializer): + """Serializer for a model that performs a traversal of the model directory. + + This serializer produces a single hash for the entire model. If the model is + a file, the hash is the digest of the file. If the model is a directory, we + perform a depth-first traversal of the directory, hash each individual files + and aggregate the hashes together. + + Currently, this has a different initialization than `FilesSerializer`, but + this will likely change in a subsequent change. Similarly, currently, this + only supports one single worker, but this will change in the future. + """ + + def __init__( + self, + file_hasher: file.SimpleFileHasher, + merge_hasher_factory: Callable[[], hashing.StreamingHashEngine], + ): + """Initializes an instance to serialize a model with this serializer. + + Args: + hasher: The hash engine used to hash the individual files. + merge_hasher_factory: A callable that returns a + `hashing.StreamingHashEngine` instance used to merge individual + file digests to compute an aggregate digest. + """ + + def _factory(path: pathlib.Path) -> file.FileHasher: + file_hasher.set_file(path) + return file_hasher + + super().__init__(_factory, max_workers=1) + self._merge_hasher_factory = merge_hasher_factory + + @override + def serialize(self, model_path: pathlib.Path) -> manifest.DigestManifest: + """Serializes the model given by the `model_path` argument. + + The only reason for the override is to change the return type, to be + more restrictive. This is to signal that the only manifests that can be + returned are `manifest.DigestManifest` instances. 
+ """ + return cast(manifest.DigestManifest, super().serialize(model_path)) + + @override + def _build_manifest( + self, items: Iterable[manifest.FileManifestItem] + ) -> manifest.DigestManifest: + # Note: we do several computations here to try and match the old + # behavior but these would be simplified in the future. Since we are + # defining the hashing behavior, we can freely change this. + + # If the model is just one file, return the hash of the file. + # A model is a file if we have one item only and its path is empty. + items = list(items) + if len(items) == 1 and not items[0].path.name: + return manifest.DigestManifest(items[0].digest) + + # Otherwise, build a tree of files and compute the digests. + tree = _FileDigestTree.build_tree(items) + digest = tree.get_digest(self._merge_hasher_factory) + return manifest.DigestManifest(digest) diff --git a/model_signing/serialization/serialize_by_file_test.py b/model_signing/serialization/serialize_by_file_test.py index 8cd09ab5..e0a799b0 100644 --- a/model_signing/serialization/serialize_by_file_test.py +++ b/model_signing/serialization/serialize_by_file_test.py @@ -23,13 +23,15 @@ from model_signing.serialization import test_support -class TestDFSSerializer: +class TestDigestSerializer: def test_known_file(self, sample_model_file): file_hasher = file.SimpleFileHasher( test_support.UNUSED_PATH, memory.SHA256() ) - serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256) + serializer = serialize_by_file.DigestSerializer( + file_hasher, memory.SHA256 + ) expected = ( "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b" ) @@ -42,7 +44,9 @@ def test_file_hash_is_same_as_hash_of_content(self, sample_model_file): file_hasher = file.SimpleFileHasher( test_support.UNUSED_PATH, memory.SHA256() ) - serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256) + serializer = serialize_by_file.DigestSerializer( + file_hasher, memory.SHA256 + ) manifest = serializer.serialize(sample_model_file) digest = memory.SHA256(test_support.KNOWN_MODEL_TEXT).compute() @@ -53,7 +57,9 @@ def test_file_manifest_unchanged_when_model_moved(self, sample_model_file): file_hasher = file.SimpleFileHasher( test_support.UNUSED_PATH, memory.SHA256() ) - serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256) + serializer = serialize_by_file.DigestSerializer( + file_hasher, memory.SHA256 + ) manifest = serializer.serialize(sample_model_file) new_name = sample_model_file.with_name("new-file") @@ -66,7 +72,9 @@ def test_file_manifest_changes_if_content_changes(self, sample_model_file): file_hasher = file.SimpleFileHasher( test_support.UNUSED_PATH, memory.SHA256() ) - serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256) + serializer = serialize_by_file.DigestSerializer( + file_hasher, memory.SHA256 + ) manifest = serializer.serialize(sample_model_file) sample_model_file.write_bytes(test_support.ANOTHER_MODEL_TEXT) @@ -80,7 +88,9 @@ def test_directory_model_with_only_known_file(self, sample_model_file): file_hasher = file.SimpleFileHasher( test_support.UNUSED_PATH, memory.SHA256() ) - serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256) + serializer = serialize_by_file.DigestSerializer( + file_hasher, memory.SHA256 + ) manifest_file = serializer.serialize(sample_model_file) content_digest = memory.SHA256(test_support.KNOWN_MODEL_TEXT).compute() @@ -93,7 +103,9 @@ def test_known_folder(self, sample_model_folder): file_hasher = file.SimpleFileHasher( test_support.UNUSED_PATH, 
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         expected = (
             "310af4fc4c52bf63cd1687c67076ed3e56bc5480a1b151539e6c550506ae0301"
         )
@@ -108,7 +120,9 @@ def test_folder_model_hash_is_same_if_model_is_moved(
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         manifest = serializer.serialize(sample_model_folder)

         new_name = sample_model_folder.with_name("new-root")
@@ -117,11 +131,13 @@ def test_folder_model_hash_is_same_if_model_is_moved(

         assert manifest == new_manifest

-    def test_folder_model_empty_folder_gets_included(self, sample_model_folder):
+    def test_folder_model_empty_folder_not_included(self, sample_model_folder):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -129,13 +145,15 @@ def test_folder_model_empty_folder_gets_included(self, sample_model_folder):
         new_empty_dir.mkdir()
         new_manifest = serializer.serialize(sample_model_folder)

-        assert manifest != new_manifest
+        assert manifest == new_manifest

     def test_folder_model_empty_file_gets_included(self, sample_model_folder):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -149,7 +167,9 @@ def test_folder_model_rename_file(self, sample_model_folder):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -164,7 +184,9 @@ def test_folder_model_rename_dir(self, sample_model_folder):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         manifest = serializer.serialize(sample_model_folder)

         dir_to_rename = test_support.get_first_directory(sample_model_folder)
@@ -178,7 +200,9 @@ def test_folder_model_replace_file_empty_folder(self, sample_model_folder):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -193,7 +217,9 @@ def test_folder_model_change_file(self, sample_model_folder):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -207,7 +233,9 @@ def test_deep_folder(self, deep_model_folder):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         expected = (
             "36eed9389ebbbe15ac15d33c81dabb60ccb7c945ff641d78f59db9aa9dc47ac9"
         )
@@ -220,7 +248,9 @@ def test_empty_file(self, empty_model_file):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         expected = (
             "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
         )
@@ -233,7 +263,9 @@ def test_empty_folder(self, empty_model_folder):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         expected = (
             "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
         )
@@ -246,7 +278,9 @@ def test_directory_model_with_only_empty_file(self, empty_model_file):
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         expected = (
             "8a587b2129fdecfbea38d5152b626299f5994d9b99d36b321aea356f69b38c61"
         )
@@ -261,7 +295,9 @@ def test_empty_folder_hashes_differently_than_empty_file(
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )
         folder_manifest = serializer.serialize(empty_model_folder)

         file_manifest = serializer.serialize(empty_model_file)
@@ -274,7 +310,9 @@ def test_model_with_empty_folder_hashes_differently_than_with_empty_file(
         file_hasher = file.SimpleFileHasher(
             test_support.UNUSED_PATH, memory.SHA256()
         )
-        serializer = serialize_by_file.DFSSerializer(file_hasher, memory.SHA256)
+        serializer = serialize_by_file.DigestSerializer(
+            file_hasher, memory.SHA256
+        )

         # Compute digest of model with empty folder
         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -291,13 +329,13 @@ def test_model_with_empty_folder_hashes_differently_than_with_empty_file(
         assert folder_manifest != file_manifest


-class TestFilesSerializer:
+class TestManifestSerializer:

     def _hasher_factory(self, path: pathlib.Path) -> file.FileHasher:
         return file.SimpleFileHasher(path, memory.SHA256())

     def test_known_file(self, sample_model_file):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         expected = [
             "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
         ]
@@ -308,7 +346,7 @@ def test_known_file(self, sample_model_file):
         assert digests == expected

     def test_file_manifest_unchanged_when_model_moved(self, sample_model_file):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_file)

         new_name = sample_model_file.with_name("new-file")
@@ -318,7 +356,7 @@ def test_file_manifest_unchanged_when_model_moved(self, sample_model_file):
         assert manifest == new_manifest

     def test_file_manifest_changes_if_content_changes(self, sample_model_file):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_file)
         digests = set(test_support.extract_digests_from_manifest(manifest))

@@ -333,7 +371,7 @@ def test_file_manifest_changes_if_content_changes(self, sample_model_file):
         assert len(digests) == len(new_digests)

     def test_directory_model_with_only_known_file(self, sample_model_file):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest_file = serializer.serialize(sample_model_file)
         digests_file = set(
             test_support.extract_digests_from_manifest(manifest_file)
@@ -346,7 +384,7 @@ def test_directory_model_with_only_known_file(self, sample_model_file):
         assert digests == digests_file

     def test_known_folder(self, sample_model_folder):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         # Long hashes, want to update easily, so pylint: disable=line-too-long
         expected_items = {
             "f0": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc",
@@ -370,7 +408,7 @@ def test_known_folder(self, sample_model_folder):
     def test_folder_model_hash_is_same_if_model_is_moved(
         self, sample_model_folder
     ):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_folder)

         new_name = sample_model_folder.with_name("new-root")
@@ -380,7 +418,7 @@ def test_folder_model_hash_is_same_if_model_is_moved(
         assert manifest == new_manifest

     def test_folder_model_empty_folder_not_included(self, sample_model_folder):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -391,7 +429,7 @@ def test_folder_model_empty_folder_not_included(self, sample_model_folder):
         assert manifest == new_manifest

     def test_folder_model_empty_file_gets_included(self, sample_model_folder):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -431,7 +469,7 @@ def _check_manifests_match_except_on_renamed_file(
     def test_folder_model_rename_file_only_changes_path_part(
         self, sample_model_folder
     ):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -476,7 +514,7 @@ def _check_manifests_match_except_on_renamed_dir(
     def test_folder_model_rename_dir_only_changes_path_part(
         self, sample_model_folder
     ):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_folder)

         dir_to_rename = test_support.get_first_directory(sample_model_folder)
@@ -490,7 +528,7 @@ def test_folder_model_rename_dir_only_changes_path_part(
         )

     def test_folder_model_replace_file_empty_folder(self, sample_model_folder):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -525,7 +563,7 @@ def _check_manifests_match_except_on_entry(
         assert old_manifest._item_to_digest[path] == digest

     def test_folder_model_change_file(self, sample_model_folder):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(sample_model_folder)

         altered_dir = test_support.get_first_directory(sample_model_folder)
@@ -540,7 +578,7 @@ def test_folder_model_change_file(self, sample_model_folder):
         )

     def test_deep_folder(self, deep_model_folder):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         # Long hashes, want to update easily, so pylint: disable=line-too-long
         expected_items = {
             "d0/d1/d2/d3/d4/f0": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff",
@@ -556,7 +594,7 @@ def test_deep_folder(self, deep_model_folder):
         assert items == expected_items

     def test_empty_file(self, empty_model_file):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         expected = [
             "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
         ]
@@ -567,16 +605,16 @@ def test_empty_file(self, empty_model_file):
         assert digests == expected

     def test_empty_folder(self, empty_model_folder):
-        serializer = serialize_by_file.FilesSerializer(self._hasher_factory)
+        serializer = serialize_by_file.ManifestSerializer(self._hasher_factory)
         manifest = serializer.serialize(empty_model_folder)

         assert not manifest._item_to_digest

     def test_max_workers_does_not_change_digest(self, sample_model_folder):
-        serializer1 = serialize_by_file.FilesSerializer(self._hasher_factory)
-        serializer2 = serialize_by_file.FilesSerializer(
+        serializer1 = serialize_by_file.ManifestSerializer(self._hasher_factory)
+        serializer2 = serialize_by_file.ManifestSerializer(
             self._hasher_factory, max_workers=1
         )
-        serializer3 = serialize_by_file.FilesSerializer(
+        serializer3 = serialize_by_file.ManifestSerializer(
             self._hasher_factory, max_workers=3
         )
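For reviewers, a minimal usage sketch of the two serializers after this refactor, mirroring the constructor calls exercised in the tests above. The model path and the placeholder path handed to `SimpleFileHasher` are illustrative assumptions only (the tests use pytest fixtures and `test_support.UNUSED_PATH` instead).

import pathlib

from model_signing.hashing import file
from model_signing.hashing import memory
from model_signing.serialization import serialize_by_file

# Hypothetical model location, for illustration only.
model_path = pathlib.Path("path/to/model")

# Single aggregate digest over the whole model (the behavior previously
# provided by DFSSerializer). The initial path given to SimpleFileHasher is a
# placeholder; DigestSerializer re-points the hasher at each file it visits.
digest_serializer = serialize_by_file.DigestSerializer(
    file.SimpleFileHasher(pathlib.Path("unused"), memory.SHA256()),
    memory.SHA256,
)
digest_manifest = digest_serializer.serialize(model_path)

# Itemized manifest with one digest per file (the behavior previously provided
# by FilesSerializer), hashing files in parallel if desired.
manifest_serializer = serialize_by_file.ManifestSerializer(
    lambda path: file.SimpleFileHasher(path, memory.SHA256()), max_workers=3
)
file_level_manifest = manifest_serializer.serialize(model_path)

The two calls return `manifest.DigestManifest` and `manifest.FileLevelManifest` respectively, matching the narrowed return types of the two `serialize` overrides introduced in this change.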