diff --git a/.coveragerc b/.coveragerc index a6bc701e3a..ddd0491b78 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,3 +1,2 @@ [run] -omit = */tests/* - +omit = */tests/*, hub/collections/*, hub/codec/* diff --git a/hub/api/tests/test_dataset.py b/hub/api/tests/test_dataset.py index 35e918ede0..41880362f6 100644 --- a/hub/api/tests/test_dataset.py +++ b/hub/api/tests/test_dataset.py @@ -20,12 +20,35 @@ def test_dataset2(): dt = {"first": "float", "second": "float"} - ds = Dataset(schema=dt, shape=(2,), url="./data/test/model", mode="w") + ds = Dataset(schema=dt, shape=(2,), url="./data/test/test_dataset2", mode="w") ds["first"][0] = 2.3 assert ds["second"][0].numpy() != 2.3 +def test_dataset_append_and_read(): + dt = {"first": "float", "second": "float"} + ds = Dataset( + schema=dt, + shape=(2,), + url="./data/test/test_dataset_append_and_read", + mode="a", + ) + + ds["first"][0] = 2.3 + assert ds["second"][0].numpy() != 2.3 + ds.commit() + + ds = Dataset( + url="./data/test/test_dataset_append_and_read", + mode="r", + ) + ds.delete() + ds.commit() + + # TODO Add case when non existing dataset is opened in read mode + + def test_dataset(url="./data/test/dataset", token=None): ds = Dataset(url, token=token, shape=(10000,), mode="w", schema=my_schema) @@ -194,8 +217,12 @@ def test_dataset_s3(): @pytest.mark.skipif(not azure_creds_exist(), reason="requires azure credentials") def test_dataset_azure(): import os + token = {"account_key": os.getenv("ACCOUNT_KEY")} - test_dataset("https://activeloop.blob.core.windows.net/activeloop-hub/test_dataset_azure", token=token) + test_dataset( + "https://activeloop.blob.core.windows.net/activeloop-hub/test_dataset_azure", + token=token, + ) if __name__ == "__main__": diff --git a/hub/exceptions.py b/hub/exceptions.py index 0706748ec3..556a83bf17 100644 --- a/hub/exceptions.py +++ b/hub/exceptions.py @@ -151,73 +151,92 @@ def __init__(self, response): message = f"No permision to store the dataset at {response}" super(PermissionException, self).__init__(message=message) + class ShapeArgumentNotFoundException(HubException): def __init__(self): message = f"Parameter 'shape' should be provided for Dataset creation." super(HubException, self).__init__(message=message) + class SchemaArgumentNotFoundException(HubException): def __init__(self): message = f"Parameter 'schema' should be provided for Dataset creation." super(HubException, self).__init__(message=message) + class ValueShapeError(HubException): def __init__(self, correct_shape, wrong_shape): message = f"parameter 'value': expected array with shape {correct_shape}, got {wrong_shape}" super(HubException, self).__init__(message=message) + class NoneValueException(HubException): def __init__(self, param): message = f"Parameter '{param}' should be provided" super(HubException, self).__init__(message=message) + class ShapeLengthException(HubException): def __init__(self): message = f"Parameter 'shape' should be a tuple of length 1" super(HubException, self).__init__(message=message) + class ModuleNotInstalledException(HubException): def __init__(self, module_name): message = f"Module '{module_name}' should be installed to convert the Dataset to the {module_name} format" super(HubException, self).__init__(message=message) + class WrongUsernameException(HubException): def __init__(self, username): - message = f"The username {username} was not found. Make sure that the username provided in the url " \ - "matches the one used during login." + message = ( + f"The username {username} was not found. Make sure that the username provided in the url " + "matches the one used during login." + ) super(HubException, self).__init__(message=message) + class NotHubDatasetToOverwriteException(HubException): def __init__(self): - message = "Unable to overwrite the dataset. " \ - "The provided directory is not empty and doesn't contain information about any Hub Dataset " + message = ( + "Unable to overwrite the dataset. " + "The provided directory is not empty and doesn't contain information about any Hub Dataset " + ) super(HubException, self).__init__(message=message) + class NotHubDatasetToAppendException(HubException): def __init__(self): - message = "Unable to append to the dataset. " \ - "The provided directory is not empty and doesn't contain information about any Hub Dataset " + message = ( + "Unable to append to the dataset. " + "The provided directory is not empty and doesn't contain information about any Hub Dataset " + ) super(HubException, self).__init__(message=message) -class DynamicTensorNotFoundException(Exception): + +class DynamicTensorNotFoundException(HubException): def __init__(self): message = f"Unable to find dynamic tensor" super(HubException, self).__init__(message=message) -class DynamicTensorShapeException(Exception): + +class DynamicTensorShapeException(HubException): def __init__(self, exc_type): - if exc_type == 'none': + if exc_type == "none": message = f"Parameter 'max_shape' shouldn't contain any 'None' value" - elif exc_type == 'length': + elif exc_type == "length": message = "Lengths of 'shape' and 'max_shape' should be equal" - elif exc_type == 'not_equal': + elif exc_type == "not_equal": message = "All not-None values from 'shape' should be equal to the corresponding values in 'max_shape'" else: message = "Wrong 'shape' or 'max_shape' values" super(HubException, self).__init__(message=message) + class NotZarrFolderException(Exception): pass + class StorageTensorNotFoundException(Exception): pass diff --git a/hub/features/tests/class_label_names.txt b/hub/features/tests/class_label_names.txt new file mode 100644 index 0000000000..85c30401ce --- /dev/null +++ b/hub/features/tests/class_label_names.txt @@ -0,0 +1,3 @@ +alpha +beta +gamma diff --git a/hub/features/tests/test_features.py b/hub/features/tests/test_features.py new file mode 100644 index 0000000000..64d0eff675 --- /dev/null +++ b/hub/features/tests/test_features.py @@ -0,0 +1,26 @@ +from hub.features.class_label import ClassLabel, _load_names_from_file + +names_file = "./hub/features/tests/class_label_names.txt" + + +def test_load_names_from_file(): + assert _load_names_from_file(names_file) == [ + "alpha", + "beta", + "gamma", + ] + + +def test_class_label(): + bel1 = ClassLabel(num_classes=4) + bel2 = ClassLabel(names=["alpha", "beta", "gamma"]) + ClassLabel(names_file=names_file) + assert bel1.names == ["0", "1", "2", "3"] + assert bel2.names == ["alpha", "beta", "gamma"] + assert bel1.str2int("1") == 1 + assert bel2.str2int("gamma") == 2 + assert bel1.int2str(2) is None # FIXME This is a bug, should raise an error + assert bel2.int2str(0) == "alpha" + assert bel1.num_classes == 4 + assert bel2.num_classes == 3 + bel1.get_attr_dict() \ No newline at end of file diff --git a/hub/store/dynamic_tensor.py b/hub/store/dynamic_tensor.py index 1691cfacec..43859199e1 100644 --- a/hub/store/dynamic_tensor.py +++ b/hub/store/dynamic_tensor.py @@ -8,10 +8,11 @@ from hub.store.nested_store import NestedStore -from hub.exceptions import (DynamicTensorNotFoundException, -ValueShapeError, -DynamicTensorShapeException -) +from hub.exceptions import ( + DynamicTensorNotFoundException, + ValueShapeError, + DynamicTensorShapeException, +) from hub.api.dataset_utils import slice_extract_info @@ -159,14 +160,14 @@ def __init__( self.max_shape = self._storage_tensor.shape self.dtype = self._storage_tensor.dtype if len(self.shape) != len(self.max_shape): - raise DynamicTensorShapeException('length') + raise DynamicTensorShapeException("length") for item in self.max_shape: if item is None: - raise DynamicTensorShapeException('none') + raise DynamicTensorShapeException("none") for item in zip(self.shape, self.max_shape): if item[0] is not None: if item[0] != item[1]: - raise DynamicTensorShapeException('not_equal') + raise DynamicTensorShapeException("not_equal") def __getitem__(self, slice_): """Gets a slice or slices from tensor""" @@ -198,7 +199,7 @@ def __setitem__(self, slice_, value): def check_value_shape(self, value, slice_): """Checks if value can be set to the slice""" - if None not in self.shape and self.dtype != 'O': + if None not in self.shape and self.dtype != "O": if not all([isinstance(sh, int) for sh in slice_]): expected_value_shape = tuple( [ @@ -206,7 +207,7 @@ def check_value_shape(self, value, slice_): for i, slice_shape in enumerate(slice_) if not isinstance(slice_shape, int) ] - ) + ) if expected_value_shape[0] == 1 and len(expected_value_shape) > 1: expected_value_shape = expected_value_shape[1:] @@ -222,8 +223,11 @@ def check_value_shape(self, value, slice_): else: expected_value_shape = (1,) if isinstance(value, list): - value = np.array(value) - if isinstance(value, np.ndarray) and value.shape != expected_value_shape: + value = np.array(value) + if ( + isinstance(value, np.ndarray) + and value.shape != expected_value_shape + ): raise ValueShapeError(expected_value_shape, value.shape) return value @@ -295,6 +299,7 @@ def _get_slice(self, slice_, real_shapes): ) return tuple(slice_) + # FIXME I don't see this class being used anywhere @classmethod def _get_slice_upper_boundary(cls, slice_): if isinstance(slice_, slice): diff --git a/hub/store/tests/test_dynamic_tensor.py b/hub/store/tests/test_dynamic_tensor.py index d7729413cc..1e3c8c4ee2 100644 --- a/hub/store/tests/test_dynamic_tensor.py +++ b/hub/store/tests/test_dynamic_tensor.py @@ -2,21 +2,44 @@ import numpy as np import fsspec +from zarr.creation import create from hub.store.dynamic_tensor import DynamicTensor from hub.store.store import StorageMapWrapperWithCommit -def create_store(path: str): +def create_store(path: str, overwrite=True): fs: fsspec.AbstractFileSystem = fsspec.filesystem("file") - if fs.exists(path): + if fs.exists(path) and overwrite: fs.rm(path, recursive=True) - fs.makedirs(posixpath.join(path, "--dynamic--")) + dynpath = posixpath.join(path, "--dynamic--") + if not fs.exists(dynpath): + fs.makedirs(dynpath) mapper = fs.get_mapper(path) mapper["--dynamic--/hello.txt"] = bytes("Hello World", "utf-8") return StorageMapWrapperWithCommit(mapper) +def test_read_and_append_modes(): + t = DynamicTensor( + create_store("./data/test/test_read_and_append_modes"), + mode="a", + shape=(5, 100, 100), + max_shape=(5, 100, 100), + dtype="int32", + ) + t[0, 80:, 80:] = np.ones((20, 20), dtype="int32") + assert t[0, -5, 90:].tolist() == [1] * 10 + t.commit() + t = DynamicTensor( + create_store("./data/test/test_read_and_append_modes", overwrite=False), + mode="r", + ) + t.get_shape(0) == (100, 100) + assert t[0, -5, 90:].tolist() == [1] * 10 + t.commit() + + def test_dynamic_tensor(): t = DynamicTensor( create_store("./data/test/test_dynamic_tensor"), @@ -42,6 +65,11 @@ def test_dynamic_tensor_2(): assert t[0, 5, :].tolist() == [1] * 10 t[0, 6] = 2 * np.ones((20,), dtype="int32") assert t[0, 5, :].tolist() == [1] * 10 + [0] * 10 + assert t.get_shape([0]) == ( + 10, + 20, + ) # FIXME This is a bug accessing [0], should be just 0 + assert t.get_shape([slice(0, 1)]) == (10, 20) # FIXME This is also a bug def test_dynamic_tensor_3(): @@ -69,6 +97,7 @@ def test_dynamic_tensor_shapes(): t[0] = np.ones((5, 10), dtype="int32") t[0, 6] = 2 * np.ones((20,), dtype="int32") assert t[0, -1].tolist() == [2] * 20 + t.commit() def test_dynamic_tensor_4(): @@ -97,6 +126,6 @@ def test_chunk_iterator(): if __name__ == "__main__": - test_dynamic_tensor_2() + test_read_and_append_modes() # test_chunk_iterator() # test_dynamic_tensor_shapes() \ No newline at end of file diff --git a/hub/store/tests/test_nested_store.py b/hub/store/tests/test_nested_store.py new file mode 100644 index 0000000000..66c3eaa2fb --- /dev/null +++ b/hub/store/tests/test_nested_store.py @@ -0,0 +1,23 @@ +from hub.store.nested_store import NestedStore + +import zarr + + +def test_nested_store(): + store = NestedStore(zarr.MemoryStore(), "hello") + store["item"] = bytes("Hello World", "utf-8") + assert store["item"] == bytes("Hello World", "utf-8") + del store["item"] + assert store.get("item") is None + store["item1"] = bytes("Hello World 1", "utf-8") + store["item2"] = bytes("Hello World 2", "utf-8") + assert len(store) == 2 + assert tuple(store) == ("item1", "item2") + try: + store.commit() + except AttributeError as ex: + assert "'MemoryStore' object has no attribute 'commit'" in str(ex) + + +if __name__ == "__main__": + test_nested_store() \ No newline at end of file diff --git a/hub/tests/test_exceptions.py b/hub/tests/test_exceptions.py new file mode 100644 index 0000000000..f7ad5612c5 --- /dev/null +++ b/hub/tests/test_exceptions.py @@ -0,0 +1,67 @@ +from hub.exceptions import ( + HubException, + AuthenticationException, + AuthorizationException, + NotFound, + NotFoundException, + BadRequestException, + OverLimitException, + ServerException, + BadGatewayException, + GatewayTimeoutException, + WaitTimeoutException, + LockedException, + HubDatasetNotFoundException, + PermissionException, + ShapeArgumentNotFoundException, + SchemaArgumentNotFoundException, + ValueShapeError, + NoneValueException, + ShapeLengthException, + ModuleNotInstalledException, + WrongUsernameException, + NotHubDatasetToAppendException, + NotHubDatasetToOverwriteException, + DynamicTensorNotFoundException, + DynamicTensorShapeException, +) + + +class Response: + def __init__(self, noerror=False): + self.content = "Hello World" + if noerror: + self.json = lambda: {"message": "Hello There"} + + +def test_exceptions(): + HubException() + AuthenticationException() + AuthorizationException(Response()) + AuthorizationException(Response(noerror=True)) + NotFoundException() + BadRequestException(Response()) + BadRequestException(Response(noerror=True)) + OverLimitException() + ServerException() + BadGatewayException() + GatewayTimeoutException() + WaitTimeoutException() + LockedException() + HubDatasetNotFoundException("Hello") + PermissionException("Hello") + ShapeLengthException() + ShapeArgumentNotFoundException() + SchemaArgumentNotFoundException() + ValueShapeError("Shape 1", "Shape 2") + NoneValueException("Yahoo!") + ModuleNotInstalledException("my_module") + WrongUsernameException("usernameX") + NotHubDatasetToOverwriteException() + NotHubDatasetToAppendException() + DynamicTensorNotFoundException() + + DynamicTensorShapeException("none") + DynamicTensorShapeException("length") + DynamicTensorShapeException("not_equal") + DynamicTensorShapeException("another_cause")