Skip to content

Commit

Permalink
Raise exception if a view to save's id already exists (#2921)
Browse files Browse the repository at this point in the history
- Raise exception if a view to save's id already exists
- Added overwrite flag to save_view
  • Loading branch information
nvoxland-al authored Aug 2, 2024
1 parent c84d7e1 commit 3794f91
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
22 changes: 20 additions & 2 deletions deeplake/core/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3615,8 +3615,17 @@ def _save_view_in_subdir(
num_workers: int,
scheduler: str,
ignore_errors: bool,
overwrite: bool = False,
):
"""Saves this view under ".queries" sub directory of same storage."""
"""Saves this view under ".queries" subdirectory of same storage."""
if not overwrite:
existing_views = self.get_views()
for v in existing_views:
if v.id == id:
raise DatasetViewSavingError(
f"View with id {id} already exists. Use a different id or delete the existing view."
)

info = self._get_view_info(id, message, copy)
hash = info["id"]
# creating sub-view of optimized view
Expand Down Expand Up @@ -3654,13 +3663,14 @@ def _save_view_in_path(
num_workers: int,
scheduler: str,
ignore_errors: bool,
overwrite: bool = False,
**ds_args,
):
"""Saves this view at a given dataset path"""
if os.path.abspath(path) == os.path.abspath(self.path):
raise DatasetViewSavingError("Rewriting parent dataset is not allowed.")
try:
vds = deeplake.empty(path, **ds_args)
vds = deeplake.empty(path, overwrite=overwrite, **ds_args)
except Exception as e:
raise DatasetViewSavingError from e
info = self._get_view_info(id, message, copy)
Expand All @@ -3678,6 +3688,7 @@ def save_view(
scheduler: str = "threaded",
verbose: bool = True,
ignore_errors: bool = False,
overwrite: bool = False,
**ds_args,
) -> str:
"""Saves a dataset view as a virtual dataset (VDS)
Expand Down Expand Up @@ -3713,6 +3724,7 @@ def save_view(
scheduler (str): The scheduler to be used for optimization. Supported values include: 'serial', 'threaded', and 'processed'. Only applicable if ``optimize=True``. Defaults to 'threaded'.
verbose (bool): If ``True``, logs will be printed. Defaults to ``True``.
ignore_errors (bool): Skip samples that cause errors while saving views. Only applicable if ``optimize=True``. Defaults to ``False``.
overwrite (bool): If ``True``, any existing view with the same id is silently overwritten. If ``False``, an exception is raised if a view with the same id already exists. Defaults to ``False``.
ds_args (dict): Additional args for creating VDS when path is specified. (See documentation for :func:`deeplake.dataset()`)
Returns:
Expand Down Expand Up @@ -3753,6 +3765,7 @@ def save_view(
verbose,
False,
ignore_errors,
overwrite,
**ds_args,
)

Expand All @@ -3768,6 +3781,7 @@ def _save_view(
verbose: bool = True,
_ret_ds: bool = False,
ignore_errors: bool = False,
overwrite: bool = False,
**ds_args,
) -> Union[str, Any]:
"""Saves a dataset view as a virtual dataset (VDS)
Expand All @@ -3786,6 +3800,7 @@ def _save_view(
_ret_ds (bool): If ``True``, the VDS is returned as such without converting it to a view. If ``False``, the VDS path is returned.
Default False.
ignore_errors (bool): Skip samples that cause errors while saving views. Only applicable if ``optimize=True``. Defaults to ``False``.
overwrite (bool): If ``True``, any existing view with the same id is silently overwritten. If ``False``, an exception is raised if a view with the same id already exists. Defaults to ``False``.
ds_args (dict): Additional args for creating VDS when path is specified. (See documentation for `deeplake.dataset()`)
Returns:
Expand Down Expand Up @@ -3829,6 +3844,7 @@ def _save_view(
num_workers,
scheduler,
ignore_errors,
overwrite,
)
except ReadOnlyModeError as e:
raise ReadOnlyModeError(
Expand All @@ -3848,6 +3864,7 @@ def _save_view(
num_workers,
scheduler,
ignore_errors,
overwrite,
)
else:
vds = self._save_view_in_path(
Expand All @@ -3859,6 +3876,7 @@ def _save_view(
num_workers,
scheduler,
ignore_errors,
overwrite,
**ds_args,
)
if verbose and self.verbose:
Expand Down
6 changes: 5 additions & 1 deletion deeplake/core/query/test/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,11 @@ def test_sub_sample_view_save(optimize, idx_subscriptable, compressed_image_path
view.save_view(optimize=optimize)
ds.commit()
ds.save_view(optimize=optimize, id=id)
view.save_view(optimize=optimize, id=id) # test overwrite

with pytest.raises(DatasetViewSavingError):
ds.save_view(optimize=optimize, id=id)

view.save_view(optimize=optimize, id=id, overwrite=True) # test overwrite
assert len(ds.get_views()) == 1
view2 = ds.get_views()[0].load()
np.testing.assert_array_equal(view.x.numpy(), view2.x.numpy())
Expand Down

0 comments on commit 3794f91

Please sign in to comment.