Skip to content

Commit

Permalink
hotfix: release v1.7.1 (#3090)
Browse files Browse the repository at this point in the history
* feat(core): add existing data directory files to dataset on creation
  • Loading branch information
Panaetius authored Sep 6, 2022
1 parent 8358a0e commit 038c114
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 13 deletions.
15 changes: 15 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,21 @@
Changes
=======

`1.7.1 <https://github.com/SwissDataScienceCenter/renku-python/compare/v1.7.0...v1.7.1>`__ (2022-09-06)
-------------------------------------------------------------------------------------------------------

Bug Fixes
~~~~~~~~~

- **cli:** fix bug with adding file to dataset that's already in its data directory
(`#3090 <https://github.com/SwissDataScienceCenter/renku-python/pull/3090>`__)

Features
~~~~~~~~

- **cli:** add existing data directory files to dataset on creation
(`#3090 <https://github.com/SwissDataScienceCenter/renku-python/pull/3090>`__)

`1.7.0 <https://github.com/SwissDataScienceCenter/renku-python/compare/v1.6.0...v1.7.0>`__ (2022-09-05)
-------------------------------------------------------------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion helm-chart/renku-core/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ appVersion: "1.0"
description: A Helm chart for Kubernetes
name: renku-core
icon: https://avatars0.githubusercontent.com/u/53332360?s=400&u=a4311d22842343604ef61a8c8a1e5793209a67e9&v=4
version: 1.7.0
version: 1.7.1
2 changes: 1 addition & 1 deletion helm-chart/renku-core/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ versions:
fullnameOverride: ""
image:
repository: renku/renku-core
tag: "v1.7.0"
tag: "v1.7.1"
pullPolicy: IfNotPresent
v8:
name: v8
Expand Down
36 changes: 34 additions & 2 deletions renku/core/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
delete_dataset_file,
delete_path,
get_absolute_path,
get_files,
get_safe_relative_path,
hash_file,
is_path_empty,
Expand Down Expand Up @@ -180,6 +181,8 @@ def create_dataset(
if storage:
provider = ProviderFactory.get_create_provider(uri=storage)
provider.on_create(dataset=dataset)
else:
add_datadir_files_to_dataset(client, dataset)

if update_provenance:
datasets_provenance.add_or_update(dataset)
Expand Down Expand Up @@ -794,11 +797,38 @@ def show_dataset(name: str, tag: Optional[str] = None):
return DatasetDetailsJson().dump(dataset)


def add_datadir_files_to_dataset(client: "LocalClient", dataset: Dataset) -> None:
    """Add all files in a dataset's data directory to the dataset.

    Args:
        client(LocalClient): The ``LocalClient``.
        dataset(Dataset): The dataset to add data dir files to.
    """
    data_directory = get_safe_relative_path(dataset.get_datadir(), client.path)

    if not data_directory.exists():
        return

    # NOTE: Collect files already present in the data directory and build
    # their dataset-file metadata in one pass.
    existing_paths: List[Path] = list(get_files(data_directory))
    new_files: List[DatasetFile] = [
        DatasetFile.from_path(client=client, path=path, source=path) for path in existing_paths
    ]

    if not new_files:
        return

    # Track large files in external storage (e.g. git LFS) before staging them.
    if client.check_external_storage():
        client.track_paths_in_storage(*existing_paths)
    client.repository.add(*existing_paths)

    dataset.add_or_update_files(new_files)


def set_dataset_images(client: "LocalClient", dataset: Dataset, images: Optional[List[ImageRequestModel]]):
"""Set a dataset's images.
Args:
client("LocalClient"): The ``LocalClient``.
client(LocalClient): The ``LocalClient``.
dataset(Dataset): The dataset to set images on.
images(List[ImageRequestModel]): The images to set.
Expand Down Expand Up @@ -1238,7 +1268,9 @@ def pull_external_data(
"""Pull/copy data for an external storage to a dataset's data directory or a specified location.
Args:
name(str): Name of the dataset
client_dispatcher(IClientDispatcher): The client dispatcher.
storage_factory(IStorageFactory): The storage factory.
name(str): Name of the dataset.
location(Optional[Path]): A directory to copy data to (Default value = None).
"""
client = client_dispatcher.current_client
Expand Down
8 changes: 7 additions & 1 deletion renku/core/dataset/dataset_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from renku.core.util.dataset import check_url
from renku.core.util.dispatcher import get_client, get_database
from renku.core.util.git import get_git_user
from renku.core.util.os import delete_dataset_file, get_relative_path
from renku.core.util.os import delete_dataset_file, get_files, get_relative_path
from renku.domain_model.dataset import Dataset, DatasetFile

if TYPE_CHECKING:
Expand Down Expand Up @@ -83,6 +83,12 @@ def add_to_dataset(

client.check_external_storage() # TODO: This is not required for external storages

datadir = cast(Path, client.path / dataset.get_datadir())
if create and datadir.exists():
# NOTE: Add datadir to paths to add missing files on create
for file in get_files(datadir):
urls.append(str(file))

files = _download_files(
client=client,
urls=urls,
Expand Down
13 changes: 8 additions & 5 deletions renku/core/dataset/providers/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from renku.core.dataset.providers.api import ExporterApi, ProviderApi, ProviderPriority
from renku.core.util import communication
from renku.core.util.dataset import check_url
from renku.core.util.os import get_absolute_path, is_path_empty
from renku.core.util.os import get_absolute_path, is_path_empty, is_subpath

if TYPE_CHECKING:
from renku.core.dataset.providers.models import DatasetAddMetadata, ProviderParameter
Expand Down Expand Up @@ -166,6 +166,7 @@ def get_destination_root():

def get_metadata(src: Path) -> DatasetAddMetadata:
is_tracked = client.repository.contains(src)
in_datadir = is_subpath(src, absolute_dataset_data_dir)

relative_path = src.relative_to(source_root)
dst = destination_root / relative_path
Expand All @@ -175,12 +176,14 @@ def get_metadata(src: Path) -> DatasetAddMetadata:

if not is_tracked and not external and action == DatasetAddAction.SYMLINK:
# NOTE: we need to commit src if it is linked to and not external.
if client.check_external_storage():
client.track_paths_in_storage(src)
client.repository.add(src)

source_url = os.path.relpath(src, client.path)
return DatasetAddMetadata(
entity_path=dst.relative_to(client.path),
url=os.path.relpath(src, client.path),
action=action,
entity_path=Path(source_url) if in_datadir else dst.relative_to(client.path),
url=source_url,
action=DatasetAddAction.NONE if in_datadir else action,
source=src,
destination=dst,
)
Expand Down
2 changes: 1 addition & 1 deletion renku/ui/cli/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,7 +818,7 @@ def add(name, urls, force, overwrite, create, destination, datadir, **kwargs):
.with_communicator(communicator)
.build()
.execute(
urls=urls,
urls=list(urls),
dataset_name=name,
force=force,
overwrite=overwrite,
Expand Down
2 changes: 1 addition & 1 deletion renku/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
except ImportError:
from importlib_metadata import distribution # type: ignore

__version__ = "1.7.0"
__version__ = "1.7.1"
__template_version__ = "0.3.1"
__minimum_project_version__ = "1.7.0"

Expand Down
32 changes: 31 additions & 1 deletion tests/cli/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,27 @@ def test_datasets_create_clean_with_datadir(runner, project, client, load_datase
assert not client.repository.is_dirty(untracked_files=True)


def test_datasets_create_with_datadir_with_files(runner, project, client, load_dataset_with_injection):
    """Test creating a dataset with a data directory that already contains files.

    Files found in the data directory at creation time must be added to the
    dataset, and the repository must be left clean afterwards.
    """
    datadir = Path("my/data/dir")
    datadir.mkdir(parents=True, exist_ok=True)

    file = datadir / "my_file"
    file.write_text("content")

    # NOTE: Click expects CLI arguments to be strings, so convert the Path.
    result = runner.invoke(cli, ["dataset", "create", "--datadir", str(datadir), "dataset"])
    assert 0 == result.exit_code, format_result_exception(result)
    assert "OK" in result.output

    dataset = load_dataset_with_injection("dataset", client)
    assert isinstance(dataset, Dataset)
    assert datadir == dataset.get_datadir(client)
    # The pre-existing file must have been picked up at creation time.
    assert dataset.find_file(file)

    assert not client.repository.is_dirty(untracked_files=True)


def test_datasets_create_dirty(runner, project, client, load_dataset_with_injection):
"""Test creating a dataset in a dirty repository."""
(client.path / "untracked").write_text("untracked")
Expand Down Expand Up @@ -475,6 +496,13 @@ def test_add_and_create_dataset(
assert 1 == result.exit_code
assert 'Dataset "new-dataset" does not exist.' in result.output

existing_file = client.path / datadir / "myfolder" / "myfile"
existing_file.parent.mkdir(parents=True, exist_ok=True)
existing_file.write_text("content")

existing_folder = client.path / datadir / "my_other_folder"
existing_folder.mkdir(parents=True, exist_ok=True)

# Add succeeds with --create
result = runner.invoke(
cli,
Expand All @@ -491,7 +519,9 @@ def test_add_and_create_dataset(
assert os.stat(path2)
assert os.stat(path3)
dataset = load_dataset_with_injection("new-dataset", client)
assert {os.path.relpath(p, client.path) for p in [path1, path2, path3]} == {f.entity.path for f in dataset.files}
assert {os.path.relpath(p, client.path) for p in [path1, path2, path3, existing_file]} == {
f.entity.path for f in dataset.files
}

# Further, add with --create fails
result = runner.invoke(cli, ["dataset", "add", "--copy", "--create", "new-dataset", str(directory_tree)])
Expand Down

0 comments on commit 038c114

Please sign in to comment.