Skip to content

Commit

Permalink
[asset-graph-view] Make deserializing subsets safer (#25340)
Browse files Browse the repository at this point in the history
## Summary & Motivation

As title. This guards against situations in which an asset is completely removed from the graph and we try to get an EntitySubset from a SerializedEntitySubset.

## How I Tested These Changes

## Changelog

NOCHANGELOG
  • Loading branch information
OwenKephart authored Oct 22, 2024
1 parent 5e5af4a commit 3fec910
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,12 @@ def get_empty_subset(self, *, key: T_EntityKey) -> EntitySubset[T_EntityKey]:
def get_subset_from_serializable_subset(
self, serializable_subset: SerializableEntitySubset[T_EntityKey]
) -> Optional[EntitySubset[T_EntityKey]]:
if serializable_subset.is_compatible_with_partitions_def(
self._get_partitions_def(serializable_subset.key)
key = serializable_subset.key
if self.asset_graph.has(key) and serializable_subset.is_compatible_with_partitions_def(
self._get_partitions_def(key)
):
return EntitySubset(
self,
key=serializable_subset.key,
value=_ValidatedEntitySubsetValue(serializable_subset.value),
self, key=key, value=_ValidatedEntitySubsetValue(serializable_subset.value)
)
else:
return None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,8 @@ def asset_nodes(self) -> Iterable[T_AssetNode]:
def nodes(self) -> Iterable[BaseEntityNode]:
return [*self._asset_nodes_by_key.values(), *self._asset_check_nodes_by_key.values()]

def has(self, asset_key: AssetKey) -> bool:
return asset_key in self._asset_nodes_by_key
def has(self, key: EntityKey) -> bool:
return key in self._asset_nodes_by_key or key in self._asset_check_nodes_by_key

@overload
def get(self, key: AssetKey) -> T_AssetNode: ...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
PartitionsDefinition,
StaticPartitionsDefinition,
asset,
deserialize_value,
serialize_value,
)
from dagster._core.asset_graph_view.asset_graph_view import AssetGraphView
from dagster._core.asset_graph_view.asset_graph_view import AssetGraphView, SerializableEntitySubset

partitions_defs = [
None,
Expand Down Expand Up @@ -65,3 +67,31 @@ def _assert_matches_operation(res, oper):
_assert_matches_operation(and_res, operator.and_)
sub_res = a.compute_difference(b)
_assert_matches_operation(sub_res, operator.sub)


@pytest.mark.parametrize("partitions_def", partitions_defs)
def test_round_trip(partitions_def: Optional[PartitionsDefinition]) -> None:
@asset(partitions_def=partitions_def)
def foo() -> None: ...

defs = Definitions([foo])
instance = DagsterInstance.ephemeral()
asset_graph_view = AssetGraphView.for_test(defs, instance)

initial_subset = asset_graph_view.get_full_subset(key=foo.key)

inner_subset = deserialize_value(
serialize_value(initial_subset.convert_to_serializable_subset()), SerializableEntitySubset
)
subset = asset_graph_view.get_subset_from_serializable_subset(inner_subset)

assert subset is not None
assert (
initial_subset.expensively_compute_asset_partitions()
== subset.expensively_compute_asset_partitions()
)

# if asset is removed, don't error just return None
empty_asset_graph_view = AssetGraphView.for_test(Definitions(), instance)
subset = empty_asset_graph_view.get_subset_from_serializable_subset(inner_subset)
assert subset is None

0 comments on commit 3fec910

Please sign in to comment.