-
Notifications
You must be signed in to change notification settings - Fork 93
Use TACL cluster in `test_all_grant_types` and wait for `ANONYMOUS FILE` grant
#3800
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
09c8ebf
cc515c9
f601a27
aaf7549
8da1010
1bddfae
28d2fc4
5087457
24b87cd
7579514
fd7a19e
510a38a
f0dfe18
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,11 +4,11 @@ | |
from collections.abc import Callable, Iterable | ||
|
||
import pytest | ||
from databricks.labs.lsql.backends import StatementExecutionBackend | ||
from databricks.labs.lsql.backends import CommandExecutionBackend, SqlBackend | ||
from databricks.sdk.errors import NotFound | ||
from databricks.sdk.retries import retried | ||
|
||
from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler | ||
from databricks.labs.ucx.hive_metastore.grants import Grant | ||
from databricks.labs.ucx.install import deploy_schema | ||
|
||
from ..conftest import MockRuntimeContext | ||
|
@@ -17,6 +17,16 @@ | |
logger = logging.getLogger(__name__) | ||
|
||
|
||
@pytest.fixture | ||
def sql_backend_tacl(ws, env_or_skip) -> SqlBackend: | ||
"""Ensure the SQL backend used during fixture setup is using the TACL cluster. | ||
|
||
The TACL cluster is used for grants. | ||
""" | ||
cluster_id = env_or_skip("TEST_LEGACY_TABLE_ACL_CLUSTER_ID") | ||
return CommandExecutionBackend(ws, cluster_id) | ||
|
||
|
||
@pytest.fixture() | ||
def _deployed_schema(runtime_ctx) -> None: | ||
"""Ensure that the schemas (and views) are initialized.""" | ||
|
@@ -25,7 +35,7 @@ def _deployed_schema(runtime_ctx) -> None: | |
|
||
|
||
@retried(on=[NotFound, TimeoutError], timeout=dt.timedelta(minutes=3)) | ||
def test_all_grant_types(runtime_ctx: MockRuntimeContext, _deployed_schema: None): | ||
def test_all_grant_types(runtime_ctx: MockRuntimeContext, sql_backend_tacl: SqlBackend, _deployed_schema: None): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am mimicking the assessment workflow here:
|
||
"""All types of grants should be reported by the grant_detail view.""" | ||
|
||
# Fixture: a group and schema to hold all the objects, the objects themselves and a grant on each to the group. | ||
|
@@ -34,40 +44,48 @@ def test_all_grant_types(runtime_ctx: MockRuntimeContext, _deployed_schema: None | |
table = runtime_ctx.make_table(schema_name=schema.name) | ||
view = runtime_ctx.make_table(schema_name=schema.name, view=True, ctas="select 1") | ||
udf = runtime_ctx.make_udf(schema_name=schema.name) | ||
runtime_ctx.sql_backend.execute(f"GRANT SELECT ON CATALOG {schema.catalog_name} TO `{group.display_name}`") | ||
runtime_ctx.sql_backend.execute(f"GRANT SELECT ON SCHEMA {schema.full_name} TO `{group.display_name}`") | ||
runtime_ctx.sql_backend.execute(f"GRANT SELECT ON TABLE {table.full_name} TO `{group.display_name}`") | ||
runtime_ctx.sql_backend.execute(f"GRANT SELECT ON VIEW {view.full_name} TO `{group.display_name}`") | ||
runtime_ctx.sql_backend.execute(f"GRANT SELECT ON FUNCTION {udf.full_name} TO `{group.display_name}`") | ||
runtime_ctx.sql_backend.execute(f"GRANT SELECT ON ANY FILE TO `{group.display_name}`") | ||
runtime_ctx.sql_backend.execute(f"GRANT SELECT ON ANONYMOUS FUNCTION TO `{group.display_name}`") | ||
|
||
# Snapshotting tables and udfs to avoid snapshot on TACL cluster during grants crawler | ||
runtime_ctx.tables_crawler.snapshot() | ||
runtime_ctx.udfs_crawler.snapshot() | ||
|
||
# Grants require TACL cluster | ||
ctx = runtime_ctx.replace(sql_backend=sql_backend_tacl) | ||
ctx.sql_backend.execute(f"GRANT SELECT ON CATALOG {schema.catalog_name} TO `{group.display_name}`") | ||
ctx.sql_backend.execute(f"GRANT SELECT ON SCHEMA {schema.full_name} TO `{group.display_name}`") | ||
ctx.sql_backend.execute(f"GRANT SELECT ON TABLE {table.full_name} TO `{group.display_name}`") | ||
ctx.sql_backend.execute(f"GRANT SELECT ON VIEW {view.full_name} TO `{group.display_name}`") | ||
ctx.sql_backend.execute(f"GRANT SELECT ON FUNCTION {udf.full_name} TO `{group.display_name}`") | ||
ctx.sql_backend.execute(f"GRANT SELECT ON ANY FILE TO `{group.display_name}`") | ||
ctx.sql_backend.execute(f"GRANT SELECT ON ANONYMOUS FUNCTION TO `{group.display_name}`") | ||
|
||
@retried(on=[ValueError], timeout=dt.timedelta(minutes=2)) | ||
def wait_for_grants(condition: Callable[[Iterable[Grant]], bool], **kwargs) -> None: | ||
"""Wait for grants to meet the condition. | ||
|
||
The method retries the condition check to account for eventual consistency of the permission API. | ||
""" | ||
grants = runtime_ctx.grants_crawler.grants(**kwargs) | ||
grants = ctx.grants_crawler.grants(**kwargs) | ||
if not condition(grants): | ||
raise ValueError("Grants do not meet condition") | ||
|
||
def contains_select_on_any_file(grants: Iterable[Grant]) -> bool: | ||
def grants_contain_select_action(grants: Iterable[Grant]) -> bool: | ||
"""Check if the SELECT permission on ANY FILE is present in the grants.""" | ||
return any(g.principal == group.display_name and g.action_type == "SELECT" for g in grants) | ||
|
||
# Wait for the grants to be available so that we can snapshot them. | ||
# Only verifying the SELECT permission on ANY FILE as it takes a while to propagate. | ||
wait_for_grants(contains_select_on_any_file, any_file=True) | ||
# Only verifying the SELECT permission on ANY FILE and ANONYMOUS FUNCTION as those take a while to propagate. | ||
wait_for_grants(grants_contain_select_action, any_file=True) | ||
wait_for_grants(grants_contain_select_action, anonymous_function=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This fails with the following error:
The API docs for this endpoint suggest using the new API endpoint. However, the choice of API endpoint is not up to us as it we use @gueniai: Could you ask the endpoint team for guidance here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried manually last night and I could make the |
||
|
||
runtime_ctx.grants_crawler.snapshot() | ||
ctx.grants_crawler.snapshot() | ||
|
||
grants_detail_query = f""" | ||
SELECT object_type, object_id | ||
FROM {runtime_ctx.inventory_database}.grant_detail | ||
WHERE principal_type='group' AND principal='{group.display_name}' and action_type='SELECT' | ||
""" | ||
grants = {(row.object_type, row.object_id) for row in runtime_ctx.sql_backend.fetch(grants_detail_query)} | ||
grants = {(row.object_type, row.object_id) for row in ctx.sql_backend.fetch(grants_detail_query)} | ||
|
||
# TODO: The types of objects targeted by grants are misclassified; this needs to be fixed. | ||
|
||
|
@@ -84,30 +102,35 @@ def contains_select_on_any_file(grants: Iterable[Grant]) -> bool: | |
|
||
|
||
@retried(on=[NotFound, TimeoutError], timeout=dt.timedelta(minutes=3)) | ||
def test_grant_findings( | ||
runtime_ctx: MockRuntimeContext, sql_backend: StatementExecutionBackend, _deployed_schema: None | ||
) -> None: | ||
def test_grant_findings(runtime_ctx: MockRuntimeContext, sql_backend_tacl: SqlBackend, _deployed_schema: None) -> None: | ||
"""Test that findings are reported for a grant.""" | ||
|
||
# Fixture: two objects, one with a grant that is okay and the other with a grant that is not okay. | ||
group = runtime_ctx.make_group() | ||
schema = runtime_ctx.make_schema() | ||
# The UDF is not used by the test, but avoids re-crawling UDFs during grants crawling | ||
runtime_ctx.make_udf(schema_name=schema.name) | ||
table_a = runtime_ctx.make_table(schema_name=schema.name) | ||
table_b = runtime_ctx.make_table(schema_name=schema.name) | ||
sql_backend.execute(f"GRANT SELECT ON TABLE {table_a.full_name} TO `{group.display_name}`") | ||
sql_backend.execute(f"DENY SELECT ON TABLE {table_b.full_name} TO `{group.display_name}`") | ||
|
||
# Snapshotting tables and udfs to avoid snapshot on TACL cluster during grants crawler | ||
runtime_ctx.tables_crawler.snapshot() | ||
runtime_ctx.udfs_crawler.snapshot() | ||
|
||
ctx = runtime_ctx.replace(sql_backend=sql_backend_tacl) | ||
ctx.sql_backend.execute(f"GRANT SELECT ON TABLE {table_a.full_name} TO `{group.display_name}`") | ||
ctx.sql_backend.execute(f"DENY SELECT ON TABLE {table_b.full_name} TO `{group.display_name}`") | ||
|
||
# Ensure the view is populated (it's based on the crawled grants) and fetch the content. | ||
GrantsCrawler(runtime_ctx.tables_crawler, runtime_ctx.udfs_crawler).snapshot() | ||
ctx.grants_crawler.snapshot() | ||
|
||
rows = sql_backend.fetch( | ||
f""" | ||
SELECT object_type, object_id, success, failures | ||
FROM {runtime_ctx.inventory_database}.grant_detail | ||
WHERE catalog='{schema.catalog_name}' AND database='{schema.name}' | ||
AND principal_type='group' AND principal='{group.display_name}' | ||
""" | ||
) | ||
grants_detail_query = f""" | ||
SELECT object_type, object_id, success, failures | ||
FROM {runtime_ctx.inventory_database}.grant_detail | ||
WHERE catalog='{schema.catalog_name}' AND database='{schema.name}' | ||
AND principal_type='group' AND principal='{group.display_name}' | ||
""" | ||
rows = ctx.sql_backend.fetch(grants_detail_query) | ||
grants = { | ||
(row.object_type, row.object_id): (row.success, json.loads(row.failures) if row.failures is not None else None) | ||
for row in rows | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I decoupled the `GrantsCrawler` from the tables crawler for two reasons:
`GrantsCrawler` is expected to run against a different `SqlBackend` in the assessment workflow, namely the TACL cluster.