databrickslabs · JCZuurmond · Feb 28, 2025 · Feb 28, 2025 · Feb 28, 2025 · Mar 3, 2025
@@ -254,7 +254,13 @@ def group_manager(self) -> GroupManager:
 
     @cached_property
     def grants_crawler(self) -> GrantsCrawler:
-        return GrantsCrawler(self.tables_crawler, self.udfs_crawler, self.config.include_databases)
+        return GrantsCrawler(
+            self.sql_backend,
+            self.inventory_database,
+            self.tables_crawler,
+            self.udfs_crawler,
+            include_databases=self.config.include_databases,
+        )
 
     @cached_property
     def grant_ownership(self) -> GrantOwnership:

@@ -218,13 +218,18 @@ def uc_grant_sql(self, object_type: str | None = None, object_key: str | None =
 class GrantsCrawler(CrawlerBase[Grant]):
     """Crawler that captures access controls that relate to data and other securable objects."""
 
-    def __init__(self, tables_crawler: TablesCrawler, udf: UdfsCrawler, include_databases: list[str] | None = None):
-        assert tables_crawler._sql_backend == udf._sql_backend
-        assert tables_crawler._catalog == udf._catalog
-        assert tables_crawler._schema == udf._schema
-        super().__init__(tables_crawler._sql_backend, tables_crawler._catalog, tables_crawler._schema, "grants", Grant)
+    def __init__(
+        self,
+        sql_backend: SqlBackend,
+        schema: str,
+        tables_crawler: TablesCrawler,
+        udfs_crawler: UdfsCrawler,
+        *,
+        include_databases: list[str] | None = None,
+    ):
+        super().__init__(sql_backend, "hive_metastore", schema, "grants", Grant)
         self._tables_crawler = tables_crawler
-        self._udf = udf
+        self._udfs_crawler = udfs_crawler
         self._include_databases = include_databases
 
     def snapshot(self, *, force_refresh: bool = False) -> Iterable[Grant]:
@@ -286,7 +291,7 @@ def _crawl(self) -> Iterable[Grant]:
             else:
                 task = partial(fn, view=table.name)
             tasks.append(task)
-        for udf in self._udf.snapshot():
+        for udf in self._udfs_crawler.snapshot():
             fn = partial(self.grants, catalog=catalog, database=udf.database)
             tasks.append(partial(fn, udf=udf.name))
         catalog_grants, errors = Threads.gather(f"listing grants for {catalog}", tasks)

@@ -4,11 +4,11 @@
 from collections.abc import Callable, Iterable
 
 import pytest
-from databricks.labs.lsql.backends import StatementExecutionBackend
+from databricks.labs.lsql.backends import CommandExecutionBackend, SqlBackend
 from databricks.sdk.errors import NotFound
 from databricks.sdk.retries import retried
 
-from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler
+from databricks.labs.ucx.hive_metastore.grants import Grant
 from databricks.labs.ucx.install import deploy_schema
 
 from ..conftest import MockRuntimeContext
@@ -17,6 +17,16 @@
 logger = logging.getLogger(__name__)
 
 
+@pytest.fixture
+def sql_backend_tacl(ws, env_or_skip) -> SqlBackend:
+    """Ensure the SQL backend used during fixture setup is using the TACL cluster.
+
+    The TACL cluster is used for grants.
+    """
+    cluster_id = env_or_skip("TEST_LEGACY_TABLE_ACL_CLUSTER_ID")
+    return CommandExecutionBackend(ws, cluster_id)
+
+
 @pytest.fixture()
 def _deployed_schema(runtime_ctx) -> None:
     """Ensure that the schemas (and views) are initialized."""
@@ -25,7 +35,7 @@ def _deployed_schema(runtime_ctx) -> None:
 
 
 @retried(on=[NotFound, TimeoutError], timeout=dt.timedelta(minutes=3))
-def test_all_grant_types(runtime_ctx: MockRuntimeContext, _deployed_schema: None):
+def test_all_grant_types(runtime_ctx: MockRuntimeContext, sql_backend_tacl: SqlBackend, _deployed_schema: None):
     """All types of grants should be reported by the grant_detail view."""
 
     # Fixture: a group and schema to hold all the objects, the objects themselves and a grant on each to the group.
@@ -34,40 +44,48 @@ def test_all_grant_types(runtime_ctx: MockRuntimeContext, _deployed_schema: None
     table = runtime_ctx.make_table(schema_name=schema.name)
     view = runtime_ctx.make_table(schema_name=schema.name, view=True, ctas="select 1")
     udf = runtime_ctx.make_udf(schema_name=schema.name)
-    runtime_ctx.sql_backend.execute(f"GRANT SELECT ON CATALOG {schema.catalog_name} TO `{group.display_name}`")
-    runtime_ctx.sql_backend.execute(f"GRANT SELECT ON SCHEMA {schema.full_name} TO `{group.display_name}`")
-    runtime_ctx.sql_backend.execute(f"GRANT SELECT ON TABLE {table.full_name} TO `{group.display_name}`")
-    runtime_ctx.sql_backend.execute(f"GRANT SELECT ON VIEW {view.full_name} TO `{group.display_name}`")
-    runtime_ctx.sql_backend.execute(f"GRANT SELECT ON FUNCTION {udf.full_name} TO `{group.display_name}`")
-    runtime_ctx.sql_backend.execute(f"GRANT SELECT ON ANY FILE TO `{group.display_name}`")
-    runtime_ctx.sql_backend.execute(f"GRANT SELECT ON ANONYMOUS FUNCTION TO `{group.display_name}`")
+
+    # Snapshot tables and UDFs up front to avoid crawling them on the TACL cluster during the grants crawl
+    runtime_ctx.tables_crawler.snapshot()
+    runtime_ctx.udfs_crawler.snapshot()
+
+    # Grants require TACL cluster
+    ctx = runtime_ctx.replace(sql_backend=sql_backend_tacl)
+    ctx.sql_backend.execute(f"GRANT SELECT ON CATALOG {schema.catalog_name} TO `{group.display_name}`")
+    ctx.sql_backend.execute(f"GRANT SELECT ON SCHEMA {schema.full_name} TO `{group.display_name}`")
+    ctx.sql_backend.execute(f"GRANT SELECT ON TABLE {table.full_name} TO `{group.display_name}`")
+    ctx.sql_backend.execute(f"GRANT SELECT ON VIEW {view.full_name} TO `{group.display_name}`")
+    ctx.sql_backend.execute(f"GRANT SELECT ON FUNCTION {udf.full_name} TO `{group.display_name}`")
+    ctx.sql_backend.execute(f"GRANT SELECT ON ANY FILE TO `{group.display_name}`")
+    ctx.sql_backend.execute(f"GRANT SELECT ON ANONYMOUS FUNCTION TO `{group.display_name}`")
 
     @retried(on=[ValueError], timeout=dt.timedelta(minutes=2))
     def wait_for_grants(condition: Callable[[Iterable[Grant]], bool], **kwargs) -> None:
         """Wait for grants to meet the condition.
 
         The method retries the condition check to account for eventual consistency of the permission API.
         """
-        grants = runtime_ctx.grants_crawler.grants(**kwargs)
+        grants = ctx.grants_crawler.grants(**kwargs)
         if not condition(grants):
             raise ValueError("Grants do not meet condition")
 
-    def contains_select_on_any_file(grants: Iterable[Grant]) -> bool:
+    def grants_contain_select_action(grants: Iterable[Grant]) -> bool:
         """Check if the SELECT permission on ANY FILE is present in the grants."""
         return any(g.principal == group.display_name and g.action_type == "SELECT" for g in grants)
 
     # Wait for the grants to be available so that we can snapshot them.
-    # Only verifying the SELECT permission on ANY FILE as it takes a while to propagate.
-    wait_for_grants(contains_select_on_any_file, any_file=True)
+    # Only verifying the SELECT permission on ANY FILE and ANONYMOUS FUNCTION as those take a while to propagate.
+    wait_for_grants(grants_contain_select_action, any_file=True)
+    wait_for_grants(grants_contain_select_action, anonymous_function=True)
 
-    runtime_ctx.grants_crawler.snapshot()
+    ctx.grants_crawler.snapshot()
 
     grants_detail_query = f"""
         SELECT object_type, object_id
         FROM {runtime_ctx.inventory_database}.grant_detail
         WHERE principal_type='group' AND principal='{group.display_name}' and action_type='SELECT'
     """
-    grants = {(row.object_type, row.object_id) for row in runtime_ctx.sql_backend.fetch(grants_detail_query)}
+    grants = {(row.object_type, row.object_id) for row in ctx.sql_backend.fetch(grants_detail_query)}
 
     # TODO: The types of objects targeted by grants is missclassified; this needs to be fixed.
 
@@ -84,30 +102,35 @@ def contains_select_on_any_file(grants: Iterable[Grant]) -> bool:
 
 
 @retried(on=[NotFound, TimeoutError], timeout=dt.timedelta(minutes=3))
-def test_grant_findings(
-    runtime_ctx: MockRuntimeContext, sql_backend: StatementExecutionBackend, _deployed_schema: None
-) -> None:
+def test_grant_findings(runtime_ctx: MockRuntimeContext, sql_backend_tacl: SqlBackend, _deployed_schema: None) -> None:
     """Test that findings are reported for a grant."""
 
     # Fixture: two objects, one with a grant that is okay and the other with a grant that is not okay.
     group = runtime_ctx.make_group()
     schema = runtime_ctx.make_schema()
+    # The UDF is not used by the test; creating it avoids re-crawling UDFs during grants crawling
+    runtime_ctx.make_udf(schema_name=schema.name)
     table_a = runtime_ctx.make_table(schema_name=schema.name)
     table_b = runtime_ctx.make_table(schema_name=schema.name)
-    sql_backend.execute(f"GRANT SELECT ON TABLE {table_a.full_name} TO `{group.display_name}`")
-    sql_backend.execute(f"DENY SELECT ON TABLE {table_b.full_name} TO `{group.display_name}`")
+
+    # Snapshot tables and UDFs up front to avoid crawling them on the TACL cluster during the grants crawl
+    runtime_ctx.tables_crawler.snapshot()
+    runtime_ctx.udfs_crawler.snapshot()
+
+    ctx = runtime_ctx.replace(sql_backend=sql_backend_tacl)
+    ctx.sql_backend.execute(f"GRANT SELECT ON TABLE {table_a.full_name} TO `{group.display_name}`")
+    ctx.sql_backend.execute(f"DENY SELECT ON TABLE {table_b.full_name} TO `{group.display_name}`")
 
     # Ensure the view is populated (it's based on the crawled grants) and fetch the content.
-    GrantsCrawler(runtime_ctx.tables_crawler, runtime_ctx.udfs_crawler).snapshot()
+    ctx.grants_crawler.snapshot()
 
-    rows = sql_backend.fetch(
-        f"""
-        SELECT object_type, object_id, success, failures
-        FROM {runtime_ctx.inventory_database}.grant_detail
-        WHERE catalog='{schema.catalog_name}' AND database='{schema.name}'
-          AND principal_type='group' AND principal='{group.display_name}'
-        """
-    )
+    grants_detail_query = f"""
+    SELECT object_type, object_id, success, failures
+    FROM {runtime_ctx.inventory_database}.grant_detail
+    WHERE catalog='{schema.catalog_name}' AND database='{schema.name}'
+      AND principal_type='group' AND principal='{group.display_name}'
+    """
+    rows = ctx.sql_backend.fetch(grants_detail_query)
     grants = {
         (row.object_type, row.object_id): (row.success, json.loads(row.failures) if row.failures is not None else None)
         for row in rows

@@ -9,9 +9,7 @@
 from databricks.labs.lsql.backends import StatementExecutionBackend
 
 from databricks.labs.ucx.framework.utils import escape_sql_identifier
-from databricks.labs.ucx.hive_metastore import TablesCrawler
-from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler, GrantOwnership
-from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler
+from databricks.labs.ucx.hive_metastore.grants import GrantOwnership
 from ..conftest import MockRuntimeContext
 
 logger = logging.getLogger(__name__)
@@ -123,22 +121,20 @@ def test_all_grants_for_other_objects(
     assert {"DENIED_SELECT"} == found_anonymous_function_grants[group_d.display_name]
 
 
-def test_grant_ownership(ws, runtime_ctx, inventory_schema, sql_backend, make_random, make_acc_group) -> None:
+def test_grant_ownership(ws, runtime_ctx, make_random, make_acc_group) -> None:
     """Verify the ownership can be determined for crawled grants.
     This currently isn't very useful: we can't locate specific owners for grants"""
 
     schema = runtime_ctx.make_schema()
     this_user = ws.current_user.me()
-    sql_backend.execute(f"GRANT SELECT ON SCHEMA {escape_sql_identifier(schema.full_name)} TO `{this_user.user_name}`")
-    table_crawler = TablesCrawler(sql_backend, schema=inventory_schema, include_databases=[schema.name])
-    udf_crawler = UdfsCrawler(sql_backend, schema=inventory_schema, include_databases=[schema.name])
+    runtime_ctx.sql_backend.execute(
+        f"GRANT SELECT ON SCHEMA {escape_sql_identifier(schema.full_name)} TO `{this_user.user_name}`"
+    )
     current_user = ws.current_user.me()
     admin_group_name = f"admin_group_{make_random()}"
     make_acc_group(display_name=admin_group_name, members=[current_user.id], wait_for_provisioning=True)
 
-    # Produce the crawled records.
-    crawler = GrantsCrawler(table_crawler, udf_crawler, include_databases=[schema.name])
-    records = crawler.snapshot(force_refresh=True)
+    records = runtime_ctx.grants_crawler.snapshot(force_refresh=True)
 
     # Find the crawled record for the grant we made.
     grant_record = next(record for record in records if record.this_type_and_key() == ("DATABASE", schema.full_name))

@@ -190,7 +190,7 @@ def test_crawler_no_data() -> None:
     sql_backend = MockBackend()
     table = TablesCrawler(sql_backend, "schema")
     udf = UdfsCrawler(sql_backend, "schema")
-    crawler = GrantsCrawler(table, udf)
+    crawler = GrantsCrawler(sql_backend, "test", table, udf)
     grants = list(crawler.snapshot())
     assert len(grants) == 0
 
@@ -252,7 +252,7 @@ def test_crawler_crawl() -> None:
     }
     table = TablesCrawler(sql_backend, "schema")
     udf = UdfsCrawler(sql_backend, "schema")
-    crawler = GrantsCrawler(table, udf)
+    crawler = GrantsCrawler(sql_backend, "test", table, udf)
     grants = list(crawler.snapshot())
     assert len(grants) == len(expected_grants) and set(grants) == expected_grants
 
@@ -301,7 +301,7 @@ def test_crawler_udf_crawl() -> None:
 
     table = TablesCrawler(sql_backend, "schema")
     udf = UdfsCrawler(sql_backend, "schema")
-    crawler = GrantsCrawler(table, udf)
+    crawler = GrantsCrawler(sql_backend, "test", table, udf)
     grants = list(crawler.snapshot())
 
     assert len(grants) == len(expected_grants) and set(grants) == expected_grants
@@ -311,7 +311,7 @@ def test_crawler_snapshot_when_no_data() -> None:
     sql_backend = MockBackend()
     table = TablesCrawler(sql_backend, "schema")
     udf = UdfsCrawler(sql_backend, "schema")
-    crawler = GrantsCrawler(table, udf)
+    crawler = GrantsCrawler(sql_backend, "test", table, udf)
     snapshot = list(crawler.snapshot())
     assert len(snapshot) == 0
 
@@ -320,7 +320,7 @@ def test_crawler_snapshot_with_data() -> None:
     sql_backend = MockBackend(rows=ROWS)
     table = TablesCrawler(sql_backend, "schema")
     udf = UdfsCrawler(sql_backend, "schema")
-    crawler = GrantsCrawler(table, udf)
+    crawler = GrantsCrawler(sql_backend, "test", table, udf)
     snapshot = list(crawler.snapshot())
     assert len(snapshot) == 3
 
@@ -345,10 +345,10 @@ def test_grants_returning_error_when_showing_grants() -> None:
         ],
     }
 
-    backend = MockBackend(fails_on_first=errors, rows=rows)
-    table_crawler = TablesCrawler(backend, "default")
-    udf = UdfsCrawler(backend, "default")
-    crawler = GrantsCrawler(table_crawler, udf)
+    sql_backend = MockBackend(fails_on_first=errors, rows=rows)
+    table_crawler = TablesCrawler(sql_backend, "default")
+    udf = UdfsCrawler(sql_backend, "default")
+    crawler = GrantsCrawler(sql_backend, "test", table_crawler, udf)
 
     results = list(crawler.snapshot())
     assert results == [
@@ -381,10 +381,10 @@ def test_grants_returning_error_when_describing() -> None:
         ],
     }
 
-    backend = MockBackend(fails_on_first=errors, rows=rows)
-    table_crawler = TablesCrawler(backend, "default")
-    udf = UdfsCrawler(backend, "default")
-    crawler = GrantsCrawler(table_crawler, udf)
+    sql_backend = MockBackend(fails_on_first=errors, rows=rows)
+    table_crawler = TablesCrawler(sql_backend, "default")
+    udf = UdfsCrawler(sql_backend, "default")
+    crawler = GrantsCrawler(sql_backend, "test", table_crawler, udf)
 
     results = list(crawler.snapshot())
     assert results == [
@@ -420,10 +420,10 @@ def test_udf_grants_returning_error_when_showing_grants() -> None:
         ],
     }
 
-    backend = MockBackend(fails_on_first=errors, rows=rows)
-    table_crawler = TablesCrawler(backend, "default")
-    udf = UdfsCrawler(backend, "default")
-    crawler = GrantsCrawler(table_crawler, udf)
+    sql_backend = MockBackend(fails_on_first=errors, rows=rows)
+    table_crawler = TablesCrawler(sql_backend, "default")
+    udf = UdfsCrawler(sql_backend, "default")
+    crawler = GrantsCrawler(sql_backend, "test", table_crawler, udf)
 
     results = list(crawler.snapshot())
     assert results == [
@@ -456,10 +456,10 @@ def test_udf_grants_returning_error_when_describing() -> None:
         ],
     }
 
-    backend = MockBackend(fails_on_first=errors, rows=rows)
-    table_crawler = TablesCrawler(backend, "default")
-    udf = UdfsCrawler(backend, "default")
-    crawler = GrantsCrawler(table_crawler, udf)
+    sql_backend = MockBackend(fails_on_first=errors, rows=rows)
+    table_crawler = TablesCrawler(sql_backend, "default")
+    udf = UdfsCrawler(sql_backend, "default")
+    crawler = GrantsCrawler(sql_backend, "test", table_crawler, udf)
 
     results = list(crawler.snapshot())
     assert results == [
@@ -504,7 +504,7 @@ def test_crawler_should_filter_databases() -> None:
 
     table = TablesCrawler(sql_backend, "schema", include_databases=["database_one"])
     udf = UdfsCrawler(sql_backend, "schema", include_databases=["database_one"])
-    crawler = GrantsCrawler(table, udf, include_databases=["database_one"])
+    crawler = GrantsCrawler(sql_backend, "test", table, udf, include_databases=["database_one"])
     grants = list(crawler.snapshot())
 
     assert "SHOW DATABASES" not in sql_backend.queries