From cf2e0f8608016ff94f4879fa0557ff792ad9bdf7 Mon Sep 17 00:00:00 2001 From: Pahulpreet Singh <54016648+codelixir@users.noreply.github.com> Date: Wed, 26 Feb 2025 15:18:53 +0530 Subject: [PATCH] Update hive-lineage.sh (#1306) * [hive-lineage] update hive confwhitelist to allow hive openlineage properties at runtime Signed-off-by: Pahulpreet Singh * [hive-lineage] add KERBEROS clusters to hive lineage test Signed-off-by: Pahulpreet Singh * [hive-lineage] specify openlineage namespace while submitting job in test_hive_lineage Signed-off-by: Pahulpreet Singh --------- Signed-off-by: Pahulpreet Singh --- hive-lineage/hive-lineage.sh | 3 ++- hive-lineage/test_hive_lineage.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/hive-lineage/hive-lineage.sh b/hive-lineage/hive-lineage.sh index 586d60a3d..84da22515 100644 --- a/hive-lineage/hive-lineage.sh +++ b/hive-lineage/hive-lineage.sh @@ -34,7 +34,8 @@ function set_hive_lineage_conf() { ["hive.exec.post.hooks"]="$HIVE_OL_HOOK" ["hive.exec.failure.hooks"]="$HIVE_OL_HOOK" ["hive.openlineage.transport.type"]="gcplineage" - ["hive.conf.validation"]="false" # to allow custom properties, like hive.openlineage.namespace + ["hive.security.authorization.sqlstd.confwhitelist.append"]="tez.application.tags|hive.openlineage.*" + ["hive.conf.validation"]="false" ) echo "Setting hive conf to enable lineage" for key in "${!properties[@]}"; do diff --git a/hive-lineage/test_hive_lineage.py b/hive-lineage/test_hive_lineage.py index 166c700c0..0a3d50e26 100644 --- a/hive-lineage/test_hive_lineage.py +++ b/hive-lineage/test_hive_lineage.py @@ -9,15 +9,20 @@ class HiveLineageTestCase(DataprocTestCase): TEST_SCRIPT_FILE = "hive-lineage/hivetest.hive" def __submit_hive_job(self, cluster_name): - self.assert_dataproc_job( - cluster_name, 'hive', '--file={}/{}'.format(self.INIT_ACTIONS_REPO, - self.TEST_SCRIPT_FILE)) + properties = "hive.openlineage.namespace=init-actions-test" + self.assert_dataproc_job(cluster_name, 'hive', + '--file={}/{} --properties={}'.format( + self.INIT_ACTIONS_REPO, + self.TEST_SCRIPT_FILE, + properties)) + def verify_cluster(self, name): self.__submit_hive_job(name) @parameterized.parameters( 'STANDARD', 'HA', + 'KERBEROS', ) def test_hive_job_success(self, configuration): self.createCluster(configuration,