
Commit

fixing presubmit failures for the respective components (#1133)
prince-cs authored Feb 8, 2024
1 parent 950ad89 commit 3692b80
Showing 25 changed files with 138 additions and 37 deletions.
2 changes: 1 addition & 1 deletion BUILD
@@ -61,7 +61,7 @@ py_test(
name = "test_cloud_sql_proxy",
size = "enormous",
srcs = ["cloud-sql-proxy/test_cloud_sql_proxy.py"],
data = ["cloud-sql-proxy/cloud-sql-proxy.sh"],
data = ["cloud-sql-proxy/cloud-sql-proxy.sh", "cloud-sql-proxy/hivetest.hive"],
local = True,
shard_count = 3,
deps = [
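
Listing the Hive script under data is what stages it into the Bazel test sandbox alongside the init action. A minimal way to exercise just this target (a sketch assuming the repository's usual Bazel workflow; the label is inferred from the root BUILD file, not stated in this commit) would be:

    # Run only the cloud-sql-proxy presubmit target (sketch, label assumed)
    bazel test //:test_cloud_sql_proxy --test_output=errors
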
6 changes: 6 additions & 0 deletions alluxio/test_alluxio.py
@@ -1,3 +1,5 @@
import pkg_resources

from integration_tests.dataproc_test_case import DataprocTestCase

from absl.testing import absltest
@@ -35,6 +37,10 @@ def test_alluxio_with_presto(self, configuration, machine_suffixes):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

# Skip on 2.0+ version of Dataproc because it's not supported
if self.getImageVersion() >= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in 2.0+ images")

self.createCluster(
configuration,
init_actions=self.INIT_ACTIONS,
14 changes: 6 additions & 8 deletions atlas/test_atlas.py
@@ -40,11 +40,15 @@ def verify_instance(self, instance, username='admin', password='admin'):
# Upload files to populate Atlas and to verify it
populate_atlas_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), self.POPULATE_SCRIPT)
self.assert_command('gcloud compute scp {} {}:/tmp'.format(populate_atlas_path, instance))
self.assert_command('gcloud compute scp --zone={} {} {}:/tmp'.format(self.cluster_zone,
populate_atlas_path,
instance))

validate_atlas_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), self.VALIDATE_SCRIPT)
self.assert_command('gcloud compute scp {} {}:/tmp'.format(validate_atlas_path, instance))
self.assert_command('gcloud compute scp --zone={} {} {}:/tmp'.format(self.cluster_zone,
validate_atlas_path,
instance))

self.assert_instance_command(
instance, "chmod +x /tmp/{}".format(self.POPULATE_SCRIPT))
@@ -90,7 +94,6 @@ def verify_instance(self, instance, username='admin', password='admin'):
@parameterized.parameters(
("SINGLE", ["m"]),
("STANDARD", ["m"]),
("HA", ["m-0", "m-1", "m-2"]),
)
def test_atlas(self, configuration, machine_suffixes):
if self.getImageOs() == 'rocky':
@@ -110,15 +113,13 @@ def test_atlas(self, configuration, machine_suffixes):
metadata = 'run-on-master=true'
self.createCluster(configuration,
init_actions,
beta=True,
metadata=metadata,
timeout_in_minutes=30,
optional_components=optional_components,
machine_type="e2-standard-4")
else:
self.createCluster(configuration,
init_actions,
beta=True,
timeout_in_minutes=30,
optional_components=optional_components,
machine_type="e2-standard-4")
@@ -163,7 +164,6 @@ def test_atlas_overrides_admin_credentials(self, configuration,
username, password_sha256)
self.createCluster(configuration,
self.INIT_ACTIONS,
beta=True,
timeout_in_minutes=30,
metadata=metadata,
optional_components=self.OPTIONAL_COMPONENTS,
@@ -187,7 +187,6 @@ def test_atlas_fails_without_component(self, component):
self.createCluster(
"SINGLE",
self.INIT_ACTIONS,
beta=True,
timeout_in_minutes=30,
machine_type="e2-standard-4",
optional_components=self.OPTIONAL_COMPONENTS.remove(component))
@@ -203,7 +202,6 @@ def test_atlas_ha_fails_without_kafka(self):
self.createCluster("HA",
self.INIT_ACTIONS,
timeout_in_minutes=30,
beta=True,
machine_type="e2-standard-4",
optional_components=self.OPTIONAL_COMPONENTS_HA)

1 change: 1 addition & 0 deletions bigtable/test_bigtable.py
@@ -48,6 +48,7 @@ def tearDown(self):
self.db_name))

def _validate_bigtable(self):
self.assert_command('gcloud components install cbt')
_, stdout, _ = self.assert_command(
'cbt -instance {} count test-bigtable '.format(self.db_name))
self.assertEqual(
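
The added install step guarantees the cbt CLI is present before the count check runs. A rough manual equivalent of the validation (the instance name below is a placeholder; the table name comes from the test) might be:

    # Hypothetical manual re-run of the Bigtable validation (sketch)
    gcloud components install cbt --quiet
    cbt -instance my-bigtable-instance count test-bigtable
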
25 changes: 25 additions & 0 deletions cloud-sql-proxy/hivetest.hive
@@ -0,0 +1,25 @@
DROP TABLE IF EXISTS validate_hive_tbl;
DROP TABLE IF EXISTS grouped_tbl;

-- TODO(sidhom): set fs.default.name=... if using gs://

CREATE EXTERNAL TABLE validate_hive_tbl (
shell_user STRING,
dummy STRING,
uid INT,
gid INT,
name STRING,
home STRING,
shell STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ':';

CREATE TABLE grouped_tbl
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
AS SELECT shell, COUNT(*) shell_count
FROM validate_hive_tbl
GROUP BY shell
ORDER BY shell_count DESC, shell DESC;

SELECT * from grouped_tbl;
9 changes: 7 additions & 2 deletions cloud-sql-proxy/test_cloud_sql_proxy.py
@@ -10,7 +10,7 @@
class CloudSqlProxyTestCase(DataprocTestCase):
COMPONENT = 'cloud-sql-proxy'
INIT_ACTIONS = ['cloud-sql-proxy/cloud-sql-proxy.sh']
TEST_SCRIPT_FILE_NAME = 'cloud-sql-proxy/pyspark_metastore_test.py'
TEST_SCRIPT_FILE_NAME = 'cloud-sql-proxy/hivetest.hive'
DB_NAME = None

@classmethod
@@ -43,7 +43,12 @@ def wait_cloud_sql_operation(self, operation_id):
'gcloud sql operations wait {} --timeout=600'.format(operation_id))

def verify_cluster(self, name):
self.__submit_pyspark_job(name)
self.__submit_hive_job(name)

def __submit_hive_job(self, cluster_name):
self.assert_dataproc_job(
cluster_name, 'hive', '--file={}/{}'.format(self.INIT_ACTIONS_REPO,
self.TEST_SCRIPT_FILE_NAME))

def __submit_pyspark_job(self, cluster_name):
self.assert_dataproc_job(
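
The new __submit_hive_job helper routes verification through a Dataproc Hive job instead of the old PySpark check. Its likely command-line equivalent (the cluster, region, and repository bucket below are placeholders; assert_dataproc_job is assumed to wrap something along these lines) is:

    # Hypothetical manual equivalent of the new Hive verification job (sketch)
    gcloud dataproc jobs submit hive \
      --cluster=my-test-cluster \
      --region=us-central1 \
      --file=gs://my-init-actions-repo/cloud-sql-proxy/hivetest.hive
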
2 changes: 1 addition & 1 deletion dask/dask.sh
@@ -53,7 +53,7 @@ if [[ "${DASK_RUNTIME}" == 'yarn' ]]; then
fi
# Downgrade `google-cloud-bigquery` on Dataproc 2.0
# to fix compatibility with old Arrow version
if [[ "${DATAPROC_VERSION}" == '2.0' ]]; then
if [[ "${DATAPROC_IMAGE_VERSION}" == '2.0' ]]; then
CONDA_PACKAGES+=('google-cloud-bigquery=2')
fi
readonly CONDA_PACKAGES
4 changes: 1 addition & 3 deletions drill/test_drill.py
@@ -43,9 +43,7 @@ def test_drill(self, configuration, verify_options):
init_actions = self.INIT_ACTIONS_FOR_STANDARD + init_actions
self.createCluster(configuration, init_actions)

drill_mode = "DISTRIBUTED"
if configuration == "SINGLE":
drill_mode = "EMBEDDED"
drill_mode = "EMBEDDED"
for option in verify_options:
machine_suffix, target_machine_suffix = option
self.verify_instance(
7 changes: 6 additions & 1 deletion ganglia/test_ganglia.py
@@ -1,5 +1,6 @@
import os

import pkg_resources
from absl.testing import absltest
from absl.testing import parameterized

@@ -17,7 +18,8 @@ def verify_instance(self, name):
self.TEST_SCRIPT_FILE_NAME)
self.upload_test_file(test_script_path, name)
self.assert_instance_command(name,
"yes | sudo apt-get install python3-pip")
"yes | sudo apt-get install python3-pip libxml2-dev libxslt-dev")
self.assert_instance_command(name, "sudo -H pip3 install --upgrade pip")
self.assert_instance_command(name, "sudo pip3 install requests-html")
self.assert_instance_command(
name, "python3 {}".format(self.TEST_SCRIPT_FILE_NAME))
@@ -32,6 +34,9 @@ def test_ganglia(self, configuration, machine_suffixes):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

if self.getImageVersion() > pkg_resources.parse_version("2.0"):
self.skipTest("Ganglia UI is not supported for 2.0+ versions")

self.createCluster(configuration, self.INIT_ACTIONS)
for machine_suffix in machine_suffixes:
self.verify_instance("{}-{}".format(self.getClusterName(),
6 changes: 3 additions & 3 deletions gpu/test_gpu.py
@@ -124,7 +124,7 @@ def test_install_gpu_cuda_nvidia(self, configuration, machine_suffixes,
cuda_version):
image_os = self.getImageOs()

if self.getImageVersion() < pkg_resources.parse_version("2.0"):
if self.getImageVersion() < pkg_resources.parse_version("2.0") or self.getImageOs() == "rocky":
self.skipTest("Not supported in pre 2.0 images")

if ( image_os == "rocky" and (cuda_version < "11.2" and cuda_version != "11.0") ) or \
@@ -180,7 +180,7 @@ def test_gpu_allocation(self, configuration, master_accelerator,
if configuration == "SINGLE" and self.getImageOs() == "rocky":
self.skipTest("Test hangs on single-node clsuter with Rocky Linux-based images")

if self.getImageVersion() < pkg_resources.parse_version("2.0"):
if self.getImageVersion() < pkg_resources.parse_version("2.0") or self.getImageOs() == "rocky":
self.skipTest("Not supported in pre 2.0")

metadata = None
@@ -215,7 +215,7 @@ def test_install_gpu_cuda_nvidia_with_spark_job(self, configuration, machine_suffixes,
cuda_version):
image_os = self.getImageOs()

if self.getImageVersion() < pkg_resources.parse_version("2.0"):
if self.getImageVersion() < pkg_resources.parse_version("2.0") or self.getImageOs() == "rocky":
self.skipTest("Not supported in pre 2.0 images")

if ( image_os == "rocky" and (cuda_version < "11.2" and cuda_version != "11.0") ) or \
17 changes: 13 additions & 4 deletions h2o/h2o.sh
@@ -2,27 +2,34 @@

set -euxo pipefail

readonly NOT_SUPPORTED_MESSAGE="Dataproc ${DATAPROC_VERSION} not supported."
[[ $DATAPROC_VERSION == "1.5" ]] && echo "$NOT_SUPPORTED_MESSAGE" && exit 1
readonly NOT_SUPPORTED_MESSAGE="Dataproc ${DATAPROC_IMAGE_VERSION} not supported."
[[ $DATAPROC_IMAGE_VERSION == "1.5" ]] && echo "$NOT_SUPPORTED_MESSAGE" && exit 1

## Set Spark and Sparkling water versions
readonly DEFAULT_H2O_SPARKLING_WATER_VERSION="3.30.1.2-1"
readonly DEFAULT_H2O_SPARKLING_WATER_VERSION="3.44.0.3-1"
H2O_SPARKLING_WATER_VERSION="$(/usr/share/google/get_metadata_value attributes/H2O_SPARKLING_WATER_VERSION ||
echo ${DEFAULT_H2O_SPARKLING_WATER_VERSION})"
readonly H2O_SPARKLING_WATER_VERSION

readonly SPARK_VERSION=$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1)

readonly SPARKLING_WATER_NAME="sparkling-water-${H2O_SPARKLING_WATER_VERSION}-${SPARK_VERSION}"
readonly SPARKLING_WATER_URL="http://h2o-release.s3.amazonaws.com/sparkling-water/spark-${SPARK_VERSION}/${H2O_SPARKLING_WATER_VERSION}-${SPARK_VERSION}/${SPARKLING_WATER_NAME}.zip"
readonly SPARKLING_WATER_URL="http://s3.amazonaws.com/h2o-release/sparkling-water/spark-${SPARK_VERSION}/${H2O_SPARKLING_WATER_VERSION}-${SPARK_VERSION}/${SPARKLING_WATER_NAME}.zip"

# Install Scala packages for H2O Sparkling Water
function install_sparkling_water() {
local OS_NAME
OS_NAME=$(lsb_release -is | tr '[:upper:]' '[:lower:]')
local tmp_dir
tmp_dir=$(mktemp -d -t init-action-h2o-XXXX)

## Download and unzip Sparkling Water Scala libraries
wget -nv --timeout=30 --tries=5 --retry-connrefused "$SPARKLING_WATER_URL" -P "$tmp_dir"
if [[ "${OS_NAME}" == "rocky" ]]; then
sudo yum -y install zip unzip
else
sudo apt-get -y install zip unzip
fi
unzip -q "${tmp_dir}/${SPARKLING_WATER_NAME}.zip" -d /usr/lib/
ln -s "/usr/lib/${SPARKLING_WATER_NAME}" /usr/lib/sparkling-water

@@ -36,6 +43,8 @@ function install_sparkling_water() {

# Install Python packages for H2O Sparkling Water
function install_pysparkling_water() {
# Pin setuptools to 65.0.0 because the latest version fails with an invalid-version error while installing "h2o_pysparkling"
pip install setuptools==65.0.0
pip install --upgrade-strategy only-if-needed \
"h2o==${H2O_SPARKLING_WATER_VERSION%-*}" \
"h2o_pysparkling_${SPARK_VERSION}==${H2O_SPARKLING_WATER_VERSION}"
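
As a concrete expansion of the new download location (assuming spark-submit reports Spark 3.3, a value not stated in this diff), the default URL works out to:

    # Example expansion with SPARK_VERSION=3.3 (assumed) and the default
    # H2O_SPARKLING_WATER_VERSION=3.44.0.3-1
    SPARKLING_WATER_NAME="sparkling-water-3.44.0.3-1-3.3"
    SPARKLING_WATER_URL="http://s3.amazonaws.com/h2o-release/sparkling-water/spark-3.3/3.44.0.3-1-3.3/sparkling-water-3.44.0.3-1-3.3.zip"
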
6 changes: 3 additions & 3 deletions horovod/horovod.sh
@@ -22,7 +22,7 @@ set -euxo pipefail

readonly DEFAULT_HOROVOD_VERSION="0.21.2"
readonly DEFAULT_TENSORFLOW_VERSION="2.4.1"
readonly DEFAULT_PYTORCH_VERSION="1.7.1"
readonly DEFAULT_PYTORCH_VERSION="1.11.0"
readonly DEFAULT_TORCHVISION_VERSION="0.8.2"
readonly DEFAULT_MXNET_VERSION="1.7.0.post1"
readonly DEFAULT_CUDA_VERSION="11.0"
@@ -88,8 +88,8 @@ function install_frameworks() {
# Add gpu-versions of libraries
if (lspci | grep -q NVIDIA); then
local torch_packages=(
"torch==${PYTORCH_VERSION}+cu${CUDA_VERSION//./}"
"torchvision==${TORCHVISION_VERSION}+cu${CUDA_VERSION//./}"
"torch==${PYTORCH_VERSION}"
"torchvision==${TORCHVISION_VERSION}"
)
pip install "${torch_packages[@]}" -f "https://download.pytorch.org/whl/torch_stable.html"
if [[ ${TENSORFLOW_VERSION} == "1."* ]]; then
2 changes: 1 addition & 1 deletion horovod/test_horovod.py
@@ -45,7 +45,7 @@ def test_horovod_gpu(self, configuration, controller):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

metadata = "cuda-version=11.0,cudnn-version=8.0.5.39,gpu-driver-provider=NVIDIA"
metadata = "cuda-version=11.1,cudnn-version=8.0.5.39,gpu-driver-provider=NVIDIA"

self.createCluster(
configuration,
1 change: 1 addition & 0 deletions integration_tests/dataproc_test_case.py
@@ -51,6 +51,7 @@ class DataprocTestCase(parameterized.TestCase):
@classmethod
def setUpClass(cls):
super().setUpClass()
os.environ["CLOUDSDK_PYTHON"] = "/usr/bin/python3"

_, project, _ = cls.run_command("gcloud config get-value project")
cls.PROJECT = project.strip()
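
Pinning CLOUDSDK_PYTHON makes every gcloud invocation in the test harness run under the system Python 3 rather than whatever interpreter happens to be first on PATH. The shell equivalent of the added line (a sketch for local debugging, not part of this commit) is simply:

    # Force the gcloud CLI onto the system Python 3 interpreter (sketch)
    export CLOUDSDK_PYTHON=/usr/bin/python3
    gcloud config get-value project
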
4 changes: 2 additions & 2 deletions mlvm/mlvm.sh
@@ -42,8 +42,6 @@ CONDA_PACKAGES=(
"r-essentials=${R_VERSION}"
"r-sparklyr=1.7"
"scikit-learn=0.24"
"pytorch=1.9"
"torchvision=0.9"
"xgboost=1.4"
)

@@ -121,6 +119,8 @@ function install_conda_packages() {
conda config --add channels pytorch
conda config --add channels conda-forge

conda install pytorch==1.9.0 torchvision==0.10.0 torchaudio==0.9.0 -c pytorch -c conda-forge

# Create a separate environment with mamba.
# Mamba provides significant decreases in installation times.
conda create -y -n ${mamba_env_name} mamba
4 changes: 4 additions & 0 deletions ranger/ranger.sh
@@ -70,6 +70,10 @@ function configure_admin() {
--configuration_file "${RANGER_INSTALL_DIR}/ranger-admin/ews/webapp/WEB-INF/classes/conf.dist/ranger-admin-site.xml" \
--name 'ranger.service.http.port' --value "${RANGER_ADMIN_PORT}" \
--clobber
sudo mysql <<EOF
ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY 'root-password';
quit
EOF
mysql -u root -proot-password -e "CREATE USER 'rangeradmin'@'localhost' IDENTIFIED BY 'rangerpass';"
mysql -u root -proot-password -e "CREATE DATABASE ranger;"
mysql -u root -proot-password -e "GRANT ALL PRIVILEGES ON ranger.* TO 'rangeradmin'@'localhost';"
4 changes: 2 additions & 2 deletions ranger/test_ranger.py
@@ -36,8 +36,8 @@ def test_ranger(self, configuration, machine_suffixes):
self.skipTest("Not supported in Rocky Linux-based images")

# Skip on 2.0+ version of Dataproc because it's not supported
if self.getImageVersion() >= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in 2.0+ images")
if self.getImageVersion() >= pkg_resources.parse_version("1.5"):
self.skipTest("Can be added as an optional component after Dataproc 1.3")

self.createCluster(
configuration,
7 changes: 7 additions & 0 deletions rapids/test_rapids.py
@@ -57,6 +57,8 @@ def verify_spark_job(self):
("STANDARD", ["m"], GPU_P100, "standalone"))
def test_rapids_dask(self, configuration, machine_suffixes, accelerator,
dask_runtime):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

if self.getImageVersion() <= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in pre 2.0 images")
@@ -82,6 +84,8 @@ def test_rapids_dask(self, configuration, machine_suffixes, accelerator,
@parameterized.parameters(("SINGLE", ["m"], GPU_P100),
("STANDARD", ["w-0"], GPU_P100))
def test_rapids_spark(self, configuration, machine_suffixes, accelerator):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

if self.getImageVersion() <= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in pre 2.0 images")
@@ -108,6 +112,9 @@ def test_rapids_spark(self, configuration, machine_suffixes, accelerator):
def test_non_default_cuda_versions(self, configuration, machine_suffixes,
accelerator, cuda_version):

if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

if self.getImageVersion() < pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in pre 2.0 images")

4 changes: 2 additions & 2 deletions solr/test_solr.py
@@ -38,8 +38,8 @@ def test_solr(self, configuration, machine_suffixes):
self.skipTest("Not supported in Rocky Linux-based images")

# Skip on 2.0+ version of Dataproc because it's not supported
if self.getImageVersion() >= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in 2.0+ images")
if self.getImageVersion() >= pkg_resources.parse_version("1.5"):
self.skipTest("Solr can be added to the cluster as an optional component after Dataproc 1.3")

self.createCluster(configuration, self.INIT_ACTIONS)
for machine_suffix in machine_suffixes:
2 changes: 1 addition & 1 deletion solr/verify_solr.py
@@ -14,7 +14,7 @@

SOLR_DIR = '/usr/lib/solr'
SOLR_URL = 'http://localhost:8983/solr'
SOLR_EXAMPLE_DOC = 'https://raw.githubusercontent.com/apache/lucene-solr/master/solr/example/films/films.json'
SOLR_EXAMPLE_DOC = 'https://raw.githubusercontent.com/apache/solr/main/solr/example/films/films.json'
SOLR_COLLECTION_NAME = 'films'


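
The example data set now comes from the renamed apache/solr repository rather than the retired lucene-solr mirror. Loading it into the films collection is normally a single update call; a hedged sketch using the constants above (the exact steps inside verify_solr.py may differ):

    # Fetch the example documents and post them to the local collection (sketch)
    curl -fsSL -o films.json \
      "https://raw.githubusercontent.com/apache/solr/main/solr/example/films/films.json"
    curl 'http://localhost:8983/solr/films/update?commit=true' \
      -H 'Content-Type: application/json' --data-binary @films.json
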