
Commit

fixing presubmit failures for the respective components (#1133)
prince-cs authored Feb 8, 2024
1 parent 950ad89 commit 3692b80
Showing 25 changed files with 138 additions and 37 deletions.
2 changes: 1 addition & 1 deletion BUILD
@@ -61,7 +61,7 @@ py_test(
name = "test_cloud_sql_proxy",
size = "enormous",
srcs = ["cloud-sql-proxy/test_cloud_sql_proxy.py"],
data = ["cloud-sql-proxy/cloud-sql-proxy.sh"],
data = ["cloud-sql-proxy/cloud-sql-proxy.sh", "cloud-sql-proxy/hivetest.hive"],
local = True,
shard_count = 3,
deps = [
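
Listing the Hive script under data is what stages it into the Bazel test sandbox alongside the init action. A minimal way to exercise just this target (a sketch assuming the repository's usual Bazel workflow; the label is inferred from the root BUILD file, not stated in this commit) would be:

    # Run only the cloud-sql-proxy presubmit target (sketch, label assumed)
    bazel test //:test_cloud_sql_proxy --test_output=errors
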
6 changes: 6 additions & 0 deletions alluxio/test_alluxio.py
@@ -1,3 +1,5 @@
import pkg_resources

from integration_tests.dataproc_test_case import DataprocTestCase

from absl.testing import absltest
@@ -35,6 +37,10 @@ def test_alluxio_with_presto(self, configuration, machine_suffixes):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

# Skip on 2.0+ version of Dataproc because it's not supported
if self.getImageVersion() >= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in 2.0+ images")

self.createCluster(
configuration,
init_actions=self.INIT_ACTIONS,
14 changes: 6 additions & 8 deletions atlas/test_atlas.py
@@ -40,11 +40,15 @@ def verify_instance(self, instance, username='admin', password='admin'):
# Upload files to populate Atlas and to verify it
populate_atlas_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), self.POPULATE_SCRIPT)
self.assert_command('gcloud compute scp {} {}:/tmp'.format(populate_atlas_path, instance))
self.assert_command('gcloud compute scp --zone={} {} {}:/tmp'.format(self.cluster_zone,
populate_atlas_path,
instance))

validate_atlas_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), self.VALIDATE_SCRIPT)
self.assert_command('gcloud compute scp {} {}:/tmp'.format(validate_atlas_path, instance))
self.assert_command('gcloud compute scp --zone={} {} {}:/tmp'.format(self.cluster_zone,
validate_atlas_path,
instance))

self.assert_instance_command(
instance, "chmod +x /tmp/{}".format(self.POPULATE_SCRIPT))
@@ -90,7 +94,6 @@ def verify_instance(self, instance, username='admin', password='admin'):
@parameterized.parameters(
("SINGLE", ["m"]),
("STANDARD", ["m"]),
("HA", ["m-0", "m-1", "m-2"]),
)
def test_atlas(self, configuration, machine_suffixes):
if self.getImageOs() == 'rocky':
@@ -110,15 +113,13 @@ def test_atlas(self, configuration, machine_suffixes):
metadata = 'run-on-master=true'
self.createCluster(configuration,
init_actions,
beta=True,
metadata=metadata,
timeout_in_minutes=30,
optional_components=optional_components,
machine_type="e2-standard-4")
else:
self.createCluster(configuration,
init_actions,
beta=True,
timeout_in_minutes=30,
optional_components=optional_components,
machine_type="e2-standard-4")
@@ -163,7 +164,6 @@ def test_atlas_overrides_admin_credentials(self, configuration,
username, password_sha256)
self.createCluster(configuration,
self.INIT_ACTIONS,
beta=True,
timeout_in_minutes=30,
metadata=metadata,
optional_components=self.OPTIONAL_COMPONENTS,
@@ -187,7 +187,6 @@ def test_atlas_fails_without_component(self, component):
self.createCluster(
"SINGLE",
self.INIT_ACTIONS,
beta=True,
timeout_in_minutes=30,
machine_type="e2-standard-4",
optional_components=self.OPTIONAL_COMPONENTS.remove(component))
@@ -203,7 +202,6 @@ def test_atlas_ha_fails_without_kafka(self):
self.createCluster("HA",
self.INIT_ACTIONS,
timeout_in_minutes=30,
beta=True,
machine_type="e2-standard-4",
optional_components=self.OPTIONAL_COMPONENTS_HA)

1 change: 1 addition & 0 deletions bigtable/test_bigtable.py
@@ -48,6 +48,7 @@ def tearDown(self):
self.db_name))

def _validate_bigtable(self):
self.assert_command('gcloud components install cbt')
_, stdout, _ = self.assert_command(
'cbt -instance {} count test-bigtable '.format(self.db_name))
self.assertEqual(
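
The added install step guarantees the cbt CLI is present before the count check runs. A rough manual equivalent of the validation (the instance name below is a placeholder; the table name comes from the test) might be:

    # Hypothetical manual re-run of the Bigtable validation (sketch)
    gcloud components install cbt --quiet
    cbt -instance my-bigtable-instance count test-bigtable
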
25 changes: 25 additions & 0 deletions cloud-sql-proxy/hivetest.hive
@@ -0,0 +1,25 @@
DROP TABLE IF EXISTS validate_hive_tbl;
DROP TABLE IF EXISTS grouped_tbl;

-- TODO(sidhom): set fs.default.name=... if using gs://

CREATE EXTERNAL TABLE validate_hive_tbl (
shell_user STRING,
dummy STRING,
uid INT,
gid INT,
name STRING,
home STRING,
shell STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ':';

CREATE TABLE grouped_tbl
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
AS SELECT shell, COUNT(*) shell_count
FROM validate_hive_tbl
GROUP BY shell
ORDER BY shell_count DESC, shell DESC;

SELECT * from grouped_tbl;
9 changes: 7 additions & 2 deletions cloud-sql-proxy/test_cloud_sql_proxy.py
@@ -10,7 +10,7 @@
class CloudSqlProxyTestCase(DataprocTestCase):
COMPONENT = 'cloud-sql-proxy'
INIT_ACTIONS = ['cloud-sql-proxy/cloud-sql-proxy.sh']
TEST_SCRIPT_FILE_NAME = 'cloud-sql-proxy/pyspark_metastore_test.py'
TEST_SCRIPT_FILE_NAME = 'cloud-sql-proxy/hivetest.hive'
DB_NAME = None

@classmethod
@@ -43,7 +43,12 @@ def wait_cloud_sql_operation(self, operation_id):
'gcloud sql operations wait {} --timeout=600'.format(operation_id))

def verify_cluster(self, name):
self.__submit_pyspark_job(name)
self.__submit_hive_job(name)

def __submit_hive_job(self, cluster_name):
self.assert_dataproc_job(
cluster_name, 'hive', '--file={}/{}'.format(self.INIT_ACTIONS_REPO,
self.TEST_SCRIPT_FILE_NAME))

def __submit_pyspark_job(self, cluster_name):
self.assert_dataproc_job(
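
The new __submit_hive_job helper routes verification through a Dataproc Hive job instead of the old PySpark check. Its likely command-line equivalent (the cluster, region, and repository bucket below are placeholders; assert_dataproc_job is assumed to wrap something along these lines) is:

    # Hypothetical manual equivalent of the new Hive verification job (sketch)
    gcloud dataproc jobs submit hive \
      --cluster=my-test-cluster \
      --region=us-central1 \
      --file=gs://my-init-actions-repo/cloud-sql-proxy/hivetest.hive
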
2 changes: 1 addition & 1 deletion dask/dask.sh
@@ -53,7 +53,7 @@ if [[ "${DASK_RUNTIME}" == 'yarn' ]]; then
fi
# Downgrade `google-cloud-bigquery` on Dataproc 2.0
# to fix compatibility with old Arrow version
if [[ "${DATAPROC_VERSION}" == '2.0' ]]; then
if [[ "${DATAPROC_IMAGE_VERSION}" == '2.0' ]]; then
CONDA_PACKAGES+=('google-cloud-bigquery=2')
fi
readonly CONDA_PACKAGES
4 changes: 1 addition & 3 deletions drill/test_drill.py
@@ -43,9 +43,7 @@ def test_drill(self, configuration, verify_options):
init_actions = self.INIT_ACTIONS_FOR_STANDARD + init_actions
self.createCluster(configuration, init_actions)

drill_mode = "DISTRIBUTED"
if configuration == "SINGLE":
drill_mode = "EMBEDDED"
drill_mode = "EMBEDDED"
for option in verify_options:
machine_suffix, target_machine_suffix = option
self.verify_instance(
7 changes: 6 additions & 1 deletion ganglia/test_ganglia.py
@@ -1,5 +1,6 @@
import os

import pkg_resources
from absl.testing import absltest
from absl.testing import parameterized

@@ -17,7 +18,8 @@ def verify_instance(self, name):
self.TEST_SCRIPT_FILE_NAME)
self.upload_test_file(test_script_path, name)
self.assert_instance_command(name,
"yes | sudo apt-get install python3-pip")
"yes | sudo apt-get install python3-pip libxml2-dev libxslt-dev")
self.assert_instance_command(name, "sudo -H pip3 install --upgrade pip")
self.assert_instance_command(name, "sudo pip3 install requests-html")
self.assert_instance_command(
name, "python3 {}".format(self.TEST_SCRIPT_FILE_NAME))
@@ -32,6 +34,9 @@ def test_ganglia(self, configuration, machine_suffixes):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

if self.getImageVersion() > pkg_resources.parse_version("2.0"):
self.skipTest("Ganglia UI is not supported for 2.0+ versions")

self.createCluster(configuration, self.INIT_ACTIONS)
for machine_suffix in machine_suffixes:
self.verify_instance("{}-{}".format(self.getClusterName(),
6 changes: 3 additions & 3 deletions gpu/test_gpu.py
@@ -124,7 +124,7 @@ def test_install_gpu_cuda_nvidia(self, configuration, machine_suffixes,
cuda_version):
image_os = self.getImageOs()

if self.getImageVersion() < pkg_resources.parse_version("2.0"):
if self.getImageVersion() < pkg_resources.parse_version("2.0") or self.getImageOs() == "rocky":
self.skipTest("Not supported in pre 2.0 images")

if ( image_os == "rocky" and (cuda_version < "11.2" and cuda_version != "11.0") ) or \
@@ -180,7 +180,7 @@ def test_gpu_allocation(self, configuration, master_accelerator,
if configuration == "SINGLE" and self.getImageOs() == "rocky":
self.skipTest("Test hangs on single-node clsuter with Rocky Linux-based images")

if self.getImageVersion() < pkg_resources.parse_version("2.0"):
if self.getImageVersion() < pkg_resources.parse_version("2.0") or self.getImageOs() == "rocky":
self.skipTest("Not supported in pre 2.0")

metadata = None
@@ -215,7 +215,7 @@ def test_install_gpu_cuda_nvidia_with_spark_job(self, configuration, machine_suffixes,
cuda_version):
image_os = self.getImageOs()

if self.getImageVersion() < pkg_resources.parse_version("2.0"):
if self.getImageVersion() < pkg_resources.parse_version("2.0") or self.getImageOs() == "rocky":
self.skipTest("Not supported in pre 2.0 images")

if ( image_os == "rocky" and (cuda_version < "11.2" and cuda_version != "11.0") ) or \
17 changes: 13 additions & 4 deletions h2o/h2o.sh
@@ -2,27 +2,34 @@

set -euxo pipefail

readonly NOT_SUPPORTED_MESSAGE="Dataproc ${DATAPROC_VERSION} not supported."
[[ $DATAPROC_VERSION == "1.5" ]] && echo "$NOT_SUPPORTED_MESSAGE" && exit 1
readonly NOT_SUPPORTED_MESSAGE="Dataproc ${DATAPROC_IMAGE_VERSION} not supported."
[[ $DATAPROC_IMAGE_VERSION == "1.5" ]] && echo "$NOT_SUPPORTED_MESSAGE" && exit 1

## Set Spark and Sparkling water versions
readonly DEFAULT_H2O_SPARKLING_WATER_VERSION="3.30.1.2-1"
readonly DEFAULT_H2O_SPARKLING_WATER_VERSION="3.44.0.3-1"
H2O_SPARKLING_WATER_VERSION="$(/usr/share/google/get_metadata_value attributes/H2O_SPARKLING_WATER_VERSION ||
echo ${DEFAULT_H2O_SPARKLING_WATER_VERSION})"
readonly H2O_SPARKLING_WATER_VERSION

readonly SPARK_VERSION=$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1)

readonly SPARKLING_WATER_NAME="sparkling-water-${H2O_SPARKLING_WATER_VERSION}-${SPARK_VERSION}"
readonly SPARKLING_WATER_URL="http://h2o-release.s3.amazonaws.com/sparkling-water/spark-${SPARK_VERSION}/${H2O_SPARKLING_WATER_VERSION}-${SPARK_VERSION}/${SPARKLING_WATER_NAME}.zip"
readonly SPARKLING_WATER_URL="http://s3.amazonaws.com/h2o-release/sparkling-water/spark-${SPARK_VERSION}/${H2O_SPARKLING_WATER_VERSION}-${SPARK_VERSION}/${SPARKLING_WATER_NAME}.zip"

# Install Scala packages for H2O Sparkling Water
function install_sparkling_water() {
local OS_NAME
OS_NAME=$(lsb_release -is | tr '[:upper:]' '[:lower:]')
local tmp_dir
tmp_dir=$(mktemp -d -t init-action-h2o-XXXX)

## Download and unzip Sparkling Water Scala libraries
wget -nv --timeout=30 --tries=5 --retry-connrefused "$SPARKLING_WATER_URL" -P "$tmp_dir"
if [[ "${OS_NAME}" == "rocky" ]]; then
sudo yum -y install zip unzip
else
sudo apt-get -y install zip unzip
fi
unzip -q "${tmp_dir}/${SPARKLING_WATER_NAME}.zip" -d /usr/lib/
ln -s "/usr/lib/${SPARKLING_WATER_NAME}" /usr/lib/sparkling-water

@@ -36,6 +43,8 @@ function install_sparkling_water() {

# Install Python packages for H2O Sparkling Water
function install_pysparkling_water() {
# Pin setuptools to 65.0.0 because the latest version fails with an invalid-version error while installing "h2o_pysparkling"
pip install setuptools==65.0.0
pip install --upgrade-strategy only-if-needed \
"h2o==${H2O_SPARKLING_WATER_VERSION%-*}" \
"h2o_pysparkling_${SPARK_VERSION}==${H2O_SPARKLING_WATER_VERSION}"
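
As a concrete expansion of the new download location (assuming spark-submit reports Spark 3.3, a value not stated in this diff), the default URL works out to:

    # Example expansion with SPARK_VERSION=3.3 (assumed) and the default
    # H2O_SPARKLING_WATER_VERSION=3.44.0.3-1
    SPARKLING_WATER_NAME="sparkling-water-3.44.0.3-1-3.3"
    SPARKLING_WATER_URL="http://s3.amazonaws.com/h2o-release/sparkling-water/spark-3.3/3.44.0.3-1-3.3/sparkling-water-3.44.0.3-1-3.3.zip"
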
6 changes: 3 additions & 3 deletions horovod/horovod.sh
@@ -22,7 +22,7 @@ set -euxo pipefail

readonly DEFAULT_HOROVOD_VERSION="0.21.2"
readonly DEFAULT_TENSORFLOW_VERSION="2.4.1"
readonly DEFAULT_PYTORCH_VERSION="1.7.1"
readonly DEFAULT_PYTORCH_VERSION="1.11.0"
readonly DEFAULT_TORCHVISION_VERSION="0.8.2"
readonly DEFAULT_MXNET_VERSION="1.7.0.post1"
readonly DEFAULT_CUDA_VERSION="11.0"
@@ -88,8 +88,8 @@ function install_frameworks() {
# Add gpu-versions of libraries
if (lspci | grep -q NVIDIA); then
local torch_packages=(
"torch==${PYTORCH_VERSION}+cu${CUDA_VERSION//./}"
"torchvision==${TORCHVISION_VERSION}+cu${CUDA_VERSION//./}"
"torch==${PYTORCH_VERSION}"
"torchvision==${TORCHVISION_VERSION}"
)
pip install "${torch_packages[@]}" -f "https://download.pytorch.org/whl/torch_stable.html"
if [[ ${TENSORFLOW_VERSION} == "1."* ]]; then
2 changes: 1 addition & 1 deletion horovod/test_horovod.py
@@ -45,7 +45,7 @@ def test_horovod_gpu(self, configuration, controller):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

metadata = "cuda-version=11.0,cudnn-version=8.0.5.39,gpu-driver-provider=NVIDIA"
metadata = "cuda-version=11.1,cudnn-version=8.0.5.39,gpu-driver-provider=NVIDIA"

self.createCluster(
configuration,
1 change: 1 addition & 0 deletions integration_tests/dataproc_test_case.py
@@ -51,6 +51,7 @@ class DataprocTestCase(parameterized.TestCase):
@classmethod
def setUpClass(cls):
super().setUpClass()
os.environ["CLOUDSDK_PYTHON"] = "/usr/bin/python3"

_, project, _ = cls.run_command("gcloud config get-value project")
cls.PROJECT = project.strip()
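
Pinning CLOUDSDK_PYTHON makes every gcloud invocation in the test harness run under the system Python 3 rather than whatever interpreter happens to be first on PATH. The shell equivalent of the added line (a sketch for local debugging, not part of this commit) is simply:

    # Force the gcloud CLI onto the system Python 3 interpreter (sketch)
    export CLOUDSDK_PYTHON=/usr/bin/python3
    gcloud config get-value project
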
4 changes: 2 additions & 2 deletions mlvm/mlvm.sh
@@ -42,8 +42,6 @@ CONDA_PACKAGES=(
"r-essentials=${R_VERSION}"
"r-sparklyr=1.7"
"scikit-learn=0.24"
"pytorch=1.9"
"torchvision=0.9"
"xgboost=1.4"
)

@@ -121,6 +119,8 @@ function install_conda_packages() {
conda config --add channels pytorch
conda config --add channels conda-forge

conda install pytorch==1.9.0 torchvision==0.10.0 torchaudio==0.9.0 -c pytorch -c conda-forge

# Create a separate environment with mamba.
# Mamba provides significant decreases in installation times.
conda create -y -n ${mamba_env_name} mamba
4 changes: 4 additions & 0 deletions ranger/ranger.sh
@@ -70,6 +70,10 @@ function configure_admin() {
--configuration_file "${RANGER_INSTALL_DIR}/ranger-admin/ews/webapp/WEB-INF/classes/conf.dist/ranger-admin-site.xml" \
--name 'ranger.service.http.port' --value "${RANGER_ADMIN_PORT}" \
--clobber
sudo mysql <<EOF
ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY 'root-password';
quit
EOF
mysql -u root -proot-password -e "CREATE USER 'rangeradmin'@'localhost' IDENTIFIED BY 'rangerpass';"
mysql -u root -proot-password -e "CREATE DATABASE ranger;"
mysql -u root -proot-password -e "GRANT ALL PRIVILEGES ON ranger.* TO 'rangeradmin'@'localhost';"
4 changes: 2 additions & 2 deletions ranger/test_ranger.py
@@ -36,8 +36,8 @@ def test_ranger(self, configuration, machine_suffixes):
self.skipTest("Not supported in Rocky Linux-based images")

# Skip on 2.0+ version of Dataproc because it's not supported
if self.getImageVersion() >= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in 2.0+ images")
if self.getImageVersion() >= pkg_resources.parse_version("1.5"):
self.skipTest("Can be added as an optional component after Dataproc 1.3")

self.createCluster(
configuration,
7 changes: 7 additions & 0 deletions rapids/test_rapids.py
@@ -57,6 +57,8 @@ def verify_spark_job(self):
("STANDARD", ["m"], GPU_P100, "standalone"))
def test_rapids_dask(self, configuration, machine_suffixes, accelerator,
dask_runtime):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

if self.getImageVersion() <= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in pre 2.0 images")
@@ -82,6 +84,8 @@ def test_rapids_dask(self, configuration, machine_suffixes, accelerator,
@parameterized.parameters(("SINGLE", ["m"], GPU_P100),
("STANDARD", ["w-0"], GPU_P100))
def test_rapids_spark(self, configuration, machine_suffixes, accelerator):
if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

if self.getImageVersion() <= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in pre 2.0 images")
@@ -108,6 +112,9 @@ def test_rapids_spark(self, configuration, machine_suffixes, accelerator):
def test_non_default_cuda_versions(self, configuration, machine_suffixes,
accelerator, cuda_version):

if self.getImageOs() == 'rocky':
self.skipTest("Not supported in Rocky Linux-based images")

if self.getImageVersion() < pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in pre 2.0 images")

4 changes: 2 additions & 2 deletions solr/test_solr.py
@@ -38,8 +38,8 @@ def test_solr(self, configuration, machine_suffixes):
self.skipTest("Not supported in Rocky Linux-based images")

# Skip on 2.0+ version of Dataproc because it's not supported
if self.getImageVersion() >= pkg_resources.parse_version("2.0"):
self.skipTest("Not supported in 2.0+ images")
if self.getImageVersion() >= pkg_resources.parse_version("1.5"):
self.skipTest("Solr can be added to the cluster as an optional component after Dataproc 1.3")

self.createCluster(configuration, self.INIT_ACTIONS)
for machine_suffix in machine_suffixes:
2 changes: 1 addition & 1 deletion solr/verify_solr.py
@@ -14,7 +14,7 @@

SOLR_DIR = '/usr/lib/solr'
SOLR_URL = 'http://localhost:8983/solr'
SOLR_EXAMPLE_DOC = 'https://raw.githubusercontent.com/apache/lucene-solr/master/solr/example/films/films.json'
SOLR_EXAMPLE_DOC = 'https://raw.githubusercontent.com/apache/solr/main/solr/example/films/films.json'
SOLR_COLLECTION_NAME = 'films'


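
The example data set now comes from the renamed apache/solr repository rather than the retired lucene-solr mirror. Loading it into the films collection is normally a single update call; a hedged sketch using the constants above (the exact steps inside verify_solr.py may differ):

    # Fetch the example documents and post them to the local collection (sketch)
    curl -fsSL -o films.json \
      "https://raw.githubusercontent.com/apache/solr/main/solr/example/films/films.json"
    curl 'http://localhost:8983/solr/films/update?commit=true' \
      -H 'Content-Type: application/json' --data-binary @films.json
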