diff --git a/gpu/install_gpu_driver.sh b/gpu/install_gpu_driver.sh
index 66964b4d1..49c55e651 100644
--- a/gpu/install_gpu_driver.sh
+++ b/gpu/install_gpu_driver.sh
@@ -449,7 +449,7 @@ GPU_DRIVER_PROVIDER=$(get_metadata_attribute 'gpu-driver-provider' 'NVIDIA')
readonly GPU_DRIVER_PROVIDER
# Whether to install GPU monitoring agent that sends GPU metrics to Stackdriver
-INSTALL_GPU_AGENT=$(get_metadata_attribute 'install-gpu-agent' 'false')
+INSTALL_GPU_AGENT=$(get_metadata_attribute 'install-gpu-agent' 'true')
readonly INSTALL_GPU_AGENT
# Dataproc configurations
@@ -628,15 +628,20 @@ function install_nvidia_nccl() {
if [[ "$(hostname -s)" =~ ^test && "$(nproc)" < 32 ]] ; then
# when running with fewer than 32 cores, yield to in-progress build
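+ # (the 10-20 second random sleep staggers concurrent nodes so their checks of the shared GCS cache do not all race at once)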
sleep $(( ( RANDOM % 11 ) + 10 ))
- if gcloud storage ls -j "${gcs_tarball}.building" > "${local_tarball}.building.json" ; then
- local build_start_time="$(jq -r .[0].metadata.timeCreated "${local_tarball}.building.json")"
- local build_start_epoch="$(date -d "${build_start_time}" +%s)"
- local timeout_epoch=$((build_start_epoch + 2700)) # 45 minutes
- while gsutil ls -L "${gcs_tarball}.building" ; do
+ if ${gsutil_cmd} ls "${gcs_tarball}.building" ; then
+ local build_start_time build_start_epoch timeout_epoch
+ if [[ "${gsutil_cmd}" =~ "gsutil" ]] ; then
+ build_start_time="$(${gsutil_cmd} ls -L "${gcs_tarball}.building" | awk -F': +' '/Creation time/ {print $2}')"
+ else
+ build_start_time="$(${gsutil_cmd} ls -j "${gcs_tarball}.building" | jq -r .[0].metadata.timeCreated "${local_tarball}.building.json")"
+ fi
+ build_start_epoch="$(date -d "${build_start_time}" +%s)"
+ timeout_epoch=$((build_start_epoch + 2700)) # 45 minutes
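+ # The .building object acts as a best-effort lock: peers poll below until it
+ # disappears, and delete it themselves once it has outlived the 45-minute
+ # build budget (the builder has most likely died).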
+ while ${gsutil_cmd} ls -L "${gcs_tarball}.building" ; do
local now_epoch="$(date -u +%s)"
if (( now_epoch > timeout_epoch )) ; then
# detect unexpected build failure after 45m
- gsutil rm "${gcs_tarball}.building"
+ ${gsutil_cmd} rm "${gcs_tarball}.building"
break
fi
sleep 5m
@@ -644,15 +649,15 @@ function install_nvidia_nccl() {
fi
fi
- output=$(gsutil ls "${gcs_tarball}" 2>&1 || echo '')
+ output=$(${gsutil_cmd} ls "${gcs_tarball}" 2>&1 || echo '')
if echo "${output}" | grep -q "${gcs_tarball}" ; then
# cache hit - unpack from cache
echo "cache hit"
- gcloud storage cat "${gcs_tarball}" | tar xvz
+ ${gsutil_cmd} cat "${gcs_tarball}" | tar xvz
else
# build and cache
touch "${local_tarball}.building"
- gcloud storage cp "${local_tarball}.building" "${gcs_tarball}.building"
+ ${gsutil_cmd} cp "${local_tarball}.building" "${gcs_tarball}.building"
building_file="${gcs_tarball}.building"
pushd nccl
# https://github.com/NVIDIA/nccl?tab=readme-ov-file#install
@@ -699,8 +704,8 @@ function install_nvidia_nccl() {
make clean
popd
tar xzvf "${local_tarball}"
- gcloud storage cp "${local_tarball}" "${gcs_tarball}"
- if gcloud storage ls "${gcs_tarball}.building" ; then gcloud storage rm "${gcs_tarball}.building" || true ; fi
+ ${gsutil_cmd} cp "${local_tarball}" "${gcs_tarball}"
+ if ${gsutil_cmd} ls "${gcs_tarball}.building" ; then ${gsutil_cmd} rm "${gcs_tarball}.building" || true ; fi
building_file=""
rm "${local_tarball}"
fi
@@ -804,15 +809,20 @@ function install_pytorch() {
if [[ "$(hostname -s)" =~ ^test && "$(nproc)" < 32 ]] ; then
# when running with fewer than 32 cores, yield to in-progress build
sleep $(( ( RANDOM % 11 ) + 10 ))
- if gcloud storage ls -j "${gcs_tarball}.building" > "${local_tarball}.building.json" ; then
- local build_start_time="$(jq -r .[0].metadata.timeCreated "${local_tarball}.building.json")"
- local build_start_epoch="$(date -d "${build_start_time}" +%s)"
- local timeout_epoch=$((build_start_epoch + 2700)) # 45 minutes
- while gsutil ls -L "${gcs_tarball}.building" ; do
+ if ${gsutil_cmd} ls "${gcs_tarball}.building" ; then
+ local build_start_time build_start_epoch timeout_epoch
+ if [[ "${gsutil_cmd}" =~ "gsutil" ]] ; then
+ build_start_time="$(${gsutil_cmd} ls -L "${gcs_tarball}.building" | awk -F': +' '/Creation time/ {print $2}')"
+ else
+ build_start_time="$(${gsutil_cmd} ls -j "${gcs_tarball}.building" | jq -r .[0].metadata.timeCreated "${local_tarball}.building.json")"
+ fi
+ build_start_epoch="$(date -d "${build_start_time}" +%s)"
+ timeout_epoch=$((build_start_epoch + 2700)) # 45 minutes
+ while ${gsutil_cmd} ls -L "${gcs_tarball}.building" ; do
local now_epoch="$(date -u +%s)"
if (( now_epoch > timeout_epoch )) ; then
# detect unexpected build failure after 45m
- gsutil rm "${gcs_tarball}.building"
+ ${gsutil_cmd} rm "${gcs_tarball}.building"
break
fi
sleep 5m
@@ -820,15 +830,15 @@ function install_pytorch() {
fi
fi
- output=$(gsutil ls "${gcs_tarball}" 2>&1 || echo '')
+ output=$(${gsutil_cmd} ls "${gcs_tarball}" 2>&1 || echo '')
if echo "${output}" | grep -q "${gcs_tarball}" ; then
# cache hit - unpack from cache
echo "cache hit"
mkdir -p "${envpath}"
- gcloud storage cat "${gcs_tarball}" | tar -C "${envpath}" -xz
+ ${gsutil_cmd} cat "${gcs_tarball}" | tar -C "${envpath}" -xz
else
touch "${local_tarball}.building"
- gcloud storage cp "${local_tarball}.building" "${gcs_tarball}.building"
+ ${gsutil_cmd} cp "${local_tarball}.building" "${gcs_tarball}.building"
building_file="${gcs_tarball}.building"
local verb=create
if test -d "${envpath}" ; then verb=install ; fi
@@ -848,8 +858,8 @@ function install_pytorch() {
pushd "${envpath}"
tar czf "${local_tarball}" .
popd
- gcloud storage cp "${local_tarball}" "${gcs_tarball}"
- if gcloud storage ls "${gcs_tarball}.building" ; then gcloud storage rm "${gcs_tarball}.building" || true ; fi
+ ${gsutil_cmd} cp "${local_tarball}" "${gcs_tarball}"
+ if ${gsutil_cmd} ls "${gcs_tarball}.building" ; then ${gsutil_cmd} rm "${gcs_tarball}.building" || true ; fi
building_file=""
fi
@@ -987,8 +997,11 @@ function add_repo_cuda() {
local sources_list_path="/etc/apt/sources.list.d/cuda-${shortname}-x86_64.list"
echo "deb [signed-by=${kr_path}] https://developer.download.nvidia.com/compute/cuda/repos/${shortname}/x86_64/ /" \
| sudo tee "${sources_list_path}"
- curl ${curl_retry_args} "${NVIDIA_BASE_DL_URL}/cuda/repos/${shortname}/x86_64/cuda-archive-keyring.gpg" \
- -o "${kr_path}"
+
+ for keyid in "0xae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80" "0xeb693b3035cd5710e231e123a4b469963bf863cc" ; do
+ curl ${curl_retry_args} "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" \
+ | gpg --import --no-default-keyring --keyring "${kr_path}"
+ done
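+
+ # The assembled keyring can be spot-checked with, e.g.:
+ #   gpg --no-default-keyring --keyring "${kr_path}" --list-keys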
else
install_cuda_keyring_pkg # 11.7+, 12.0+
fi
@@ -997,11 +1010,13 @@ function add_repo_cuda() {
fi
}
-readonly uname_r=$(uname -r)
-
function build_driver_from_github() {
- # non-GPL driver will have been built on rocky8 or if driver version is prior to open kernel version
- if ( is_rocky8 || version_lt "${DRIVER_VERSION}" "${MIN_OPEN_DRIVER_VER}" ) ; then return 0 ; fi
+ # non-GPL driver will have been built on rocky8, or when driver
+ # version is prior to open driver min, or GPU architecture is prior
+ # to Turing
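+ # (the 0x1E00 threshold marks the first Turing-generation device-ids;
+ # compare V100 at 0x1DB1 with T4 at 0x1EB8 in the id table later in this script)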
+ if ( is_rocky8 \
+ || version_lt "${DRIVER_VERSION}" "${MIN_OPEN_DRIVER_VER}" \
+ || [[ "$((16#${pci_device_id}))" < "$((16#1E00))" ]] ) ; then return 0 ; fi
pushd "${workdir}"
test -d "${workdir}/open-gpu-kernel-modules" || {
tarball_fn="${DRIVER_VERSION}.tar.gz"
@@ -1025,15 +1040,20 @@ function build_driver_from_github() {
if [[ "$(hostname -s)" =~ ^test && "$(nproc)" < 32 ]] ; then
# when running with fewer than 32 cores, yield to in-progress build
sleep $(( ( RANDOM % 11 ) + 10 ))
- if gcloud storage ls -j "${gcs_tarball}.building" > "${local_tarball}.building.json" ; then
- local build_start_time="$(jq -r .[0].metadata.timeCreated "${local_tarball}.building.json")"
- local build_start_epoch="$(date -d "${build_start_time}" +%s)"
- local timeout_epoch=$((build_start_epoch + 2700)) # 45 minutes
- while gsutil ls -L "${gcs_tarball}.building" ; do
+ if ${gsutil_cmd} ls "${gcs_tarball}.building" ; then
+ local build_start_time build_start_epoch timeout_epoch
+ if [[ "${gsutil_cmd}" =~ "gsutil" ]] ; then
+ build_start_time="$(${gsutil_cmd} ls -L "${gcs_tarball}.building" | awk -F': +' '/Creation time/ {print $2}')"
+ else
+ build_start_time="$(${gsutil_cmd} ls -j "${gcs_tarball}.building" | jq -r .[0].metadata.timeCreated "${local_tarball}.building.json")"
+ fi
+ build_start_epoch="$(date -d "${build_start_time}" +%s)"
+ timeout_epoch=$((build_start_epoch + 2700)) # 45 minutes
+ while ${gsutil_cmd} ls -L "${gcs_tarball}.building" ; do
local now_epoch="$(date -u +%s)"
if (( now_epoch > timeout_epoch )) ; then
# detect unexpected build failure after 45m
- gsutil rm "${gcs_tarball}.building"
+ ${gsutil_cmd} rm "${gcs_tarball}.building"
break
fi
sleep 5m
@@ -1041,12 +1061,12 @@ function build_driver_from_github() {
fi
fi
- if gsutil ls "${gcs_tarball}" 2>&1 | grep -q "${gcs_tarball}" ; then
+ if ${gsutil_cmd} ls "${gcs_tarball}" 2>&1 ; then
echo "cache hit"
else
# build the kernel modules
touch "${local_tarball}.building"
- gcloud storage cp "${local_tarball}.building" "${gcs_tarball}.building"
+ ${gsutil_cmd} cp "${local_tarball}.building" "${gcs_tarball}.building"
building_file="${gcs_tarball}.building"
pushd open-gpu-kernel-modules
install_build_dependencies
@@ -1075,14 +1095,14 @@ function build_driver_from_github() {
tar czvf "${local_tarball}" \
"${workdir}/open-gpu-kernel-modules/kernel-open/"*.log \
$(find /lib/modules/${uname_r}/ -iname 'nvidia*.ko')
- gcloud storage cp "${local_tarball}" "${gcs_tarball}"
- if gcloud storage ls "${gcs_tarball}.building" ; then gcloud storage rm "${gcs_tarball}.building" || true ; fi
+ ${gsutil_cmd} cp "${local_tarball}" "${gcs_tarball}"
+ if ${gsutil_cmd} ls "${gcs_tarball}.building" ; then ${gsutil_cmd} rm "${gcs_tarball}.building" || true ; fi
building_file=""
rm "${local_tarball}"
make clean
popd
fi
- gcloud storage cat "${gcs_tarball}" | tar -C / -xzv
+ ${gsutil_cmd} cat "${gcs_tarball}" | tar -C / -xzv
depmod -a
}
@@ -1155,10 +1175,15 @@ function install_nvidia_userspace_runfile() {
local cache_hit="0"
local local_tarball
- if ( is_rocky8 || version_lt "${DRIVER_VERSION}" "${MIN_OPEN_DRIVER_VER}" ) ; then
+ # Build nonfree driver on rocky8, or when driver version is prior to
+ # open driver min, or when GPU architecture is prior to Turing
+ if ( is_rocky8 \
+ || version_lt "${DRIVER_VERSION}" "${MIN_OPEN_DRIVER_VER}" \
+ || [[ "$((16#${pci_device_id}))" < "$((16#1E00))" ]] )
+ then
local nvidia_ko_path="$(find /lib/modules/$(uname -r)/ -name 'nvidia.ko')"
test -n "${nvidia_ko_path}" && test -f "${nvidia_ko_path}" || {
- local build_tarball="kmod_${_shortname}_${DRIVER_VERSION}.tar.gz"
+ local build_tarball="kmod_${_shortname}_${DRIVER_VERSION}_nonfree.tar.gz"
local_tarball="${workdir}/${build_tarball}"
local build_dir
if test -v modulus_md5sum && [[ -n "${modulus_md5sum}" ]]
@@ -1170,15 +1195,20 @@ function install_nvidia_userspace_runfile() {
if [[ "$(hostname -s)" =~ ^test && "$(nproc)" < 32 ]] ; then
# when running with fewer than 32 cores, yield to in-progress build
sleep $(( ( RANDOM % 11 ) + 10 ))
- if gcloud storage ls -j "${gcs_tarball}.building" > "${local_tarball}.building.json" ; then
- local build_start_time="$(jq -r .[0].metadata.timeCreated "${local_tarball}.building.json")"
- local build_start_epoch="$(date -d "${build_start_time}" +%s)"
- local timeout_epoch=$((build_start_epoch + 2700)) # 45 minutes
- while gsutil ls -L "${gcs_tarball}.building" ; do
+ if ${gsutil_cmd} ls "${gcs_tarball}.building" ; then
+ local build_start_time build_start_epoch timeout_epoch
+ if [[ "${gsutil_cmd}" =~ "gsutil" ]] ; then
+ build_start_time="$(${gsutil_cmd} ls -L "${gcs_tarball}.building" | awk -F': +' '/Creation time/ {print $2}')"
+ else
+ build_start_time="$(${gsutil_cmd} ls -j "${gcs_tarball}.building" | jq -r .[0].metadata.timeCreated "${local_tarball}.building.json")"
+ fi
+ build_start_epoch="$(date -d "${build_start_time}" +%s)"
+ timeout_epoch=$((build_start_epoch + 2700)) # 45 minutes
+ while ${gsutil_cmd} ls -L "${gcs_tarball}.building" ; do
local now_epoch="$(date -u +%s)"
if (( now_epoch > timeout_epoch )) ; then
# detect unexpected build failure after 45m
- gsutil rm "${gcs_tarball}.building"
+ ${gsutil_cmd} rm "${gcs_tarball}.building"
break
fi
sleep 5m
@@ -1186,7 +1216,7 @@ function install_nvidia_userspace_runfile() {
fi
fi
- if gsutil ls "${gcs_tarball}" 2>&1 | grep -q "${gcs_tarball}" ; then
+ if ${gsutil_cmd} ls "${gcs_tarball}" ; then
cache_hit="1"
if version_ge "${DRIVER_VERSION}" "${MIN_OPEN_DRIVER_VER}" ; then
runfile_args="${runfile_args} --no-kernel-modules"
@@ -1195,7 +1225,7 @@ function install_nvidia_userspace_runfile() {
else
# build the kernel modules
touch "${local_tarball}.building"
- gcloud storage cp "${local_tarball}.building" "${gcs_tarball}.building"
+ ${gsutil_cmd} cp "${local_tarball}.building" "${gcs_tarball}.building"
building_file="${gcs_tarball}.building"
install_build_dependencies
configure_dkms_certs
@@ -1225,17 +1255,20 @@ function install_nvidia_userspace_runfile() {
--install-libglvnd \
--tmpdir="${tmpdir}"
- if ( is_rocky8 || version_lt "${DRIVER_VERSION}" "${MIN_OPEN_DRIVER_VER}" ) ; then
+ # On rocky8, or when driver version is prior to open driver min, or when GPU architecture is prior to Turing
+ if ( is_rocky8 \
+ || version_lt "${DRIVER_VERSION}" "${MIN_OPEN_DRIVER_VER}" \
+ || [[ "$((16#${pci_device_id}))" < "$((16#1E00))" ]] ) ; then
if [[ "${cache_hit}" == "1" ]] ; then
- gcloud storage cat "${gcs_tarball}" | tar -C / -xzv
+ ${gsutil_cmd} cat "${gcs_tarball}" | tar -C / -xzv
depmod -a
else
clear_dkms_key
tar czvf "${local_tarball}" \
/var/log/nvidia-installer.log \
$(find /lib/modules/${uname_r}/ -iname 'nvidia*.ko')
- gcloud storage cp "${local_tarball}" "${gcs_tarball}"
- if gcloud storage ls "${gcs_tarball}.building" ; then gcloud storage rm "${gcs_tarball}.building" || true ; fi
+ ${gsutil_cmd} cp "${local_tarball}" "${gcs_tarball}"
+ if ${gsutil_cmd} ls "${gcs_tarball}.building" ; then ${gsutil_cmd} rm "${gcs_tarball}.building" || true ; fi
building_file=""
fi
fi
@@ -1314,7 +1347,7 @@ function install_cuda(){
}
function install_nvidia_container_toolkit() {
- is_complete install-nvtk && return
+ is_complete install-nvctk && return
local container_runtime_default
if command -v docker ; then container_runtime_default='docker'
@@ -1332,7 +1365,7 @@ function install_nvidia_container_toolkit() {
nvidia-ctk runtime configure --runtime="${CONTAINER_RUNTIME}"
systemctl restart "${CONTAINER_RUNTIME}"
- mark_complete install-nvtk
+ mark_complete install-nvctk
}
# Install NVIDIA GPU driver provided by NVIDIA
@@ -1370,6 +1403,8 @@ function install_ops_agent(){
cd /opt/google
# https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/installation
curl ${curl_retry_args} -O https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh
+ local expected="038d98644e4c4a7969d26da790946720d278c8d49bb82b677f550c2a2b858411 add-google-cloud-ops-agent-repo.sh"
+
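+ # Best-effort integrity check: fail fast if the downloaded installer does not
+ # match the sha256 pinned above (assumes GNU coreutils sha256sum)
+ echo "${expected}" | sha256sum --check --status || return 1
+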
execute_with_retries bash add-google-cloud-ops-agent-repo.sh --also-install
mark_complete ops-agent
@@ -1434,7 +1469,12 @@ function set_hadoop_property() {
}
function configure_yarn_resources() {
- if [[ ! -d "${HADOOP_CONF_DIR}" ]] ; then return 0 ; fi # pre-init scripts
+ if [[ ! -d "${HADOOP_CONF_DIR}" ]] ; then
+ # TODO: when running this script to customize an image, this file
+ # needs to be written *after* bdutil completes
+
+ return 0
+ fi # pre-init scripts
if [[ ! -f "${HADOOP_CONF_DIR}/resource-types.xml" ]]; then
printf '\n' >"${HADOOP_CONF_DIR}/resource-types.xml"
fi
@@ -1538,9 +1578,10 @@ EOF
chmod a+rx "${gpus_resources_script}"
- local spark_defaults_conf="/etc/spark/conf.dist/spark-defaults.conf"
if version_lt "${SPARK_VERSION}" "3.0" ; then return ; fi
+ local spark_defaults_conf="/etc/spark/conf.dist/spark-defaults.conf"
+ local spark_defaults_dir="$(dirname "${spark_defaults_conf}")"
if ! grep spark.executor.resource.gpu.discoveryScript "${spark_defaults_conf}" ; then
echo "spark.executor.resource.gpu.discoveryScript=${gpus_resources_script}" >> "${spark_defaults_conf}"
fi
@@ -1553,6 +1594,9 @@ EOF
# gpu_amount="$(echo $executor_cores | perl -pe "\$_ = ( ${gpu_count} / (\$_ / ${task_cpus}) )")"
gpu_amount="$(perl -e "print 1 / ${executor_cores}")"
+ # TODO: when running this script to customize an image, this file
+ # needs to be written *after* bdutil completes
+
cat >>"${spark_defaults_conf}" < "0" ]] ; then
+ # N.B.: https://pci-ids.ucw.cz/v2.2/pci.ids.xz
+ pci_device_id="$(grep -h -i PCI_ID=10DE /sys/bus/pci/devices/*/uevent | head -1 | awk -F: '{print $2}')"
+ pci_device_id_int="$((16#${pci_device_id}))"
+ case "${pci_device_id}" in
+ "15F8" ) gpu_type="nvidia-tesla-p100" ;;
+ "1BB3" ) gpu_type="nvidia-tesla-p4" ;;
+ "1DB1" ) gpu_type="nvidia-tesla-v100" ;;
+ "1EB8" ) gpu_type="nvidia-tesla-t4" ;;
+ "20*" ) gpu_type="nvidia-tesla-a100" ;;
+ "23*" ) gpu_type="nvidia-h100" ;; # install does not begin with image 2.0.68-debian10/cuda11.1
+ "27B8" ) gpu_type="nvidia-l4" ;; # install does not complete with image 2.0.68-debian10/cuda11.1
+ esac
+
+ ACCELERATOR="type=${gpu_type},count=${gpu_count}"
+ fi
+
nvsmi_works="0"
if is_cuda11 ; then gcc_ver="11"
@@ -1862,11 +1929,11 @@ function cache_fetched_package() {
local gcs_fn="$2"
local local_fn="$3"
- if gsutil ls "${gcs_fn}" 2>&1 | grep -q "${gcs_fn}" ; then
- time gcloud storage cp "${gcs_fn}" "${local_fn}"
+ if ${gsutil_cmd} ls "${gcs_fn}" 2>&1 ; then
+ time ${gsutil_cmd} cp "${gcs_fn}" "${local_fn}"
else
time ( curl ${curl_retry_args} "${src_url}" -o "${local_fn}" && \
- gcloud storage cp "${local_fn}" "${gcs_fn}" ; )
+ ${gsutil_cmd} cp "${local_fn}" "${gcs_fn}" ; )
fi
}
@@ -1881,8 +1948,8 @@ function clean_up_sources_lists() {
local regional_bigtop_repo_uri
regional_bigtop_repo_uri=$(cat ${dataproc_repo_file} |
- sed "s#/dataproc-bigtop-repo/#/goog-dataproc-bigtop-repo-${region}/#" |
- grep "deb .*goog-dataproc-bigtop-repo-${region}.* dataproc contrib" |
+ sed -E "s#/dataproc-bigtop-repo(-dev)?/#/goog-dataproc-bigtop-repo\\1-${region}/#" |
+ grep -E "deb .*goog-dataproc-bigtop-repo(-dev)?-${region}.* dataproc contrib" |
cut -d ' ' -f 2 |
head -1)
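+ # e.g. .../dataproc-bigtop-repo-dev/ becomes .../goog-dataproc-bigtop-repo-dev-${region}/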
@@ -1908,12 +1975,17 @@ function clean_up_sources_lists() {
local -r key_url="https://packages.adoptium.net/artifactory/api/gpg/key/public"
local -r adoptium_kr_path="/usr/share/keyrings/adoptium.gpg"
rm -f "${adoptium_kr_path}"
- curl ${curl_retry_args} "${key_url}" \
- | gpg --dearmor -o "${adoptium_kr_path}"
+ local -r old_adoptium_list="/etc/apt/sources.list.d/adoptopenjdk.list"
+ if test -f "${old_adoptium_list}" ; then
+ rm -f "${old_adoptium_list}"
+ fi
+ for keyid in "0x3b04d753c9050d9a5d343f39843c48a565f8f04b" "0x35baa0b33e9eb396f59ca838c0ba5ce6dc6315a3" ; do
+ curl ${curl_retry_args} "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" \
+ | gpg --import --no-default-keyring --keyring "${adoptium_kr_path}"
+ done
echo "deb [signed-by=${adoptium_kr_path}] https://packages.adoptium.net/artifactory/deb/ $(os_codename) main" \
> /etc/apt/sources.list.d/adoptium.list
-
#
# docker
#
@@ -1923,20 +1995,22 @@ function clean_up_sources_lists() {
rm -f "${docker_kr_path}"
curl ${curl_retry_args} "${docker_key_url}" \
- | gpg --dearmor -o "${docker_kr_path}"
+ | gpg --import --no-default-keyring --keyring "${docker_kr_path}"
echo "deb [signed-by=${docker_kr_path}] https://download.docker.com/linux/$(os_id) $(os_codename) stable" \
> ${docker_repo_file}
#
# google cloud + logging/monitoring
#
- if ls /etc/apt/sources.list.d/google-cloud*.list ; then
- rm -f /usr/share/keyrings/cloud.google.gpg
- curl ${curl_retry_args} https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg
+ local gcloud_kr_path="/usr/share/keyrings/cloud.google.gpg"
+ if ls /etc/apt/sources.list.d/google-cloud*.list ; then
+ rm -f "${gcloud_kr_path}"
+ curl ${curl_retry_args} https://packages.cloud.google.com/apt/doc/apt-key.gpg \
+ | gpg --import --no-default-keyring --keyring "${gcloud_kr_path}"
for list in google-cloud google-cloud-logging google-cloud-monitoring ; do
list_file="/etc/apt/sources.list.d/${list}.list"
if [[ -f "${list_file}" ]]; then
- sed -i -e 's:deb https:deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https:g' "${list_file}"
+ sed -i -e "s:deb https:deb [signed-by=${gcloud_kr_path}] https:g" "${list_file}"
fi
done
fi
@@ -1945,12 +2019,13 @@ function clean_up_sources_lists() {
# cran-r
#
if [[ -f /etc/apt/sources.list.d/cran-r.list ]]; then
- keyid="0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7"
- if is_ubuntu18 ; then keyid="0x51716619E084DAB9"; fi
- rm -f /usr/share/keyrings/cran-r.gpg
- curl ${curl_retry_args} "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" | \
- gpg --dearmor -o /usr/share/keyrings/cran-r.gpg
- sed -i -e 's:deb http:deb [signed-by=/usr/share/keyrings/cran-r.gpg] http:g' /etc/apt/sources.list.d/cran-r.list
+ local cranr_kr_path="/usr/share/keyrings/cran-r.gpg"
+ rm -f "${cranr_kr_path}"
+ for keyid in "0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7" "0xe298a3a825c0d65dfd57cbb651716619e084dab9" ; do
+ curl ${curl_retry_args} "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" \
+ | gpg --import --no-default-keyring --keyring "${cranr_kr_path}"
+ done
+ sed -i -e "s:deb http:deb [signed-by=${cranr_kr_path}] http:g" /etc/apt/sources.list.d/cran-r.list
fi
#
@@ -1973,7 +2048,7 @@ function exit_handler() {
# clean up incomplete build indicators
if test -n "${building_file}" ; then
- if gcloud storage ls "${building_file}" ; then gcloud storage rm "${building_file}" || true ; fi
+ if ${gsutil_cmd} ls "${building_file}" ; then ${gsutil_cmd} rm "${building_file}" || true ; fi
fi
set +ex
@@ -2046,15 +2121,19 @@ function exit_handler() {
#/dev/vda2 7096908 2611344 4182932 39% /
df / | tee -a "/run/disk-usage.log"
- perl -e '@siz=( sort { $a => $b }
- map { (split)[2] =~ /^(\d+)/ }
- grep { m:^/: } <STDIN> );
-$max=$siz[0]; $min=$siz[-1]; $starting="unknown"; $inc=q{$max-$starting};
-print( " samples-taken: ", scalar @siz, $/,
+ perl -e '($first, @samples) = grep { m:^/: } <STDIN>;
+ unshift(@samples,$first); $final=$samples[-1];
+ ($starting)=(split(/\s+/,$first))[2] =~ /^(\d+)/;
+ ($ending)=(split(/\s+/,$final))[2] =~ /^(\d+)/;
+ @siz=( sort { $b <=> $a }
+ map { (split)[2] =~ /^(\d+)/ } @samples );
+$max=$siz[0]; $min=$siz[-1]; $inc=$max-$starting;
+print( " samples-taken: ", scalar @siz, $/,
"starting-disk-used: $starting", $/,
- "maximum-disk-used: $max", $/,
- "minimum-disk-used: $min", $/,
- " increased-by: $inc", $/ )' < "/run/disk-usage.log"
+ " ending-disk-used: $ending", $/,
+ " maximum-disk-used: $max", $/,
+ " minimum-disk-used: $min", $/,
+ " increased-by: $inc", $/ )' < "/run/disk-usage.log"
echo "exit_handler has completed"
@@ -2074,7 +2153,6 @@ function set_proxy(){
if [[ -z "${METADATA_HTTP_PROXY}" ]] ; then return ; fi
- export METADATA_HTTP_PROXY
export http_proxy="${METADATA_HTTP_PROXY}"
export https_proxy="${METADATA_HTTP_PROXY}"
export HTTP_PROXY="${METADATA_HTTP_PROXY}"
@@ -2144,10 +2222,18 @@ function harden_sshd_config() {
}
function prepare_to_install(){
+ readonly uname_r=$(uname -r)
# Verify OS compatability and Secure boot state
check_os
check_secure_boot
+ # With the 402.0.0 release of gcloud sdk, `gcloud storage` can be
+ # used as a more performant replacement for `gsutil`
+ gsutil_cmd="gcloud storage"
+ gcloud_sdk_version="$(gcloud --version | awk -F'SDK ' '/Google Cloud SDK/ {print $2}')"
+ if version_lt "${gcloud_sdk_version}" "402.0.0" ; then
+ gsutil_cmd="gsutil -o GSUtil:check_hashes=never"
+ fi
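+ # check_hashes=never skips gsutil's client-side hash validation, which can be
+ # slow when the compiled crcmod extension is unavailable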
curl_retry_args="-fsSL --retry-connrefused --retry 10 --retry-max-time 30"
prepare_gpu_env