From 9907743c886014f26e28247a1bb68c059477514c Mon Sep 17 00:00:00 2001
From: Vamshi Krishna Gajendram
Date: Sat, 13 Jan 2024 00:26:09 +0530
Subject: [PATCH] Init action for installing ops agent (#1124)

---
 opsagent/README.md            | 40 ++++++++++++++++++++++++++++++++
 opsagent/opsagent.sh          | 30 ++++++++++++++++++++++++
 opsagent/opsagent_nosyslog.sh | 43 +++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+)
 create mode 100644 opsagent/README.md
 create mode 100644 opsagent/opsagent.sh
 create mode 100644 opsagent/opsagent_nosyslog.sh

diff --git a/opsagent/README.md b/opsagent/README.md
new file mode 100644
index 000000000..232b2c416
--- /dev/null
+++ b/opsagent/README.md
@@ -0,0 +1,40 @@
+# Ops Agent
+
+With [Dataproc 2.2 image version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-release-2.2), we recommend installing [Google Cloud Ops Agent](https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent) to obtain system metrics.
+
+This initialization action will install the Ops Agent on a [Google Cloud Dataproc](https://cloud.google.com/dataproc) cluster and provide metrics similar to those of the [`--metric-sources=monitoring-agent-defaults`](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#monitoring_agent_metrics) setting, which was supported until Dataproc 2.1.
+[This page](https://cloud.google.com/monitoring/api/metrics_agent#oagent-vs-magent) highlights differences in metric collection between the Ops Agent and the legacy monitoring agent.
+
+We provide two variants of this initialization action:
+- `opsagent.sh` installs the Ops Agent. [By default](https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default), it collects syslogs and system (node) metrics.
+- `opsagent_nosyslog.sh` installs the Ops Agent and also specifies a user configuration to skip syslog collection from your cluster nodes. If the user configuration is not specified, the Ops Agent collects syslogs in addition to the system (node) metrics. You can further customize this configuration to collect logs and metrics from other third-party applications.
+
+If you are looking to match the behavior of Dataproc image versions up to 2.1 with `--metric-sources=monitoring-agent-defaults`, which did not ingest syslogs from Dataproc cluster nodes, please use `opsagent_nosyslog.sh`.
+
+## Using this initialization action
+
+**:warning: NOTICE:** See
+[best practices](/README.md#how-initialization-actions-are-used) for using
+initialization actions in production.
+
+## Install the Ops Agent collecting system metrics only (no syslogs)
+
+```bash
+REGION=<region>
+CLUSTER_NAME=<cluster_name>
+gcloud dataproc clusters create ${CLUSTER_NAME} \
+    --image-version=2.2 \
+    --region=${REGION} \
+    --initialization-actions=gs://goog-dataproc-initialization-actions-${REGION}/opsagent/opsagent_nosyslog.sh
+```
+
+## Install the Ops Agent with default configuration
+
+```bash
+REGION=<region>
+CLUSTER_NAME=<cluster_name>
+gcloud dataproc clusters create ${CLUSTER_NAME} \
+    --image-version=2.2 \
+    --region=${REGION} \
+    --initialization-actions=gs://goog-dataproc-initialization-actions-${REGION}/opsagent/opsagent.sh
+```
diff --git a/opsagent/opsagent.sh b/opsagent/opsagent.sh
new file mode 100644
index 000000000..b17b0cd6c
--- /dev/null
+++ b/opsagent/opsagent.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script installs the Google Cloud Ops Agent on each node in the cluster.
+# See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default
+# for built-in configuration of Ops Agent.
+
+set -euo pipefail
+# Detect dataproc image version from its various names (empty when neither is set).
+DATAPROC_IMAGE_VERSION="${DATAPROC_IMAGE_VERSION:-${DATAPROC_VERSION:-}}"
+# Compare as versions (sort -V), not as decimals, so that e.g. 2.10 ranks above 2.2.
+if [[ -n "${DATAPROC_IMAGE_VERSION}" && "$(printf '%s\n' "${DATAPROC_IMAGE_VERSION}" 2.2 | sort -V | tail -n 1)" != "${DATAPROC_IMAGE_VERSION}" ]]; then
+  echo "This Dataproc cluster node runs image version ${DATAPROC_IMAGE_VERSION} with pre-installed legacy monitoring agent. Skipping Ops Agent installation."
+  exit 0
+fi
+
+# -f makes curl fail on HTTP errors instead of saving an error page for bash to run.
+curl -fsSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh
+bash add-google-cloud-ops-agent-repo.sh --also-install
diff --git a/opsagent/opsagent_nosyslog.sh b/opsagent/opsagent_nosyslog.sh
new file mode 100644
index 000000000..95f98c9ad
--- /dev/null
+++ b/opsagent/opsagent_nosyslog.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script installs the Google Cloud Ops Agent on each node in the cluster.
+# It also provides an override to the built-in logging config to set empty
+# receivers i.e. not collect any logs.
+# If you need to collect syslogs, you can use the other script in this directory,
+# opsagent.sh which uses the built-in configuration of Ops Agent.
+# See https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/configuration#default.
+
+set -euo pipefail
+# Detect dataproc image version from its various names (empty when neither is set).
+DATAPROC_IMAGE_VERSION="${DATAPROC_IMAGE_VERSION:-${DATAPROC_VERSION:-}}"
+# Compare as versions (sort -V), not as decimals, so that e.g. 2.10 ranks above 2.2.
+if [[ -n "${DATAPROC_IMAGE_VERSION}" && "$(printf '%s\n' "${DATAPROC_IMAGE_VERSION}" 2.2 | sort -V | tail -n 1)" != "${DATAPROC_IMAGE_VERSION}" ]]; then
+  echo "This Dataproc cluster node runs image version ${DATAPROC_IMAGE_VERSION} with pre-installed legacy monitoring agent. Skipping Ops Agent installation."
+  exit 0
+fi
+
+# -f makes curl fail on HTTP errors instead of saving an error page for bash to run.
+curl -fsSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh
+bash add-google-cloud-ops-agent-repo.sh --also-install
+
+# Append a user config with no logging receivers, then restart the agent to apply it.
+cat <<'EOF' >> /etc/google-cloud-ops-agent/config.yaml
+logging:
+  service:
+    pipelines:
+      default_pipeline:
+        receivers: []
+EOF
+systemctl restart google-cloud-ops-agent