Skip to content

Commit

Permalink
GDB-286 Added monitoring for graphdb
Browse files Browse the repository at this point in the history
* Added the necessary policy for the cloudwatch-agent
* Modified the user data script to enable the cloudwatch agent and configure it for the current tenant
* Added sample dashboard with a widget that monitors the cpu
  • Loading branch information
ivorusev committed Oct 13, 2023
1 parent 1e0b374 commit b391f45
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 6 deletions.
7 changes: 7 additions & 0 deletions examples/vpc-with-multiple-az/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,10 @@ module "graphdb" {
ami_id = var.ami_id
graphdb_version = var.graphdb_version
}

# CloudWatch monitoring for this example, defined in the local ./monitoring module.
# It receives the same region and resource name prefix used by the rest of the example.
module "monitoring" {
  source = "./monitoring"

  resource_name_prefix = var.resource_name_prefix
  aws_region           = var.aws_region
}
15 changes: 15 additions & 0 deletions examples/vpc-with-multiple-az/monitoring/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# GraphDB AWS Monitoring Module

This module creates the configuration needed to set up a CloudWatch dashboard with the widgets that show the health of the GraphDB cluster. These configurations should be defined per tenant.

## Widgets
This module creates the widgets required to monitor the load and health of GraphDB. These include:
* CPU Load
* Free Memory
* Node Health
* and more.

The widgets are configured with queries and should be usable for each new SaaS offering.

## Alarms
Alarms are defined to send notifications about cluster health in case of abnormal behavior, such as a node going down or CPU usage running high.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ resource "aws_cloudwatch_dashboard" "main" {
"type": "metric",
"properties": {
"metrics": [
[ { "expression": "SELECT AVG(graphdb_cpu_load) FROM \"GraphDB-Metrics\" GROUP BY host", "id": "q1", "label": "CPU", "${var.aws_region}": "eu-central-1", "stat": "Average" } ]
[ { "expression": "SELECT AVG(graphdb_cpu_load) FROM \"${var.resource_name_prefix}-graphdb\" GROUP BY host", "id": "q1", "label": "CPU", "region": var.aws_region, "stat": "Average" } ]
],
"region": var.aws_region,
"stacked": false,
Expand Down
File renamed without changes.
File renamed without changes.
10 changes: 10 additions & 0 deletions examples/vpc-with-multiple-az/monitoring/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Version constraints for the Terraform CLI and the AWS provider used by this module.
terraform {
  required_providers {
    aws = {
      version = "~> 5.15"
      source  = "hashicorp/aws"
    }
  }

  required_version = ">= 1.4.0"
}
5 changes: 5 additions & 0 deletions modules/iam/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ resource "aws_iam_instance_profile" "graphdb" {
role = var.user_supplied_iam_role_name != null ? var.user_supplied_iam_role_name : aws_iam_role.graphdb[0].name
}

# Attaches the AWS-managed CloudWatchAgentServerPolicy to the role used by the GraphDB instances.
#
# The role is resolved exactly as for the instance profile: the user-supplied role name when one
# is given, otherwise the role created by this module. Indexing aws_iam_role.graphdb[0]
# unconditionally would fail when a user-supplied role is used, because that resource then has
# count = 0 and no element at index 0.
resource "aws_iam_role_policy_attachment" "cloudwatch-agent-policy" {
  role       = var.user_supplied_iam_role_name != null ? var.user_supplied_iam_role_name : aws_iam_role.graphdb[0].name
  policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
}

resource "aws_iam_role" "graphdb" {
count = var.user_supplied_iam_role_name != null ? 0 : 1
name_prefix = "${var.resource_name_prefix}-graphdb-"
Expand Down
3 changes: 0 additions & 3 deletions modules/monitoring/README.md

This file was deleted.

1 change: 1 addition & 0 deletions modules/user_data/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ locals {
zone_id = var.zone_id

jvm_max_memory = local.jvm_max_memory
resource_name_prefix = var.resource_name_prefix
}
)
}
7 changes: 5 additions & 2 deletions modules/user_data/templates/start_graphdb.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,14 @@ echo 'fs.file-max = 262144' | tee -a /etc/sysctl.conf

sysctl -p

chown -R graphdb:graphdb /etc/graphdb/ /etc/prometheus/
# Configure the CloudWatch agent for this deployment: set the Prometheus log group name and the
# EMF metric namespace to "<resource_name_prefix>-graphdb", then start the agent and load the
# updated configuration.
#
# Both fields are written in a single jq pass, so the temporary file is produced and moved once.
# The name is handed to jq with --arg so it is treated as data instead of being spliced into the
# jq filter text. Shell variables are written without braces on purpose: this file is a Terraform
# template, where a dollar sign followed by an opening brace starts a template interpolation.
cw_config=/etc/graphdb/cloudwatch-agent-config.json
cw_namespace='${resource_name_prefix}-graphdb'
tmp=$(mktemp)
jq --arg ns "$cw_namespace" '.logs.metrics_collected.prometheus.log_group_name = $ns | .logs.metrics_collected.prometheus.emf_processor.metric_namespace = $ns' "$cw_config" > "$tmp" && mv "$tmp" "$cw_config"
amazon-cloudwatch-agent-ctl -a start
amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/etc/graphdb/cloudwatch-agent-config.json

# the proxy service is set up in the AMI but not enabled there, so we enable and start it
systemctl daemon-reload
systemctl start graphdb
systemctl enable graphdb-cluster-proxy.service
systemctl start graphdb-cluster-proxy.service
systemctl start graphdb-cluster-proxy.service

0 comments on commit b391f45

Please sign in to comment.