diff --git a/examples/vpc-with-multiple-az/main.tf b/examples/vpc-with-multiple-az/main.tf
index 23e30e2..9a9043b 100644
--- a/examples/vpc-with-multiple-az/main.tf
+++ b/examples/vpc-with-multiple-az/main.tf
@@ -28,3 +28,11 @@ module "graphdb" {
   ami_id          = var.ami_id
   graphdb_version = var.graphdb_version
 }
+
+# CloudWatch dashboard for the GraphDB cluster; uses the same region and name prefix as the cluster.
+module "monitoring" {
+  source = "./monitoring"
+
+  aws_region           = var.aws_region
+  resource_name_prefix = var.resource_name_prefix
+}
diff --git a/examples/vpc-with-multiple-az/monitoring/README.md b/examples/vpc-with-multiple-az/monitoring/README.md
new file mode 100644
index 0000000..a9b6977
--- /dev/null
+++ b/examples/vpc-with-multiple-az/monitoring/README.md
@@ -0,0 +1,15 @@
+# GraphDB AWS Monitoring Module
+
+This module creates the configuration needed to set up a CloudWatch dashboard with the widgets that show the health of the GraphDB cluster. This configuration should be created per tenant.
+
+## Widgets
+The module creates the widgets required to monitor the load and the health of GraphDB. These include:
+* CPU Load
+* Free Memory
+* Node Health
+* etc.
+
+The widgets are configured with queries and should be usable for each new SaaS offering.
+
+## Alarms
+Alarms are defined to send notifications about the cluster health in case of abnormal behavior, such as a node going down or the CPU load running high.
diff --git a/modules/monitoring/main.tf b/examples/vpc-with-multiple-az/monitoring/main.tf
similarity index 80%
rename from modules/monitoring/main.tf
rename to examples/vpc-with-multiple-az/monitoring/main.tf
index 3c712d5..ed3c8ca 100644
--- a/modules/monitoring/main.tf
+++ b/examples/vpc-with-multiple-az/monitoring/main.tf
@@ -11,7 +11,7 @@ resource "aws_cloudwatch_dashboard" "main" {
         "type": "metric",
         "properties": {
           "metrics": [
-            [ { "expression": "SELECT AVG(graphdb_cpu_load) FROM \"GraphDB-Metrics\" GROUP BY host", "id": "q1", "label": "CPU", "${var.aws_region}": "eu-central-1", "stat": "Average" } ]
+            [ { "expression": "SELECT AVG(graphdb_cpu_load) FROM \"${var.resource_name_prefix}-graphdb\" GROUP BY host", "id": "q1", "label": "CPU", "region": var.aws_region, "stat": "Average" } ]
           ],
           "region": var.aws_region,
           "stacked": false,
diff --git a/modules/monitoring/outputs.tf b/examples/vpc-with-multiple-az/monitoring/outputs.tf
similarity index 100%
rename from modules/monitoring/outputs.tf
rename to examples/vpc-with-multiple-az/monitoring/outputs.tf
diff --git a/modules/monitoring/variables.tf b/examples/vpc-with-multiple-az/monitoring/variables.tf
similarity index 100%
rename from modules/monitoring/variables.tf
rename to examples/vpc-with-multiple-az/monitoring/variables.tf
diff --git a/examples/vpc-with-multiple-az/monitoring/versions.tf b/examples/vpc-with-multiple-az/monitoring/versions.tf
new file mode 100644
index 0000000..761245a
--- /dev/null
+++ b/examples/vpc-with-multiple-az/monitoring/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.4.0"
+
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.15"
+    }
+  }
+}
diff --git a/modules/iam/main.tf b/modules/iam/main.tf
index ee17888..71b0f19 100644
--- a/modules/iam/main.tf
+++ b/modules/iam/main.tf
@@ -3,6 +3,14 @@ resource "aws_iam_instance_profile" "graphdb" {
   role        = var.user_supplied_iam_role_name != null ? var.user_supplied_iam_role_name : aws_iam_role.graphdb[0].name
 }
 
+# Grants the instance role the managed policy used by the CloudWatch agent.
+# Created only when this module manages the role: aws_iam_role.graphdb has count = 0 for a
+# user-supplied role, so an unconditional graphdb[0] reference would fail with an invalid index.
+resource "aws_iam_role_policy_attachment" "cloudwatch-agent-policy" {
+  count      = var.user_supplied_iam_role_name != null ? 0 : 1
+  role       = aws_iam_role.graphdb[0].id
+  policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
+}
+
 resource "aws_iam_role" "graphdb" {
   count       = var.user_supplied_iam_role_name != null ? 0 : 1
   name_prefix = "${var.resource_name_prefix}-graphdb-"
diff --git a/modules/monitoring/README.md b/modules/monitoring/README.md
deleted file mode 100644
index 6705d1a..0000000
--- a/modules/monitoring/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# GraphDB AWS Configuration Module
-
-TBD
diff --git a/modules/user_data/main.tf b/modules/user_data/main.tf
index 643d7c5..1601317 100644
--- a/modules/user_data/main.tf
+++ b/modules/user_data/main.tf
@@ -27,6 +27,7 @@ locals {
       zone_id = var.zone_id
 
       jvm_max_memory = local.jvm_max_memory
+      resource_name_prefix = var.resource_name_prefix
     }
   )
 }
diff --git a/modules/user_data/templates/start_graphdb.sh.tpl b/modules/user_data/templates/start_graphdb.sh.tpl
index f7679cb..618ffe6 100644
--- a/modules/user_data/templates/start_graphdb.sh.tpl
+++ b/modules/user_data/templates/start_graphdb.sh.tpl
@@ -231,11 +231,14 @@ echo 'fs.file-max = 262144' | tee -a /etc/sysctl.conf
 
 sysctl -p
 
-chown -R graphdb:graphdb /etc/graphdb/ /etc/prometheus/
+tmp=$(mktemp)
+jq '.logs.metrics_collected.prometheus.log_group_name = "${resource_name_prefix}-graphdb"' /etc/graphdb/cloudwatch-agent-config.json > "$tmp" && mv "$tmp" /etc/graphdb/cloudwatch-agent-config.json
+jq '.logs.metrics_collected.prometheus.emf_processor.metric_namespace = "${resource_name_prefix}-graphdb"' /etc/graphdb/cloudwatch-agent-config.json > "$tmp" && mv "$tmp" /etc/graphdb/cloudwatch-agent-config.json
+amazon-cloudwatch-agent-ctl -a start
 amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/etc/graphdb/cloudwatch-agent-config.json
 
 # the proxy service is set up in the AMI but not enabled there, so we enable and start it
 systemctl daemon-reload
 systemctl start graphdb
 systemctl enable graphdb-cluster-proxy.service
 systemctl start graphdb-cluster-proxy.service