Add support for autoscaling replicas (#13)

* Add support for autoscaling replicas * Add test case for autoscaling
claranet · Mar 27, 2018 · 9e0246c · 9e0246c
1 parent ea819e4
commit 9e0246c
Show file tree

Hide file tree

Showing 4 changed files with 161 additions and 38 deletions.
diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@ Gives you:
  - An Aurora DB instance + 'n' number of additional instances
  - Optionally RDS 'Enhanced Monitoring' + associated required IAM role/policy (by simply setting the `monitoring_interval` param to > `0`)
  - Optionally sensible alarms to SNS (high CPU, high connections, slow replication)
-
+ - Optionally configure autoscaling for read replicas (MySQL clusters only)
 
 ## Contributing
 
@@ -24,6 +24,9 @@ This README is generated with [terraform-docs](https://github.com/segmentio/terr
 Changing the parameter group in use requires a restart of the DB cluster, modifying parameters within a group
 may not (depending on the parameter being altered)
 
+## Known issues
+AWS doesn't automatically remove RDS instances created from autoscaling when you remove the autoscaling rules and this can cause issues when using Terraform to destroy the cluster.  To work around this, you should make sure there are no automatically created RDS instances running before attempting to destroy a cluster.
+
 ### Aurora 1.x (MySQL 5.6)
 
 
@@ -158,40 +161,46 @@ resource "aws_rds_cluster_parameter_group" "aurora_cluster_postgres96_parameter_
 
 ## Inputs
 
-| Name | Description | Default | Required |
-|------|-------------|:-----:|:-----:|
-| apply_immediately | Determines whether or not any DB modifications are applied immediately, or during the maintenance window | `false` | no |
-| auto_minor_version_upgrade | Determines whether minor engine upgrades will be performed automatically in the maintenance window | `true` | no |
-| azs | List of AZs to use | - | yes |
-| backup_retention_period | How long to keep backups for (in days) | `7` | no |
-| cw_alarms | Whether to enable CloudWatch alarms - requires `cw_sns_topic` is specified | `false` | no |
-| cw_max_conns | Connection count beyond which to trigger a CloudWatch alarm | `500` | no |
-| cw_max_cpu | CPU threshold above which to alarm | `85` | no |
-| cw_max_replica_lag | Maximum Aurora replica lag in milliseconds above which to alarm | `2000` | no |
-| cw_sns_topic | An SNS topic to publish CloudWatch alarms to | `false` | no |
-| db_cluster_parameter_group_name | The name of a DB Cluster parameter group to use | `default.aurora5.6` | no |
-| db_parameter_group_name | The name of a DB parameter group to use | `default.aurora5.6` | no |
-| engine | Aurora database engine type, currently aurora, aurora-mysql or aurora-postgresql | `aurora` | no |
-| engine-version | Aurora database engine version. | `5.6.10a` | no |
-| envname | Environment name (eg,test, stage or prod) | - | yes |
-| envtype | Environment type (eg,prod or nonprod) | - | yes |
-| final_snapshot_identifier | The name to use when creating a final snapshot on cluster destroy, appends a random 8 digits to name to ensure it's unique too. | `final` | no |
-| identifier_prefix | Prefix for cluster and instance identifier | `` | no |
-| instance_type | Instance type to use | `db.t2.small` | no |
-| monitoring_interval | The interval (seconds) between points when Enhanced Monitoring metrics are collected | `0` | no |
-| name | Name given to DB subnet group | - | yes |
-| password | Master DB password | - | yes |
-| port | The port on which to accept connections | `3306` | no |
-| preferred_backup_window | When to perform DB backups | `02:00-03:00` | no |
-| preferred_maintenance_window | When to perform DB maintenance | `sun:05:00-sun:06:00` | no |
-| publicly_accessible | Whether the DB should have a public IP address | `false` | no |
-| replica_count | Number of reader nodes to create | `0` | no |
-| security_groups | VPC Security Group IDs | - | yes |
-| skip_final_snapshot | Should a final snapshot be created on cluster destroy | `false` | no |
-| snapshot_identifier | DB snapshot to create this database from | `` | no |
-| storage_encrypted | Specifies whether the underlying storage layer should be encrypted | `true` | no |
-| subnets | List of subnet IDs to use | - | yes |
-| username | Master DB username | `root` | no |
+| Name | Description | Type | Default | Required |
+|------|-------------|:----:|:-----:|:-----:|
+| apply_immediately | Determines whether or not any DB modifications are applied immediately, or during the maintenance window | string | `false` | no |
+| auto_minor_version_upgrade | Determines whether minor engine upgrades will be performed automatically in the maintenance window | string | `true` | no |
+| azs | List of AZs to use | list | - | yes |
+| backup_retention_period | How long to keep backups for (in days) | string | `7` | no |
+| cw_alarms | Whether to enable CloudWatch alarms - requires `cw_sns_topic` is specified | string | `false` | no |
+| cw_max_conns | Connection count beyond which to trigger a CloudWatch alarm | string | `500` | no |
+| cw_max_cpu | CPU threshold above which to alarm | string | `85` | no |
+| cw_max_replica_lag | Maximum Aurora replica lag in milliseconds above which to alarm | string | `2000` | no |
+| cw_sns_topic | An SNS topic to publish CloudWatch alarms to | string | `false` | no |
+| db_cluster_parameter_group_name | The name of a DB Cluster parameter group to use | string | `default.aurora5.6` | no |
+| db_parameter_group_name | The name of a DB parameter group to use | string | `default.aurora5.6` | no |
+| engine | Aurora database engine type, currently aurora, aurora-mysql or aurora-postgresql | string | `aurora` | no |
+| engine-version | Aurora database engine version. | string | `5.6.10a` | no |
+| envname | Environment name (eg,test, stage or prod) | string | - | yes |
+| envtype | Environment type (eg,prod or nonprod) | string | - | yes |
+| final_snapshot_identifier | The name to use when creating a final snapshot on cluster destroy, appends a random 8 digits to name to ensure it's unique too. | string | `final` | no |
+| identifier_prefix | Prefix for cluster and instance identifier | string | `` | no |
+| instance_type | Instance type to use | string | `db.t2.small` | no |
+| monitoring_interval | The interval (seconds) between points when Enhanced Monitoring metrics are collected | string | `0` | no |
+| name | Name given to DB subnet group | string | - | yes |
+| password | Master DB password | string | - | yes |
+| port | The port on which to accept connections | string | `3306` | no |
+| preferred_backup_window | When to perform DB backups | string | `02:00-03:00` | no |
+| preferred_maintenance_window | When to perform DB maintenance | string | `sun:05:00-sun:06:00` | no |
+| publicly_accessible | Whether the DB should have a public IP address | string | `false` | no |
+| replica_count | Number of reader nodes to create.  If `replica_scale_enabled` is `true`, the value of `replica_scale_min` is used instead. | string | `0` | no |
+| replica_scale_cpu | CPU usage to trigger autoscaling at | string | `70` | no |
+| replica_scale_enabled | Whether to enable autoscaling for RDS Aurora (MySQL) read replicas | string | `false` | no |
+| replica_scale_in_cooldown | Cooldown in seconds before allowing further scaling operations after a scale in | string | `300` | no |
+| replica_scale_max | Maximum number of replicas to allow scaling for | string | `0` | no |
+| replica_scale_min | Minimum number of replicas to allow scaling for | string | `2` | no |
+| replica_scale_out_cooldown | Cooldown in seconds before allowing further scaling operations after a scale out | string | `300` | no |
+| security_groups | VPC Security Group IDs | list | - | yes |
+| skip_final_snapshot | Should a final snapshot be created on cluster destroy | string | `false` | no |
+| snapshot_identifier | DB snapshot to create this database from | string | `` | no |
+| storage_encrypted | Specifies whether the underlying storage layer should be encrypted | string | `true` | no |
+| subnets | List of subnet IDs to use | list | - | yes |
+| username | Master DB username | string | `root` | no |
 
 ## Outputs
 

diff --git a/main.tf b/main.tf
@@ -10,7 +10,7 @@
   *  - An Aurora DB instance + 'n' number of additional instances
   *  - Optionally RDS 'Enhanced Monitoring' + associated required IAM role/policy (by simply setting the `monitoring_interval` param to > `0`)
   *  - Optionally sensible alarms to SNS (high CPU, high connections, slow replication)
-  *
+  *  - Optionally configure autoscaling for read replicas (MySQL clusters only)
   *
   * ## Contributing
   *
@@ -25,6 +25,9 @@
   * Changing the parameter group in use requires a restart of the DB cluster, modifying parameters within a group
   * may not (depending on the parameter being altered)
   *
+  * ## Known issues
+  * AWS doesn't automatically remove RDS instances created from autoscaling when you remove the autoscaling rules and this can cause issues when using Terraform to destroy the cluster.  To work around this, you should make sure there are no automatically created RDS instances running before attempting to destroy a cluster.
+  *
   * ### Aurora 1.x (MySQL 5.6)
   *
   * 
@@ -195,7 +198,7 @@ resource "aws_rds_cluster_instance" "cluster_instance_0" {
 // Create 'n' number of additional DB instance(s) in same cluster
 resource "aws_rds_cluster_instance" "cluster_instance_n" {
   depends_on                   = ["aws_rds_cluster_instance.cluster_instance_0"]
-  count                        = "${var.replica_count}"
+  count                        = "${var.replica_scale_enabled ? var.replica_scale_min : var.replica_count}"
   engine                       = "${var.engine}"
   engine_version               = "${var.engine-version}"
   identifier                   = "${var.identifier_prefix != "" ? format("%s-node-%d", var.identifier_prefix, count.index + 1) : format("%s-aurora-node-%d", var.envname, count.index + 1)}"
@@ -272,3 +275,33 @@ resource "aws_iam_role_policy_attachment" "rds-enhanced-monitoring-policy-attach
   role       = "${aws_iam_role.rds-enhanced-monitoring.name}"
   policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonRDSEnhancedMonitoringRole"
 }
+
+// Autoscaling
+resource "aws_appautoscaling_target" "autoscaling" {
+  count              = "${var.replica_scale_enabled ? 1 : 0}"
+  max_capacity       = "${var.replica_scale_max}"
+  min_capacity       = "${var.replica_scale_min}"
+  resource_id        = "cluster:${aws_rds_cluster.default.cluster_identifier}"
+  scalable_dimension = "rds:cluster:ReadReplicaCount"
+  service_namespace  = "rds"
+}
+
+resource "aws_appautoscaling_policy" "autoscaling" {
+  count              = "${var.replica_scale_enabled ? 1 : 0}"
+  depends_on         = ["aws_appautoscaling_target.autoscaling"]
+  name               = "target-metric"
+  policy_type        = "TargetTrackingScaling"
+  resource_id        = "cluster:${aws_rds_cluster.default.cluster_identifier}"
+  scalable_dimension = "rds:cluster:ReadReplicaCount"
+  service_namespace  = "rds"
+
+  target_tracking_scaling_policy_configuration {
+    predefined_metric_specification {
+      predefined_metric_type = "RDSReaderAverageCPUUtilization"
+    }
+
+    scale_in_cooldown  = "${var.replica_scale_in_cooldown}"
+    scale_out_cooldown = "${var.replica_scale_out_cooldown}"
+    target_value       = "${var.replica_scale_cpu}"
+  }
+}
diff --git a/tests/terraform/test-mysql-57-autoscaling.tf b/tests/terraform/test-mysql-57-autoscaling.tf
@@ -0,0 +1,45 @@
+resource "aws_sns_topic" "db_alarms_57_autoscaling" {
+  name = "aurora-db-alarms-57-autoscaling"
+}
+
+module "aurora_db_57_autoscaling" {
+  source                          = "../.."
+  engine                          = "aurora-mysql"
+  engine-version                  = "5.7.12"
+  name                            = "test-aurora-db-57-autoscaling"
+  envname                         = "test-57-autoscaling"
+  envtype                         = "test"
+  subnets                         = ["${module.vpc.private_subnets}"]
+  azs                             = ["${module.vpc.availability_zones}"]
+  security_groups                 = ["${aws_security_group.allow_all.id}"]
+  instance_type                   = "db.t2.medium"
+  username                        = "root"
+  password                        = "changeme"
+  backup_retention_period         = "5"
+  final_snapshot_identifier       = "final-db-snapshot-prod"
+  storage_encrypted               = "true"
+  apply_immediately               = "true"
+  monitoring_interval             = "10"
+  cw_alarms                       = true
+  cw_sns_topic                    = "${aws_sns_topic.db_alarms_57_autoscaling.id}"
+  db_parameter_group_name         = "${aws_db_parameter_group.aurora_db_57_autoscaling_parameter_group.id}"
+  db_cluster_parameter_group_name = "${aws_rds_cluster_parameter_group.aurora_57_autoscaling_cluster_parameter_group.id}"
+  replica_scale_enabled           = true
+  replica_scale_min               = "1"
+  replica_scale_max               = "1"
+  replica_scale_cpu               = "70"
+  replica_scale_in_cooldown       = "300"
+  replica_scale_out_cooldown      = "300"
+}
+
+resource "aws_db_parameter_group" "aurora_db_57_autoscaling_parameter_group" {
+  name        = "test-aurora-db-57-autoscaling-parameter-group"
+  family      = "aurora-mysql5.7"
+  description = "test-aurora-db-57-autoscaling-parameter-group"
+}
+
+resource "aws_rds_cluster_parameter_group" "aurora_57_autoscaling_cluster_parameter_group" {
+  name        = "test-aurora-57-autoscaling-cluster-parameter-group"
+  family      = "aurora-mysql5.7"
+  description = "test-aurora-57-autoscaling-cluster-parameter-group"
+}
diff --git a/variables.tf b/variables.tf
@@ -32,7 +32,7 @@ variable "azs" {
 variable "replica_count" {
   type        = "string"
   default     = "0"
-  description = "Number of reader nodes to create"
+  description = "Number of reader nodes to create.  If `replica_scale_enabled` is `true`, the value of `replica_scale_min` is used instead."
 }
 
 variable "security_groups" {
@@ -181,3 +181,39 @@ variable "engine-version" {
   default     = "5.6.10a"
   description = "Aurora database engine version."
 }
+
+variable "replica_scale_enabled" {
+  type        = "string"
+  default     = false
+  description = "Whether to enable autoscaling for RDS Aurora (MySQL) read replicas"
+}
+
+variable "replica_scale_max" {
+  type        = "string"
+  default     = "0"
+  description = "Maximum number of replicas to allow scaling for"
+}
+
+variable "replica_scale_min" {
+  type        = "string"
+  default     = "2"
+  description = "Minimum number of replicas to allow scaling for"
+}
+
+variable "replica_scale_cpu" {
+  type        = "string"
+  default     = "70"
+  description = "CPU usage to trigger autoscaling at"
+}
+
+variable "replica_scale_in_cooldown" {
+  type        = "string"
+  default     = "300"
+  description = "Cooldown in seconds before allowing further scaling operations after a scale in"
+}
+
+variable "replica_scale_out_cooldown" {
+  type        = "string"
+  default     = "300"
+  description = "Cooldown in seconds before allowing further scaling operations after a scale out"
+}