Skip to content

Commit

Permalink
Add README for all submodules (#2)
Browse files Browse the repository at this point in the history
* Add READMEs for modules

* Add READMEs for modules

* Add READMEs for modules

* Add READMEs for modules

* Add READMEs for modules

* Add READMEs for modules
  • Loading branch information
aknysh authored Aug 14, 2022
1 parent 300c3ac commit 7c33d05
Show file tree
Hide file tree
Showing 9 changed files with 544 additions and 0 deletions.
37 changes: 37 additions & 0 deletions modules/glue-catalog-database/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# glue-catalog-database

Terraform module to provision AWS Glue Catalog Databases.

## Usage

```hcl
# S3 bucket referenced by the Glue Catalog database below as its data location
module "s3_bucket_source" {
  source  = "cloudposse/s3-bucket/aws"
  version = "2.0.3"

  attributes = ["source"]

  acl                = "private"
  versioning_enabled = false
  force_destroy      = true

  # Upload and transport restrictions
  allow_encrypted_uploads_only = true
  allow_ssl_requests_only      = true

  # Public access block settings
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true

  context = module.this.context
}
# Glue Catalog database whose location is the root of the source S3 bucket
module "glue_catalog_database" {
  source = "cloudposse/glue/aws//modules/glue-catalog-database"
  # Cloud Posse recommends pinning every module to a specific version
  # version = "x.x.x"

  catalog_database_name        = "analytics"
  catalog_database_description = "Glue Catalog database using data located in an S3 bucket"

  # S3 URI built from the source bucket ID
  location_uri = "s3://${module.s3_bucket_source.bucket_id}"

  context = module.this.context
}
```
95 changes: 95 additions & 0 deletions modules/glue-catalog-table/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# glue-catalog-table

Terraform module to provision AWS Glue Catalog Tables.

## Usage

```hcl
# S3 bucket that holds the data referenced by the Glue Catalog database and table below
module "s3_bucket_source" {
  source  = "cloudposse/s3-bucket/aws"
  version = "2.0.3"

  attributes = ["source"]

  acl                = "private"
  versioning_enabled = false
  force_destroy      = true

  # Upload and transport restrictions
  allow_encrypted_uploads_only = true
  allow_ssl_requests_only      = true

  # Public access block settings
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true

  context = module.this.context
}
# Glue Catalog database that the table below is registered in
module "glue_catalog_database" {
  source = "cloudposse/glue/aws//modules/glue-catalog-database"
  # Cloud Posse recommends pinning every module to a specific version
  # version = "x.x.x"

  catalog_database_name        = "analytics"
  catalog_database_description = "Glue Catalog database using data located in an S3 bucket"

  # S3 URI built from the source bucket ID
  location_uri = "s3://${module.s3_bucket_source.bucket_id}"

  context = module.this.context
}
# Glue Catalog table "geo" (Parquet) stored under the `geo` prefix of the source bucket
module "glue_catalog_table" {
  source = "cloudposse/glue/aws//modules/glue-catalog-table"
  # Cloud Posse recommends pinning every module to a specific version
  # version = "x.x.x"

  catalog_table_name        = "geo"
  catalog_table_description = "region/state/county Glue Catalog table"
  database_name             = module.glue_catalog_database.name

  # Table-level properties
  parameters = {
    "lakeformation.aso.status" = true
    "classification"           = "parquet"
  }

  storage_descriptor = {
    # Physical location of the table. By default this takes the form of the warehouse location,
    # followed by the database location in the warehouse, followed by the table name
    location = "s3://${module.s3_bucket_source.bucket_id}/geo"

    # Configuration block for columns in the table
    columns = [
      { name = "county", type = "string" },
      { name = "state", type = "string" },
      { name = "region", type = "string" }
    ]

    # List of reducer grouping columns, clustering columns, and bucketing columns in the table
    bucket_columns = null
    # Must be specified if the table contains any dimension columns
    number_of_buckets = 0

    # Whether the data in the table is compressed
    compressed = false
    # Whether the table data is stored in subdirectories
    stored_as_sub_directories = false

    # Input format: SequenceFileInputFormat (binary), or TextInputFormat, or a custom format
    input_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
    # Output format: SequenceFileOutputFormat (binary), or IgnoreKeyTextOutputFormat, or a custom format
    output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"

    # Configuration block for serialization and deserialization ("SerDe") information
    ser_de_info = {
      # Usually the class that implements the SerDe. An example is org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
      serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"

      # Map of initialization parameters for the SerDe, in key-value form
      parameters = {
        "serialization.format" = "1"
      }
    }
  }

  context = module.this.context
}
```
63 changes: 63 additions & 0 deletions modules/glue-connection/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# glue-connection

Terraform module to provision AWS Glue Connections.

## Usage

```hcl
# VPC that supplies the VPC ID and CIDR block used by the security group below
module "vpc" {
  source  = "cloudposse/vpc/aws"
  version = "1.1.0"

  ipv4_primary_cidr_block = "172.19.0.0/16"

  context = module.this.context
}
# Looks up the first private subnet so that its availability zone can be passed to the Glue connection.
# NOTE(review): confirm that the `vpc` module version used here actually exports `private_subnet_ids`;
# subnet IDs may need to come from a separate subnets module instead.
data "aws_subnet" "selected" {
  id = module.vpc.private_subnet_ids[0]
}
# Security group for the Glue connection: allows Postgres (TCP 5432) ingress from
# within the VPC CIDR and all egress
module "security_group" {
  source  = "cloudposse/security-group/aws"
  version = "1.0.1"

  vpc_id                = module.vpc.vpc_id
  create_before_destroy = true
  allow_all_egress      = true

  rules = [
    {
      type      = "ingress"
      from_port = 5432
      to_port   = 5432
      # Postgres uses TCP. With protocol "all" (-1), AWS ignores the port range
      # and allows traffic on every port, which defeats the 5432 restriction.
      protocol    = "tcp"
      cidr_blocks = [module.vpc.vpc_cidr_block]
    }
  ]

  context = module.this.context
}
# Glue JDBC connection placed in the first private subnet and guarded by the security group above
module "glue_connection" {
  source = "cloudposse/glue/aws//modules/glue-connection"
  # Cloud Posse recommends pinning every module to a specific version
  # version = "x.x.x"

  connection_name        = "geo"
  connection_description = "Glue connection to Postgres database"
  connection_type        = "JDBC"
  connection_properties  = {}

  physical_connection_requirements = {
    # The subnet ID used by the connection
    subnet_id = module.vpc.private_subnet_ids[0]
    # The availability zone of the connection. This field is redundant and implied by subnet_id,
    # but is currently an API requirement
    availability_zone = data.aws_subnet.selected.availability_zone
    # List of security group IDs used by the connection
    security_group_id_list = [module.security_group.id]
  }

  context = module.this.context
}
```
112 changes: 112 additions & 0 deletions modules/glue-crawler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# glue-crawler

Terraform module to provision AWS Glue Crawlers.

## Usage

```hcl
# S3 bucket referenced by the Glue Catalog database below as its data location
module "s3_bucket_source" {
  source  = "cloudposse/s3-bucket/aws"
  version = "2.0.3"

  attributes = ["source"]

  acl                = "private"
  versioning_enabled = false
  force_destroy      = true

  # Upload and transport restrictions
  allow_encrypted_uploads_only = true
  allow_ssl_requests_only      = true

  # Public access block settings
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true

  context = module.this.context
}
# IAM role assumable by the Glue service, with the AWSGlueServiceRole managed policy attached;
# passed to the crawler below
module "iam_role" {
  source  = "cloudposse/iam-role/aws"
  version = "0.16.2"

  role_description   = "Role for AWS Glue with access to EC2, S3, and Cloudwatch Logs"
  policy_description = "Policy for AWS Glue with access to EC2, S3, and Cloudwatch Logs"

  # Trust policy principal
  principals = {
    "Service" = ["glue.amazonaws.com"]
  }

  # Only the managed policy is attached; no additional policy documents are supplied
  managed_policy_arns   = ["arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"]
  policy_document_count = 0

  context = module.this.context
}
# S3 bucket used as the crawler's S3 target below
module "s3_bucket_destination" {
  source  = "cloudposse/s3-bucket/aws"
  version = "2.0.3"

  attributes = ["destination"]

  acl                = "private"
  versioning_enabled = false
  force_destroy      = true

  # Upload and transport restrictions
  allow_encrypted_uploads_only = true
  allow_ssl_requests_only      = true

  # Public access block settings
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true

  context = module.this.context
}
# Glue Catalog database that the crawler below is attached to
module "glue_catalog_database" {
  source = "cloudposse/glue/aws//modules/glue-catalog-database"
  # Cloud Posse recommends pinning every module to a specific version
  # version = "x.x.x"

  catalog_database_name        = "analytics"
  catalog_database_description = "Glue Catalog database using data located in an S3 bucket"

  # S3 URI built from the source bucket ID
  location_uri = "s3://${module.s3_bucket_source.bucket_id}"

  context = module.this.context
}
# Glue crawler that runs on a cron schedule against the destination S3 bucket
# and is attached to the Glue Catalog database defined above
module "glue_crawler" {
  source = "cloudposse/glue/aws//modules/glue-crawler"
  # Cloud Posse recommends pinning every module to a specific version
  # version = "x.x.x"

  # The description matches the configuration below: the only crawl target is the
  # destination bucket, and the crawler's output goes to the catalog database
  # (a crawler does not write results into an S3 bucket).
  crawler_description = "Glue crawler that crawls data in the destination S3 bucket and writes the metadata into the Glue Catalog database"
  database_name       = module.glue_catalog_database.name
  role                = module.iam_role.arn
  schedule            = "cron(0 1 * * ? *)"

  schema_change_policy = {
    delete_behavior = "LOG"
    update_behavior = null
  }

  # S3 location(s) to crawl
  s3_target = [
    {
      path = format("s3://%s", module.s3_bucket_destination.bucket_id)
    }
  ]

  # Crawler configuration, passed as a JSON-encoded string
  configuration = jsonencode(
    {
      Grouping = {
        TableGroupingPolicy = "CombineCompatibleSchemas"
      }
      CrawlerOutput = {
        Partitions = {
          AddOrUpdateBehavior = "InheritFromTable"
        }
      }
      Version = 1
    }
  )

  context = module.this.context
}
```
70 changes: 70 additions & 0 deletions modules/glue-job/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# glue-job

Terraform module to provision AWS Glue Jobs.

## Usage

```hcl
# IAM role assumable by the Glue service, with the AWSGlueServiceRole managed policy attached;
# passed to the Glue job below
module "iam_role" {
  source  = "cloudposse/iam-role/aws"
  version = "0.16.2"

  role_description   = "Role for AWS Glue with access to EC2, S3, and Cloudwatch Logs"
  policy_description = "Policy for AWS Glue with access to EC2, S3, and Cloudwatch Logs"

  # Trust policy principal
  principals = {
    "Service" = ["glue.amazonaws.com"]
  }

  # Only the managed policy is attached; no additional policy documents are supplied
  managed_policy_arns   = ["arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"]
  policy_document_count = 0

  context = module.this.context
}
# S3 bucket that holds the script referenced by the Glue job's `script_location` below
module "s3_bucket_job_source" {
  source  = "cloudposse/s3-bucket/aws"
  version = "2.0.3"

  acl                = "private"
  versioning_enabled = false
  force_destroy      = true

  # Upload and transport restrictions
  allow_encrypted_uploads_only = true
  allow_ssl_requests_only      = true

  # Public access block settings
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true

  context = module.this.context
}
# Glue job that runs the `geo.py` script stored in the job source bucket
module "glue_job" {
  source = "cloudposse/glue/aws//modules/glue-job"
  # Cloud Posse recommends pinning every module to a specific version
  # version = "x.x.x"

  job_name          = "geo_processor"
  job_description   = "Glue Job for processing geo data"
  role_arn          = module.iam_role.arn
  glue_version      = "2.0"
  default_arguments = {}
  worker_type       = "Standard"
  number_of_workers = 2
  max_retries       = 2

  # The job timeout in minutes
  timeout = 20

  command = {
    # The name of the job command. AWS Glue accepts only specific values here:
    # `glueetl` for a Spark ETL job, `pythonshell` for a Python shell job,
    # or `gluestreaming` for a streaming ETL job. A free-form label is rejected.
    name            = "glueetl"
    script_location = format("s3://%s/geo.py", module.s3_bucket_job_source.bucket_id)
    python_version  = 3
  }

  context = module.this.context
}
```
16 changes: 16 additions & 0 deletions modules/glue-registry/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# glue-registry

Terraform module to provision AWS Glue Registries.

## Usage

```hcl
# Glue Registry named "analytics"
module "glue_registry" {
  source = "cloudposse/glue/aws//modules/glue-registry"
  # Cloud Posse recommends pinning every module to a specific version
  # version = "x.x.x"

  registry_name        = "analytics"
  registry_description = "Glue Registry for analytics"
}
```
Loading

0 comments on commit 7c33d05

Please sign in to comment.