diff --git a/.github/workflows/lambda_zipper.yml b/.github/workflows/lambda_zipper.yml index d7a7913..2d0acfc 100644 --- a/.github/workflows/lambda_zipper.yml +++ b/.github/workflows/lambda_zipper.yml @@ -31,7 +31,7 @@ jobs: cd ./herding_cats_pipelines/lambda_jobs pip install -r requirements.txt -t ./package cp *.py ./package/ - cd ./package && zip -r ../lambda_jobs.zip . + cd ./package && zip -r ../lambda_herding_cats_jobs.zip . - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4.0.2 with: @@ -41,4 +41,4 @@ jobs: - name: Upload to S3 run: | cd ./herding_cats_pipelines/lambda_jobs - aws s3 cp lambda_jobs.zip s3://${{ secrets.S3_BUCKET_NAME }}/lambda_jobs.zip \ No newline at end of file + aws s3 cp lambda_herding_cats_jobs.zip s3://${{ secrets.S3_BUCKET_NAME }}/lambda_herding_cats_jobs.zip \ No newline at end of file diff --git a/.gitignore b/.gitignore index a9653c2..6375ca4 100644 --- a/.gitignore +++ b/.gitignore @@ -171,9 +171,9 @@ creds.py /dbt/target/ /dbt/dbt_packages/ dev.duckdb -variables.tf terraform.tfvars .terraform.lock.hcl +.terraform LICENSE.txt terraform-provider-aws_v5.50.0_x5 terraform/.terraform/providers/registry.terraform.io/hashicorp/aws/5.50.0/darwin_arm64 diff --git a/herding_cats_pipelines/glue_jobs/main.py b/herding_cats_pipelines/etl_jobs/main.py similarity index 100% rename from herding_cats_pipelines/glue_jobs/main.py rename to herding_cats_pipelines/etl_jobs/main.py diff --git a/herding_cats_pipelines/lambda_jobs/london_datastore.py b/herding_cats_pipelines/lambda_jobs/main.py similarity index 92% rename from herding_cats_pipelines/lambda_jobs/london_datastore.py rename to herding_cats_pipelines/lambda_jobs/main.py index 805d93d..2b67c6f 100644 --- a/herding_cats_pipelines/lambda_jobs/london_datastore.py +++ b/herding_cats_pipelines/lambda_jobs/main.py @@ -1,5 +1,7 @@ import json import boto3 +import botocore +import botocore.exceptions import requests from loguru import logger @@ -43,7 +45,7 @@ def lambda_handler(event, context) -> json: try: secret_name = get_param("herding_cats_param") secret = get_secret(secret_name) - bucket_name = secret[secret_name] + bucket_name = secret["herding_cats_raw_data_bucket"] url = "https://data.london.gov.uk/api/action/package_search" response = requests.get(url) @@ -62,7 +64,7 @@ def lambda_handler(event, context) -> json: Body=json.dumps(data), ContentType='application/json' ) - print(f"Data Successfully Dumped to S3://{bucket_name}/{file_name}") + logger.success(f"Data Successfully Dumped") return { 'statusCode': 200, @@ -74,7 +76,7 @@ def lambda_handler(event, context) -> json: 'statusCode': 500, 'body': json.dumps({'error': f'Data fetch error: {str(e)}'}) } - except boto3.exceptions.BotoError as e: + except botocore.exceptions.ClientError as e: print(f"An error occurred while dumping to S3: {str(e)}") return { 'statusCode': 500, diff --git a/herding_cats_pipelines/terraform/lambda-module/main.tf b/herding_cats_pipelines/terraform/lambda-module/main.tf new file mode 100644 index 0000000..b993328 --- /dev/null +++ b/herding_cats_pipelines/terraform/lambda-module/main.tf @@ -0,0 +1,100 @@ +provider "aws" { + region = var.aws_region +} + +# LAMBDA FUNCTION +resource "aws_lambda_function" "herding-cats" { + function_name = var.function_name + role = aws_iam_role.lambda_role.arn + handler = "main.lambda_handler" + memory_size = 2048 + reserved_concurrent_executions = 1 + timeout = 15 + runtime = "python3.11" + s3_bucket = var.code_bucket_name + s3_key = "lambda_herding_cats_jobs.zip" + source_code_hash = data.aws_s3_object.lambda_code.etag +} + +# IAM role for Lambda +resource "aws_iam_role" "lambda_role" { + name = "${var.function_name}-role" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { + Service = "lambda.amazonaws.com" + } + }] + }) +} + +# Lambda basic execution policy +resource "aws_iam_role_policy_attachment" "lambda_basic_execution" { + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + role = aws_iam_role.lambda_role.name +} + +# S3 read policy for code bucket +resource "aws_iam_policy" "s3_code_access_policy" { + name = "${var.function_name}-s3-code-access-policy" + path = "/" + description = "IAM policy for S3 read access to code bucket from Lambda" + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = [ + "s3:GetObject", + "s3:ListBucket" + ] + Resource = [ + "arn:aws:s3:::${var.code_bucket_name}", + "arn:aws:s3:::${var.code_bucket_name}/*" + ] + }] + }) +} + +# S3 read/write policy for the data bucket +resource "aws_iam_policy" "s3_data_access_policy" { + name = "${var.function_name}-s3-data-access-policy" + path = "/" + description = "IAM policy for S3 read and write access to the data bucket from Lambda" + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListBucket" + ] + Resource = [ + "arn:aws:s3:::${var.data_bucket_name}", + "arn:aws:s3:::${var.data_bucket_name}/*" + ] + }] + }) +} + +# Attach S3 code access policy to Lambda role +resource "aws_iam_role_policy_attachment" "lambda_s3_code_access" { + policy_arn = aws_iam_policy.s3_code_access_policy.arn + role = aws_iam_role.lambda_role.name +} + +# Attach S3 data access policy to Lambda role +resource "aws_iam_role_policy_attachment" "lambda_s3_data_access" { + policy_arn = aws_iam_policy.s3_data_access_policy.arn + role = aws_iam_role.lambda_role.name +} + + +data "aws_s3_object" "lambda_code" { + bucket = var.code_bucket_name + key = "lambda_herding_cats_jobs.zip" +} \ No newline at end of file diff --git a/herding_cats_pipelines/terraform/lambda-module/variables.tf b/herding_cats_pipelines/terraform/lambda-module/variables.tf new file mode 100644 index 0000000..f030391 --- /dev/null +++ b/herding_cats_pipelines/terraform/lambda-module/variables.tf @@ -0,0 +1,19 @@ +variable "aws_region" { + description = "The AWS region to deploy to" + type = string +} + +variable "function_name" { + description = "The name of the Lambda function" + type = string +} + +variable "code_bucket_name" { + description = "Name of the existing S3 bucket containing Lambda function code" + type = string +} + +variable "data_bucket_name" { + description = "Name of the S3 bucket for Lambda function data operations" + type = string +} \ No newline at end of file diff --git a/makefile b/makefile index 6b36f1b..02f6cb8 100644 --- a/makefile +++ b/makefile @@ -11,7 +11,7 @@ git-add: git-commit: @read -p "Please enter an additional commit message: " msg; \ - git commit -m "updates $(DATE) - $$msg" + git commit -m "Updates $(DATE) - $$msg" git-push: git push \ No newline at end of file