diff --git a/tf/modules/glue/main.tf b/tf/modules/glue/main.tf index 82e7129..b3e2f6e 100644 --- a/tf/modules/glue/main.tf +++ b/tf/modules/glue/main.tf @@ -37,14 +37,14 @@ EOF inline_policy { name = "policy-8675309" - policy = templatefile("${path.module}/crawler_policy.json.tpl", { data_bucket = var.data_bucket }) + policy = templatefile("${path.module}/policies/crawler_policy.json.tpl", { data_bucket = var.data_bucket }) } } resource "aws_iam_policy" "glue_policy" { name = "${var.app_prefix}-glue" description = "Glue Access Policy" - policy = templatefile("${path.module}/policy.json.tpl", { app_prefix = var.app_prefix, role_arn = aws_iam_role.glue_role.arn }) + policy = templatefile("${path.module}/policies/policy.json.tpl", { app_prefix = var.app_prefix, role_arn = aws_iam_role.glue_role.arn }) } resource "aws_iam_role_policy_attachment" "glue_attach" { @@ -63,3 +63,17 @@ resource "aws_glue_catalog_database" "stage_database" { } } } + +resource "aws_s3_object" "etl_script" { + bucket = var.data_bucket + key = "scripts/etl_job.py" + source = "${path.module}/scripts/etl_job.py" + etag = filemd5("${path.module}/scripts/etl_job.py") +} + +resource "aws_s3_object" "shell_script" { + bucket = var.data_bucket + key = "scripts/shell_job.py" + source = "${path.module}/scripts/shell_job.py" + etag = filemd5("${path.module}/scripts/shell_job.py") +} diff --git a/tf/modules/glue/outputs.tf b/tf/modules/glue/outputs.tf index e0bab51..3c5d9fe 100644 --- a/tf/modules/glue/outputs.tf +++ b/tf/modules/glue/outputs.tf @@ -9,3 +9,11 @@ output "glue_crawler_role" { output "stage_database" { value = aws_glue_catalog_database.stage_database } + +output "etl_script_url" { + value = "s3://${var.data_bucket}/${aws_s3_object.etl_script.id}" +} + +output "shell_script_url" { + value = "s3://${var.data_bucket}/${aws_s3_object.shell_script.id}" +} diff --git a/tf/modules/glue/crawler_policy.json.tpl b/tf/modules/glue/policies/crawler_policy.json.tpl similarity index 100% rename from tf/modules/glue/crawler_policy.json.tpl rename to tf/modules/glue/policies/crawler_policy.json.tpl diff --git a/tf/modules/glue/policy.json.tpl b/tf/modules/glue/policies/policy.json.tpl similarity index 100% rename from tf/modules/glue/policy.json.tpl rename to tf/modules/glue/policies/policy.json.tpl diff --git a/glue_scripts/etl_job.py b/tf/modules/glue/scripts/etl_job.py similarity index 100% rename from glue_scripts/etl_job.py rename to tf/modules/glue/scripts/etl_job.py diff --git a/glue_scripts/shell_job.py b/tf/modules/glue/scripts/shell_job.py similarity index 100% rename from glue_scripts/shell_job.py rename to tf/modules/glue/scripts/shell_job.py diff --git a/tf/modules/pipelines/austin_traffic/main.tf b/tf/modules/pipelines/austin_traffic/main.tf index 4337081..e5a6eaf 100644 --- a/tf/modules/pipelines/austin_traffic/main.tf +++ b/tf/modules/pipelines/austin_traffic/main.tf @@ -4,7 +4,7 @@ resource "aws_glue_job" "import_raw_traffic_json" { command { name = "pythonshell" - script_location = "s3://${var.data_bucket}/scripts/shell_job.py" + script_location = var.shell_script_url python_version = "3.9" } @@ -32,7 +32,7 @@ resource "aws_glue_job" "transform_traffic_raw_to_stage" { command { name = "glueetl" - script_location = "s3://${var.data_bucket}/scripts/etl_job.py" + script_location = var.etl_script_url python_version = "3" } diff --git a/tf/modules/pipelines/austin_traffic/vars.tf b/tf/modules/pipelines/austin_traffic/vars.tf index 77af6dc..d9d235b 100644 --- a/tf/modules/pipelines/austin_traffic/vars.tf +++ b/tf/modules/pipelines/austin_traffic/vars.tf @@ -6,15 +6,23 @@ variable "data_bucket" { type = string } +variable "etl_script_url" { + type = string +} + +variable "shell_script_url" { + type = string +} + variable "glue_role" { -type = object({ + type = object({ arn = string name = string }) } variable "glue_crawler_role" { -type = object({ + type = object({ arn = string name = string }) diff --git a/tf/modules/pipelines/main.tf b/tf/modules/pipelines/main.tf index 761ceee..b33dab8 100644 --- a/tf/modules/pipelines/main.tf +++ b/tf/modules/pipelines/main.tf @@ -16,6 +16,8 @@ module "austin_traffic_pipeline" { app_prefix = var.app_prefix data_bucket = module.buckets.data_bucket + etl_script_url = module.glue.etl_script_url + shell_script_url = module.glue.shell_script_url glue_role = module.glue.glue_role glue_crawler_role = module.glue.glue_crawler_role stage_database = module.glue.stage_database