Skip to content

Commit

Permalink
feat: [PAYMCLOUD-98] terraform monitoring function refactor with new …
Browse files Browse the repository at this point in the history
…container app job resource (#359)

* Refactor monitoring job to use azurerm_container_app_job

Reworked the monitoring job resource from azapi_resource to azurerm_container_app_job for improved maintainability and compatibility.

* Standardize secret names to lowercase

Updated the secret names in the monitoring function configuration to consistent lowercase format.

* Refactor secret names to use hyphenated format

Updated the secret names in `01_main.tf` to use hyphens instead of underscores.

* Add alert auto-mitigation variable

Introduce a new variable `alert_set_auto_mitigate` to control if metric alerts should auto-resolve, defaulting to true. Updated the README to document this new optional input.

* Add azapi support and conditionally manage app job resources

Added azapi provider and introduced a "legacy" variable to toggle between legacy and new resource provisioning methods. Modified container app job resource definitions to either use azapi or azurerm based on the "legacy" setting, ensuring a smoother transition and rollback capability.

* Add Terraform configuration for monitoring function test environment

* Remove .terraform-version file

This file was specifying the Terraform version as 1.8.5. It is no longer needed and can be managed through other version control mechanisms.

* Fix sequential resource creation-destruction comment

* Format README files with consistent Markdown

Replaced spaces between pre tags with slashes to ensure consistent formatting across all README files. This change does not alter the functionality but improves readability and uniformity in documentation.

* Update Markdown formatting in README files for consistency

Converted newline characters from escaped newlines to plain newlines in code blocks within multiple README.md files. This change enhances readability and maintains a consistent formatting style across the documentation.

* Add auto_mitigate option to alert configuration

Added the "auto_mitigate" field to the alert configuration in the README.md and 01_main.tf files. This new optional parameter allows automatic resolution of alerts and defaults to true. Adjusted documentation and local variables to accommodate this change.

* Update README formatting for input parameter tables

Fixed markdown formatting issues for several input parameter tables in the README. This ensures better readability and consistent styling throughout the documentation.
  • Loading branch information
ffppa authored Oct 15, 2024
1 parent e557087 commit 71d85c8
Show file tree
Hide file tree
Showing 14 changed files with 522 additions and 17 deletions.
108 changes: 95 additions & 13 deletions monitoring_function/01_main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ resource "azurerm_storage_table_entity" "monitoring_configuration" {
"tags" = lookup(local.decoded_configuration[count.index], "tags", null) != null ? jsonencode(local.decoded_configuration[count.index].tags) : null
"bodyCompareStrategy" = lookup(local.decoded_configuration[count.index], "bodyCompareStrategy", null) != null ? local.decoded_configuration[count.index].bodyCompareStrategy : null
"expectedBody" = lookup(local.decoded_configuration[count.index], "expectedBody", null) != null ? jsonencode(local.decoded_configuration[count.index].expectedBody) : null

}
}

Expand All @@ -81,8 +80,10 @@ resource "azurerm_private_endpoint" "synthetic_monitoring_storage_private_endpoi
subnet_id = var.private_endpoint_subnet_id

private_dns_zone_group {
name = "${var.prefix}-synthetic-monitoring-private-dns-zone-group"
private_dns_zone_ids = [var.storage_account_settings.table_private_dns_zone_id]
name = "${var.prefix}-synthetic-monitoring-private-dns-zone-group"
private_dns_zone_ids = [
var.storage_account_settings.table_private_dns_zone_id
]
}

private_service_connection {
Expand All @@ -95,8 +96,9 @@ resource "azurerm_private_endpoint" "synthetic_monitoring_storage_private_endpoi
tags = var.tags
}


resource "azapi_resource" "monitoring_app_job" {
count = var.legacy == true ? 1 : 0

type = "Microsoft.App/jobs@2022-11-01-preview"
name = "${var.prefix}-monitoring-app-job"
location = var.location
Expand Down Expand Up @@ -178,14 +180,93 @@ resource "azapi_resource" "monitoring_app_job" {
})
}

# Container App Job running the "synthetic-monitoring" container on the cron
# schedule given by var.job_settings.cron_scheduling.
# Created only when var.legacy is false; when var.legacy is true the
# azapi_resource.monitoring_app_job variant (same job name) is used instead.
resource "azurerm_container_app_job" "monitoring_terraform_app_job" {
  count = var.legacy == false ? 1 : 0

  name                         = "${var.prefix}-monitoring-app-job"
  resource_group_name          = var.resource_group_name
  location                     = var.location
  container_app_environment_id = var.job_settings.container_app_environment_id

  replica_retry_limit        = 1
  replica_timeout_in_seconds = var.job_settings.execution_timeout_seconds

  identity {
    type = "SystemAssigned"
  }

  schedule_trigger_config {
    cron_expression          = var.job_settings.cron_scheduling
    parallelism              = 1
    replica_completion_count = 1
  }

  # Sensitive values are stored as job secrets and referenced from the
  # container environment via secret_name, so they are not exposed as
  # plaintext environment values in the job definition.
  secret {
    name  = "app-insight-connection-string"
    value = data.azurerm_application_insights.app_insight.connection_string
  }
  secret {
    name  = "storage-account-key"
    value = module.synthetic_monitoring_storage_account.primary_access_key
  }

  template {
    container {
      name   = "synthetic-monitoring"
      image  = "${var.docker_settings.registry_url}/${var.docker_settings.image_name}:${var.docker_settings.image_tag}"
      cpu    = var.job_settings.cpu_requirement
      memory = var.job_settings.memory_requirement

      env {
        name        = "APP_INSIGHT_CONNECTION_STRING"
        secret_name = "app-insight-connection-string"
      }
      env {
        name  = "STORAGE_ACCOUNT_NAME"
        value = module.synthetic_monitoring_storage_account.name
      }
      env {
        name        = "STORAGE_ACCOUNT_KEY"
        secret_name = "storage-account-key"
      }
      env {
        name  = "STORAGE_ACCOUNT_TABLE_NAME"
        value = azurerm_storage_table.table_storage.name
      }
      env {
        name  = "AVAILABILITY_PREFIX"
        value = var.job_settings.availability_prefix
      }
      env {
        # Environment values must be strings; the setting is declared as a number.
        name  = "HTTP_CLIENT_TIMEOUT"
        value = tostring(var.job_settings.http_client_timeout)
      }
      env {
        name  = "LOCATION"
        value = var.location
      }
      env {
        # Environment values must be strings; the setting is declared as a number.
        name  = "CERT_VALIDITY_RANGE_DAYS"
        value = tostring(var.job_settings.cert_validity_range_days)
      }
    }
  }

  tags = var.tags

  # The precondition references azapi_resource.monitoring_app_job, which makes
  # this job depend on the legacy resource. The intent is that switching
  # var.legacy destroys one variant before the other is created (both use the
  # same job name), avoiding a duplicate resource error and allowing a rollback
  # to the legacy variant if needed.
  lifecycle {
    precondition {
      condition     = length(azapi_resource.monitoring_app_job) == 0
      error_message = "Warning: You cannot create the new resource. Perform legacy import before proceeding with changes."
    }
  }
}

locals {
default_alert_configuration = {
enabled = true,
severity = 0,
frequency = "PT1M"
threshold = 100
operator = "LessThan"
aggregation = "Average"
enabled = true,
severity = 0,
frequency = "PT1M"
auto_mitigate = var.alert_set_auto_mitigate
threshold = 100
operator = "LessThan"
aggregation = "Average"
}

default_custom_action_groups = []
Expand All @@ -201,7 +282,7 @@ resource "azurerm_monitor_metric_alert" "alert" {
description = "Monitors the availability of ${local.decoded_configuration[count.index].appName} ${local.decoded_configuration[count.index].apiName} from ${local.decoded_configuration[count.index].type}"
severity = lookup(lookup(local.decoded_configuration[count.index], "alertConfiguration", local.default_alert_configuration), "severity", local.default_alert_configuration.severity)
frequency = lookup(lookup(local.decoded_configuration[count.index], "alertConfiguration", local.default_alert_configuration), "frequency", local.default_alert_configuration.frequency)
auto_mitigate = true
auto_mitigate = lookup(lookup(local.decoded_configuration[count.index], "alertConfiguration", local.default_alert_configuration), "auto_mitigate", local.default_alert_configuration.auto_mitigate)
enabled = lookup(lookup(local.decoded_configuration[count.index], "alertConfiguration", local.default_alert_configuration), "enabled", local.default_alert_configuration.enabled)

criteria {
Expand All @@ -213,7 +294,9 @@ resource "azurerm_monitor_metric_alert" "alert" {
dimension {
name = "availabilityResult/name"
operator = "Include"
values = ["${var.job_settings.availability_prefix}-${local.decoded_configuration[count.index].appName}-${local.decoded_configuration[count.index].apiName}"]
values = [
"${var.job_settings.availability_prefix}-${local.decoded_configuration[count.index].appName}-${local.decoded_configuration[count.index].apiName}"
]
}
dimension {
name = "availabilityResult/location"
Expand All @@ -234,7 +317,6 @@ resource "azurerm_monitor_metric_alert" "alert" {
}



resource "azurerm_monitor_metric_alert" "self_alert" {
name = "availability-synthetic-monitoring-function"
resource_group_name = var.resource_group_name
Expand Down
12 changes: 12 additions & 0 deletions monitoring_function/99_variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ variable "location" {
description = "(Required) Resource location"
}

# Selects which resource type provisions the monitoring container app job:
# true  -> azapi_resource.monitoring_app_job (legacy)
# false -> azurerm_container_app_job.monitoring_terraform_app_job
variable "legacy" {
  type        = bool
  default     = true
  description = "(Optional) If true, the container app job is provisioned with the legacy azapi_resource; if false, with the azurerm_container_app_job resource."
}

variable "storage_account_settings" {
type = object({
tier = optional(string, "Standard") #(Optional) Tier used for the backup storage account
Expand Down Expand Up @@ -127,3 +133,9 @@ variable "self_alert_configuration" {
}
}

# Module-wide default for the "auto_mitigate" setting of the metric alerts.
variable "alert_set_auto_mitigate" {
  description = "(Optional) Should the alerts in this Metric Alert be auto resolved? Defaults to true."
  type        = bool
  default     = true
}

8 changes: 4 additions & 4 deletions monitoring_function/99_versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ terraform {
source = "hashicorp/azurerm"
version = "~>3.30"
}
null = {
source = "hashicorp/null"
version = "~> 3.2"
}
azapi = {
source = "azure/azapi"
version = "~> 1.11.0"
}
null = {
source = "hashicorp/null"
version = "~> 3.2"
}
}
}
4 changes: 4 additions & 0 deletions monitoring_function/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ That's an example of the properties that can be specified, containing the defaul
"enabled" : true, # (Optional) enables the alert
"severity" : 0, # (Optional) The severity of this Metric Alert. Possible values are 0, 1, 2, 3 and 4
"frequency" : "PT1M", # (Optional) The evaluation frequency of this Metric Alert, represented in ISO 8601 duration format. Possible values are PT1M, PT5M, PT15M, PT30M and PT1H
"auto_mitigate" : true, # (Optional) Should the alerts in this Metric Alert be auto resolved? Defaults to true
"threshold" : 100, # (Optional) The criteria threshold value that activates the alert
"operator" : "LessThan" # (Optional) The criteria operator. Possible values are Equals, GreaterThan, GreaterThanOrEqual, LessThan and LessThanOrEqual
"aggregation": "Average" # (Required) The statistic that runs over the metric values. Possible values are Average, Count, Minimum, Maximum and Total.
Expand Down Expand Up @@ -183,6 +184,7 @@ module "monitoring_function" {
| Name | Type |
|------|------|
| [azapi_resource.monitoring_app_job](https://registry.terraform.io/providers/azure/azapi/latest/docs/resources/resource) | resource |
| [azurerm_container_app_job.monitoring_terraform_app_job](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/container_app_job) | resource |
| [azurerm_monitor_metric_alert.alert](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_metric_alert) | resource |
| [azurerm_monitor_metric_alert.self_alert](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_metric_alert) | resource |
| [azurerm_private_endpoint.synthetic_monitoring_storage_private_endpoint](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_endpoint) | resource |
Expand All @@ -195,11 +197,13 @@ module "monitoring_function" {

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_alert_set_auto_mitigate"></a> [alert\_set\_auto\_mitigate](#input\_alert\_set\_auto\_mitigate) | (Optional) Should the alerts in this Metric Alert be auto resolved? Defaults to true. | `bool` | `true` | no |
| <a name="input_application_insight_name"></a> [application\_insight\_name](#input\_application\_insight\_name) | (Required) name of the application insight instance where to publish metrics | `string` | n/a | yes |
| <a name="input_application_insight_rg_name"></a> [application\_insight\_rg\_name](#input\_application\_insight\_rg\_name) | (Required) name of the application insight instance resource group where to publish metrics | `string` | n/a | yes |
| <a name="input_application_insights_action_group_ids"></a> [application\_insights\_action\_group\_ids](#input\_application\_insights\_action\_group\_ids) | (Required) Application insights action group ids | `list(string)` | n/a | yes |
| <a name="input_docker_settings"></a> [docker\_settings](#input\_docker\_settings) | n/a | <pre>object({<br> registry_url = optional(string, "ghcr.io") #(Optional) Docker container registry url where to find the monitoring image<br> image_tag = string #(Optional) Docker image tag<br> image_name = optional(string, "pagopa/azure-synthetic-monitoring") #(Optional) Docker image name<br> })</pre> | <pre>{<br> "image_name": "pagopa/azure-synthetic-monitoring",<br> "image_tag": "1.0.0",<br> "registry_url": "ghcr.io"<br>}</pre> | no |
| <a name="input_job_settings"></a> [job\_settings](#input\_job\_settings) | n/a | <pre>object({<br> execution_timeout_seconds = optional(number, 300) #(Optional) Job execution timeout, in seconds<br> cron_scheduling = optional(string, "* * * * *") #(Optional) Cron expression defining the execution scheduling of the monitoring function<br> cpu_requirement = optional(number, 0.25) #(Optional) Decimal; cpu requirement<br> memory_requirement = optional(string, "0.5Gi") #(Optional) Memory requirement<br> http_client_timeout = optional(number, 30000) #(Optional) Default http client response timeout, in milliseconds<br> default_duration_limit = optional(number, 10000) #(Optional) Duration limit applied if none is given in the monitoring configuration. in milliseconds<br> availability_prefix = optional(string, "synthetic") #(Optional) Prefix used for prefixing availability test names<br> container_app_environment_id = string #(Required) If defined, the id of the container app environment tu be used to run the monitoring job. If provided, skips the creation of a dedicated subnet<br> cert_validity_range_days = optional(number, 7) #(Optional) Number of days before the expiration date of a certificate over which the check is considered success<br> })</pre> | <pre>{<br> "availability_prefix": "synthetic",<br> "cert_validity_range_days": 7,<br> "container_app_environment_id": null,<br> "cpu_requirement": 0.25,<br> "cron_scheduling": "* * * * *",<br> "default_duration_limit": 10000,<br> "execution_timeout_seconds": 300,<br> "http_client_timeout": 30000,<br> "memory_requirement": "0.5Gi"<br>}</pre> | no |
| <a name="input_legacy"></a> [legacy](#input\_legacy) | (Optional) If true, the container app job is provisioned with the legacy azapi\_resource; if false, with the azurerm\_container\_app\_job resource. | `bool` | `true` | no |
| <a name="input_location"></a> [location](#input\_location) | (Required) Resource location | `string` | n/a | yes |
| <a name="input_monitoring_configuration_encoded"></a> [monitoring\_configuration\_encoded](#input\_monitoring\_configuration\_encoded) | (Required) monitoring configuration provided in JSON string format (use jsonencode) | `string` | n/a | yes |
| <a name="input_prefix"></a> [prefix](#input\_prefix) | (Required) Prefix used in the monitoring function resource names | `string` | n/a | yes |
Expand Down
28 changes: 28 additions & 0 deletions monitoring_function/tests/analytics_workspace.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Resource group referenced by the Log Analytics workspace and the
# Application Insights instance declared in this file.
resource "azurerm_resource_group" "synthetic_rg" {
  name     = "${local.project}-rg"
  location = var.location
}


# Log Analytics workspace; its id is used as workspace_id by the
# Application Insights instance.
resource "azurerm_log_analytics_workspace" "log_analytics_workspace" {
  name                = "${local.project}-law"
  resource_group_name = azurerm_resource_group.synthetic_rg.name
  location            = azurerm_resource_group.synthetic_rg.location

  # Sizing and retention are driven entirely by input variables.
  sku               = var.law_sku
  daily_quota_gb    = var.law_daily_quota_gb
  retention_in_days = var.law_retention_in_days

  tags = var.tags
}

# Application Insights instance, backed by the Log Analytics workspace above.
resource "azurerm_application_insights" "application_insights" {
  name                = "${local.project}-appinsights"
  resource_group_name = azurerm_resource_group.synthetic_rg.name
  location            = azurerm_resource_group.synthetic_rg.location

  application_type = "other"
  workspace_id     = azurerm_log_analytics_workspace.log_analytics_workspace.id

  tags = var.tags
}
1 change: 1 addition & 0 deletions monitoring_function/tests/backend.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
subscription=DevOpsLab
32 changes: 32 additions & 0 deletions monitoring_function/tests/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

# Random identifier; its hex form is used to build the project name in locals.
resource "random_id" "unique" {
  byte_length = 3
}

# Names derived from the input prefix plus the first hex character of
# random_id.unique, used as a short suffix.
locals {
  project = format("%s%s", var.prefix, substr(random_id.unique.hex, 0, 1))
  rg_name = format("%s-%s-rg", local.project, substr(random_id.unique.hex, 0, 1))

  # Per-resource names, all based on the project name.
  key_vault_name   = "${local.project}-kv"
  vnet_name        = "${local.project}-vnet"
  subnet_name      = "${local.project}-subnet"
  law_name         = "${local.project}-runner-law"
  environment_name = "${local.project}-runner-cae"
}

# Names built on the "<prefix>-<env_short>" product identifier for the
# monitor, network and log analytics resources.
locals {
  domain  = "synthetic"
  product = "${var.prefix}-${var.env_short}"

  # Monitoring
  monitor_resource_group_name     = "${local.product}-monitor-rg"
  monitor_appinsights_name        = "${local.product}-appinsights"
  monitor_action_group_email_name = "PagoPA"
  monitor_action_group_slack_name = "SlackPagoPA"

  # Networking
  vnet_core_name                = "${local.product}-vnet"
  vnet_core_resource_group_name = "${local.product}-vnet-rg"

  # Log analytics
  log_analytics_workspace_name                = "${local.product}-law"
  log_analytics_workspace_resource_group_name = "${local.product}-monitor-rg"
}
18 changes: 18 additions & 0 deletions monitoring_function/tests/monitoring_configuration.json.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"apiName" : "root",
"appName" : "pagopa",
"url" : "https://${api_dot_env_name}.platform.pagopa.it/",
"type" : "public",
"checkCertificate" : true,
"method" : "GET",
"expectedCodes" : ["200"],
"tags" : {
"description" : "pagopa ${env_name} context root"
},
"durationLimit" : 10000,
"alertConfiguration" : {
"enabled" : false
}
}
]
18 changes: 18 additions & 0 deletions monitoring_function/tests/output.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Exposes the hex form of random_id.unique, consistent with the other
# outputs in this file, which all carry a description.
output "random_id" {
  value       = random_id.unique.hex
  description = "Hex value of the random id used to build the project name"
}

# Name of azurerm_subnet.subnet.
output "subnet_name" {
  description = "Subnet name"
  value       = azurerm_subnet.subnet.name
}

# Address prefixes of azurerm_subnet.subnet.
output "subnet_cidr" {
  description = "Subnet CIDR blocks"
  value       = azurerm_subnet.subnet.address_prefixes
}

# Name of azurerm_container_app_environment.container_app_environment.
output "cae_name" {
  description = "Container App Environment name"
  value       = azurerm_container_app_environment.container_app_environment.name
}
Loading

0 comments on commit 71d85c8

Please sign in to comment.