Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added alert frequency monitor metric #111

Merged
merged 1 commit into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions aks/postgres/resources.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ locals {
azure_enable_monitoring = var.use_azure && var.azure_enable_monitoring

kubernetes_name = "${var.service_name}-${var.environment}-postgres${local.name_suffix}"

# Maps each alert_window_size accepted by the variable validation to the
# evaluation frequency (interval between checks) used by the metric alerts.
# Every value allowed by the validation must have a key here; otherwise the
# index expression below fails with an "Invalid index" error.
alert_frequency_map = {
  # PT1M is accepted by the alert_window_size validation, so it needs an entry.
  # NOTE(review): assumes Azure accepts a frequency equal to the window size
  # for a 1-minute window - confirm against the provider documentation.
  PT1M  = "PT1M"
  PT5M  = "PT1M"
  PT15M = "PT1M"
  PT30M = "PT1M"
  PT1H  = "PT1M"
  PT6H  = "PT5M"
  PT12H = "PT5M"
}
# Direct index (rather than lookup with a default) so an unmapped window size
# fails loudly instead of silently picking a frequency.
alert_frequency = local.alert_frequency_map[var.alert_window_size]
}

# Username & password
Expand Down Expand Up @@ -172,6 +182,7 @@ resource "azurerm_monitor_metric_alert" "memory" {
scopes = [azurerm_postgresql_flexible_server.main[0].id]
description = "Action will be triggered when memory use is greater than 75%"
window_size = var.alert_window_size
frequency = local.alert_frequency

criteria {
metric_namespace = "Microsoft.DBforPostgreSQL/flexibleServers"
Expand Down Expand Up @@ -200,6 +211,7 @@ resource "azurerm_monitor_metric_alert" "cpu" {
scopes = [azurerm_postgresql_flexible_server.main[0].id]
description = "Action will be triggered when cpu use is greater than ${var.azure_cpu_threshold}%"
window_size = var.alert_window_size
frequency = local.alert_frequency

criteria {
metric_namespace = "Microsoft.DBforPostgreSQL/flexibleServers"
Expand Down Expand Up @@ -228,6 +240,7 @@ resource "azurerm_monitor_metric_alert" "storage" {
scopes = [azurerm_postgresql_flexible_server.main[0].id]
description = "Action will be triggered when storage use is greater than ${var.azure_storage_threshold}%"
window_size = var.alert_window_size
frequency = local.alert_frequency

criteria {
metric_namespace = "Microsoft.DBforPostgreSQL/flexibleServers"
Expand Down
2 changes: 1 addition & 1 deletion aks/postgres/tfdocs.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ No modules.
|------|-------------|------|---------|:--------:|
| <a name="input_admin_password"></a> [admin\_password](#input\_admin\_password) | Password of the admin user | `string` | `null` | no |
| <a name="input_admin_username"></a> [admin\_username](#input\_admin\_username) | Username of the admin user | `string` | `null` | no |
| <a name="input_alert_window_size"></a> [alert\_window\_size](#input\_alert\_window\_size) | The period of time that is used to monitor alert activity e.g PT1M, PT5M, PT15M, PT30M, PT1H, PT6H or PT12H | `string` | `"PT5M"` | no |
| <a name="input_alert_window_size"></a> [alert\_window\_size](#input\_alert\_window\_size) | The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly. | `string` | `"PT5M"` | no |
| <a name="input_azure_cpu_threshold"></a> [azure\_cpu\_threshold](#input\_azure\_cpu\_threshold) | n/a | `number` | `80` | no |
| <a name="input_azure_enable_backup_storage"></a> [azure\_enable\_backup\_storage](#input\_azure\_enable\_backup\_storage) | n/a | `bool` | `true` | no |
| <a name="input_azure_enable_high_availability"></a> [azure\_enable\_high\_availability](#input\_azure\_enable\_high\_availability) | n/a | `bool` | `false` | no |
Expand Down
14 changes: 10 additions & 4 deletions aks/postgres/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,17 @@ variable "azure_enable_monitoring" {
default = true
}



variable "alert_window_size" {
type = string
nullable = false
default = "PT5M"
description = "The period of time that is used to monitor alert activity e.g PT1M, PT5M, PT15M, PT30M, PT1H, PT6H or PT12H"
type = string
nullable = false
default = "PT5M"
validation {
condition = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
}
description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
}

variable "azure_maintenance_window" {
Expand Down
11 changes: 11 additions & 0 deletions aks/redis/resources.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ locals {
azure_enable_monitoring = var.use_azure && var.azure_enable_monitoring

kubernetes_name = "${var.service_name}-${var.environment}-redis${local.name_suffix}"

# Maps each alert_window_size accepted by the variable validation to the
# evaluation frequency (interval between checks) used by the metric alert.
# Every value allowed by the validation must have a key here; otherwise the
# index expression below fails with an "Invalid index" error.
alert_frequency_map = {
  # PT1M is accepted by the alert_window_size validation, so it needs an entry.
  # NOTE(review): assumes Azure accepts a frequency equal to the window size
  # for a 1-minute window - confirm against the provider documentation.
  PT1M  = "PT1M"
  PT5M  = "PT1M"
  PT15M = "PT1M"
  PT30M = "PT1M"
  PT1H  = "PT1M"
  PT6H  = "PT5M"
  PT12H = "PT5M"
}
# Direct index (rather than lookup with a default) so an unmapped window size
# fails loudly instead of silently picking a frequency.
alert_frequency = local.alert_frequency_map[var.alert_window_size]
}

# Azure
Expand Down Expand Up @@ -86,6 +96,7 @@ resource "azurerm_monitor_metric_alert" "memory" {
scopes = [azurerm_redis_cache.main[0].id]
description = "Action will be triggered when memory use is greater than ${var.azure_memory_threshold}%"
window_size = var.alert_window_size
frequency = local.alert_frequency

criteria {
metric_namespace = "Microsoft.Cache/redis"
Expand Down
2 changes: 1 addition & 1 deletion aks/redis/tfdocs.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ No modules.

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_alert_window_size"></a> [alert\_window\_size](#input\_alert\_window\_size) | The period of time that is used to monitor alert activity e.g PT1M, PT5M, PT15M, PT30M, PT1H, PT6H or PT12H | `string` | `"PT5M"` | no |
| <a name="input_alert_window_size"></a> [alert\_window\_size](#input\_alert\_window\_size) | The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly. | `string` | `"PT5M"` | no |
| <a name="input_azure_capacity"></a> [azure\_capacity](#input\_azure\_capacity) | n/a | `number` | `1` | no |
| <a name="input_azure_enable_monitoring"></a> [azure\_enable\_monitoring](#input\_azure\_enable\_monitoring) | n/a | `bool` | `true` | no |
| <a name="input_azure_family"></a> [azure\_family](#input\_azure\_family) | n/a | `string` | `"C"` | no |
Expand Down
12 changes: 8 additions & 4 deletions aks/redis/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,12 @@ variable "azure_patch_schedule" {
}

variable "alert_window_size" {
type = string
default = "PT5M"
nullable = false
description = "The period of time that is used to monitor alert activity e.g PT1M, PT5M, PT15M, PT30M, PT1H, PT6H or PT12H"
type = string
default = "PT5M"
nullable = false
validation {
condition = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
}
description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
}
Loading