diff --git a/aks/application/README.md b/aks/application/README.md index 5d0024d..4432409 100644 --- a/aks/application/README.md +++ b/aks/application/README.md @@ -16,10 +16,10 @@ module "web_application" { service_name = local.service_name cluster_configuration_map = module.cluster_data.configuration_map - + kubernetes_config_map_name = module.application_configuration.kubernetes_config_map_name kubernetes_secret_name = module.application_configuration.kubernetes_secret_name - + docker_image = var.docker_image } @@ -89,6 +89,13 @@ app.MapHealthChecks("/healthcheck/all"); [ASP.NET Core Health Checks Middleware]: https://learn.microsoft.com/en-us/aspnet/core/host-and-deploy/health-checks?view=aspnetcore-7.0 +### Monitoring + +If `azure_enable_monitoring` is `true`, the `azure_resource_prefix`, `service_short` and `kubernetes_cluster_id` inputs must be set, and it’s expected that the following resources already exist: + +- A resource group named `${azure_resource_prefix}-${service_short}-mn-rg` (where `mn` stands for monitoring and `rg` stands for resource group). +- A monitor action group named `${azure_resource_prefix}-${service_name}` within the above resource group. + ## Outputs ### `hostname` diff --git a/aks/application/data.tf b/aks/application/data.tf new file mode 100644 index 0000000..b932cd3 --- /dev/null +++ b/aks/application/data.tf @@ -0,0 +1,12 @@ +data "azurerm_resource_group" "monitoring" { + count = var.azure_enable_monitoring ? 
1 : 0 + + name = "${var.azure_resource_prefix}-${var.service_name}" + resource_group_name = data.azurerm_resource_group.monitoring[0].name +} diff --git a/aks/application/resources.tf b/aks/application/resources.tf index f6d3faa..672594d 100644 --- a/aks/application/resources.tf +++ b/aks/application/resources.tf @@ -244,3 +244,37 @@ resource "kubernetes_secret" "ghcr_auth" { }) } } + +resource "azurerm_monitor_metric_alert" "container_restarts" { + count = var.azure_enable_monitoring ? 1 : 0 + + name = "${local.app_name}-container-restarts" + resource_group_name = data.azurerm_resource_group.monitoring[0].name + scopes = [var.kubernetes_cluster_id] + description = "Action will be triggered when container restarts is greater than 0" + window_size = "PT30M" + + criteria { + metric_namespace = "Insights.container/pods" + metric_name = "restartingContainerCount" + aggregation = "Maximum" + operator = "GreaterThan" + threshold = 0 + + dimension { + name = "controllerName" + operator = "StartsWith" + values = [local.app_name] + } + } + + action { + action_group_id = data.azurerm_monitor_action_group.main[0].id + } + + lifecycle { + ignore_changes = [ + tags + ] + } +} diff --git a/aks/application/tfdocs.md b/aks/application/tfdocs.md index 3933236..e1c3a02 100644 --- a/aks/application/tfdocs.md +++ b/aks/application/tfdocs.md @@ -6,6 +6,7 @@ No requirements. | Name | Version | |------|---------| +| [azurerm](#provider\_azurerm) | n/a | | [kubernetes](#provider\_kubernetes) | n/a | ## Modules @@ -16,16 +17,21 @@ No modules. 
| Name | Type | |------|------| +| [azurerm_monitor_metric_alert.container_restarts](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_metric_alert) | resource | | [kubernetes_deployment.main](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/deployment) | resource | | [kubernetes_ingress_v1.main](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/ingress_v1) | resource | | [kubernetes_pod_disruption_budget_v1.main](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/pod_disruption_budget_v1) | resource | | [kubernetes_secret.ghcr_auth](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/secret) | resource | | [kubernetes_service.main](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/service) | resource | +| [azurerm_monitor_action_group.main](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/monitor_action_group) | data source | +| [azurerm_resource_group.monitoring](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/resource_group) | data source | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [azure\_enable\_monitoring](#input\_azure\_enable\_monitoring) | Whether to enable monitoring of container failures. When true, azure\_resource\_prefix, service\_short and kubernetes\_cluster\_id must be set | `bool` | `false` | no | +| [azure\_resource\_prefix](#input\_azure\_resource\_prefix) | Prefix of Azure resources for the service | `string` | `null` | no | | [cluster\_configuration\_map](#input\_cluster\_configuration\_map) | Configuration map for the cluster |
object({
resource_group_name = string,
resource_prefix = string,
dns_zone_prefix = optional(string),
cpu_min = number
})
| n/a | yes | | [command](#input\_command) | Custom command that overwrites Docker image | `list(string)` | `[]` | no | | [docker\_image](#input\_docker\_image) | Path to the docker image | `string` | n/a | yes | @@ -33,6 +39,7 @@ No modules. | [github\_personal\_access\_token](#input\_github\_personal\_access\_token) | Github Personal Access Token (PAT) of github\_username | `string` | `null` | no | | [github\_username](#input\_github\_username) | Github user authorised to access the private registry | `string` | `null` | no | | [is\_web](#input\_is\_web) | Whether this a web application | `bool` | `true` | no | +| [kubernetes\_cluster\_id](#input\_kubernetes\_cluster\_id) | ID of the Kubernetes cluster | `string` | `null` | no | | [kubernetes\_config\_map\_name](#input\_kubernetes\_config\_map\_name) | Name of the Kubernetes configuration map | `string` | n/a | yes | | [kubernetes\_secret\_name](#input\_kubernetes\_secret\_name) | Name of the Kubernetes secrets | `string` | n/a | yes | | [max\_memory](#input\_max\_memory) | Maximum memory of the instance | `string` | `"1Gi"` | no | @@ -42,6 +49,7 @@ No modules. | [probe\_path](#input\_probe\_path) | Path for the liveness and startup probe. The probe can be disabled by setting this to null. 
| `string` | `"/healthcheck"` | no | | [replicas](#input\_replicas) | Number of application instances | `number` | `1` | no | | [service\_name](#input\_service\_name) | Name of the service | `string` | n/a | yes | +| [service\_short](#input\_service\_short) | Short name of the service | `string` | `null` | no | | [web\_external\_hostnames](#input\_web\_external\_hostnames) | List of external hostnames for the web application | `list(string)` | `[]` | no | | [web\_port](#input\_web\_port) | Port of the web application | `number` | `3000` | no | diff --git a/aks/application/variables.tf b/aks/application/variables.tf index 97a206f..b76f5f4 100644 --- a/aks/application/variables.tf +++ b/aks/application/variables.tf @@ -19,6 +19,12 @@ variable "service_name" { description = "Name of the service" } +variable "service_short" { + type = string + default = null + description = "Short name of the service" +} + variable "cluster_configuration_map" { type = object({ resource_group_name = string, @@ -45,6 +51,12 @@ variable "kubernetes_secret_name" { description = "Name of the Kubernetes secrets" } +variable "kubernetes_cluster_id" { + type = string + default = null + description = "ID of the Kubernetes cluster" +} + variable "docker_image" { type = string description = "Path to the docker image" @@ -102,3 +114,15 @@ variable "github_personal_access_token" { default = null description = "Github Personal Access Token (PAT) of github_username" } + +variable "azure_resource_prefix" { + type = string + default = null + description = "Prefix of Azure resources for the service" +} + +variable "azure_enable_monitoring" { + type = bool + default = false + description = "Whether to enable monitoring of container failures. When true, azure_resource_prefix, service_short and kubernetes_cluster_id must be set" +} diff --git a/aks/cluster_data/README.md b/aks/cluster_data/README.md index 68651eb..52f1089 100644 --- a/aks/cluster_data/README.md +++ b/aks/cluster_data/README.md @@ -37,6 +37,10 @@ object({ The host to use to connect to the Kubernetes cluster. 
+### `kubernetes_id` + +The ID of the Kubernetes Managed Cluster. + ### `kubernetes_client_certificate` The client certificate to use to connect to the Kubernetes cluster. diff --git a/aks/cluster_data/outputs.tf b/aks/cluster_data/outputs.tf index af2d201..5455bc3 100644 --- a/aks/cluster_data/outputs.tf +++ b/aks/cluster_data/outputs.tf @@ -6,6 +6,10 @@ output "kubernetes_host" { value = data.azurerm_kubernetes_cluster.main.kube_config.0.host } +output "kubernetes_id" { + value = data.azurerm_kubernetes_cluster.main.id +} + output "kubernetes_client_certificate" { value = base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_certificate) } diff --git a/aks/cluster_data/tfdocs.md b/aks/cluster_data/tfdocs.md index 91c71c3..057b174 100644 --- a/aks/cluster_data/tfdocs.md +++ b/aks/cluster_data/tfdocs.md @@ -34,3 +34,4 @@ No modules. | [kubernetes\_client\_key](#output\_kubernetes\_client\_key) | n/a | | [kubernetes\_cluster\_ca\_certificate](#output\_kubernetes\_cluster\_ca\_certificate) | n/a | | [kubernetes\_host](#output\_kubernetes\_host) | n/a | +| [kubernetes\_id](#output\_kubernetes\_id) | n/a |