From f73c42a7eefe770d0321deb442919c9565a10e49 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Wed, 20 Nov 2024 17:36:59 +0530 Subject: [PATCH] feat: update hms metrics namespace (#288) * feat: common namespace * feat: common namespace * feat: common namespace * feat: common namespace * feat: common namespace * feat: common namespace * feat: common namespace * feat: common namespace * feat: common namespace * feat: common namespace * feat: common namespace * fix: fxing syntax * fix: format list * fix: change default value * feat: update change log * fix: removed unused local variables * fix: remove unused input parameters in templete * fix: change variable descriptions * fix: update change log date * fix: test json * fix: fix json format * fix: json template format * fix: test ecs defination * fix: revert * fix: replace with single quote * fix: fix escape * Update variables.tf * Update CHANGELOG.md --------- Co-authored-by: Dhrubajyoti Sadhu Co-authored-by: janli --- CHANGELOG.md | 10 ++ VARIABLES.md | 34 +++--- common.tf | 5 +- k8s-readonly.tf | 2 +- k8s-readwrite.tf | 2 +- templates.tf | 31 ++--- templates/apiary-hms-readonly.json | 2 +- templates/apiary-hms-readwrite.json | 2 +- variables.tf | 171 ++++++++++++++++++++++------ 9 files changed, 193 insertions(+), 66 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3eec28d..5b5bac6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.7.0] - 2024-11-19 +### Changed +- Updated the hms namespaces for metrics for both readwrite and readonly. 
+### Added +- Added the following variables: + - `hms_ecs_metrics_readwrite_namespace` + - `hms_ecs_metrics_readonly_namespace` + - `hms_k8s_metrics_readwrite_namespace` + - `hms_k8s_metrics_readonly_namespace` + ## [7.6.1] - 2024-10-30 ### Changed - Changed Openmetric to Prometheus in DockerLabels in ECS. diff --git a/VARIABLES.md b/VARIABLES.md index 9c1adca..e977836 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -13,7 +13,7 @@ | apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | | apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | | apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | -| apiary\_domain\_private\_zone | Apiary domain private zone 53. | `bool` | `true` | no | +| apiary\_domain\_private\_zone | Apiary domain private zone 53. | `bool` | `true` | no | | apiary\_governance\_iamroles | AWS IAM governance roles allowed read and tagging access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | | apiary\_log\_bucket | Bucket for Apiary logs.If this is blank, module will create a bucket. | `string` | `""` | no | | apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | @@ -29,12 +29,12 @@ | apiary\_common\_producer\_iamroles | AWS IAM roles allowed general (not tied to schema) write access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | | dashboard\_namespace | k8s namespace to deploy grafana dashboard. | `string` | `"monitoring"` | no | | db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | -| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. | `string` | `"7"` | yes | +| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. | `string` | `"7"` | yes | | db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. | `string` | `"02:00-03:00"` | no | | db\_copy\_tags\_to\_snapshot | Copy all Cluster tags to snapshots. | `bool` | `true` | no | | db\_enable\_performance\_insights | Enable RDS Performance Insights | `bool` | `false` | no | | db\_enhanced\_monitoring\_interval | RDS monitoring interval (in seconds) for enhanced monitoring. Valid values are 0, 1, 5, 10, 15, 30, 60. Default is 0. | `number` | `0` | no | -| db\_instance\_class | Instance type for the RDS Metastore DB. 
| `string` | `"db.t4g.medium"` | yes | +| db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | `"db.t4g.medium"` | yes | | db\_instance\_count | Desired count of database cluster instances. | `string` | `"2"` | no | | db\_maintenance\_window | Preferred maintenance window for the RDS Metastore DB in UTC. | `string` | `"wed:03:00-wed:04:00"` | no | | db\_master\_username | Aurora cluster MySQL master user name. | `string` | `"apiary"` | no | @@ -71,22 +71,22 @@ | hms\_ro\_heapsize | Heapsize for the read only Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | | hms\_ro\_k8s\_replica\_count | Initial Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | | hms\_ro\_k8s\_max\_replica\_count | Max Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| hms\_rw\_k8s\_pdb\_settings | Add PodDisruptionBudget to the HMS rw pods. | `object` | `max_unavailable = 1` | no | -| hms\_rw\_k8s\_rolling\_update\_strategy | Configure HMS RW deployment rolling strategy. | `object` | `max_unavailable = 1` | no | +| hms\_rw\_k8s\_pdb\_settings | Add PodDisruptionBudget to the HMS rw pods. | `object` | `max_unavailable = 1` | no | +| hms\_rw\_k8s\_rolling\_update\_strategy | Configure HMS RW deployment rolling strategy. | `object` | `max_unavailable = 1` | no | | hms\_ro\_target\_cpu\_percentage | Read only Hive Metastore autoscaling threshold for CPU target usage. | `number` | `"2048"` | no | | hms\_ro\_request\_partition\_limit | Read only Hive Metastore limits of request partitions. | `string` | n/a | no | -| hms\_ro\_node\_affinity | Add node affinities to the Hive metastore pods. | `list(object)` | n/a | no | -| hms\_ro\_tolerations | Add tolerations to the Hive metastore pods. | `list(object)` | n/a | no | +| hms\_ro\_node\_affinity | Add node affinities to the Hive metastore pods. | `list(object)` | n/a | no | +| hms\_ro\_tolerations | Add tolerations to the Hive metastore pods. | `list(object)` | n/a | no | | hms\_rw\_cpu | CPU for the read/write Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | | hms\_rw\_db\_connection\_pool\_size | Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | | hms\_rw\_ecs\_task\_count | Desired ECS task count of the read/write Hive Metastore service. | `string` | `"3"` | no | | hms\_rw\_heapsize | Heapsize for the read/write Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | | hms\_rw\_k8s\_replica\_count | Initial Number of read/write Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| hms\_rw\_k8s\_pdb\_settings | Add PodDisruptionBudget to the HMS rw pods. | `object` | `max_unavailable = 1` | no | -| hms\_rw\_k8s\_rolling\_update\_strategy | Configure HMS RW deployment rolling strategy. | `object` | `max_unavailable = 1` | no | +| hms\_rw\_k8s\_pdb\_settings | Add PodDisruptionBudget to the HMS rw pods. | `object` | `max_unavailable = 1` | no | +| hms\_rw\_k8s\_rolling\_update\_strategy | Configure HMS RW deployment rolling strategy. | `object` | `max_unavailable = 1` | no | | hms\_rw\_request\_partition\_limit | Read Write Hive Metastore limits of request partitions. | `string` | n/a | no | -| hms\_rw\_node\_affinity | Add node affinities to the Hive metastore pods. | `list(object)` | n/a | no | -| hms\_rw\_tolerations | Add tolerations to the Hive metastore pods. | `list(object)` | n/a | no | +| hms\_rw\_node\_affinity | Add node affinities to the Hive metastore pods. | `list(object)` | n/a | no | +| hms\_rw\_tolerations | Add tolerations to the Hive metastore pods. | `list(object)` | n/a | no | | iam\_name\_root | Name to identify Hive Metastore IAM roles. | `string` | `"hms"` | no | | ingress\_cidr | Generally allowed ingress CIDR list. | `list(string)` | n/a | yes | | instance\_name | Apiary instance name to identify resources in multi-instance deployments. | `string` | `""` | no | @@ -132,12 +132,16 @@ | datadog\_key\_secret\_name | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments. | string | null | no | | datadog\_agent\_version | Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments. 
| string | 7.50.3-jmx | no | | datadog\_agent\_enabled | Whether to include the datadog-agent container. This is only applicable to ECS deployments. | string | false | no | -| enable\_tcp\_keepalive | tcp_keepalive settings on HMS pods. To use this you need to enable the ability to cahnge sysctl settings on your kubernetes cluster. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). If your EKS version is below 1.24 you need to create a PodSecurityPolicy allowing the following sysctls "net.ipv4.tcp_keepalive_time", "net.ipv4.tcp_keepalive_intvl","net.ipv4.tcp_keepalive_probes" and a ClusterRole + Rolebinding for the service account running the HMS pods or all services accounts in the namespace where Apiary is running so that kubernetes can apply the tcp)keepalive configuration. For EKS 1.25 and above check this https://kubernetes.io/blog/2022/08/23/kubernetes-v1-25-release/#pod-security-changes. Also see tcp_keepalive_* variables. | bool | false | no | +| enable\_tcp\_keepalive | tcp_keepalive settings on HMS pods. To use this you need to enable the ability to change sysctl settings on your kubernetes cluster. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). If your EKS version is below 1.24 you need to create a PodSecurityPolicy allowing the following sysctls "net.ipv4.tcp_keepalive_time", "net.ipv4.tcp_keepalive_intvl","net.ipv4.tcp_keepalive_probes" and a ClusterRole + Rolebinding for the service account running the HMS pods or all service accounts in the namespace where Apiary is running so that kubernetes can apply the tcp_keepalive configuration. For EKS 1.25 and above check this https://kubernetes.io/blog/2022/08/23/kubernetes-v1-25-release/#pod-security-changes. Also see tcp_keepalive_* variables. 
| bool | false | no | | tcp\_keepalive\_time | Sets net.ipv4.tcp_keepalive_time (seconds). | number | `200` | no | | tcp\_keepalive\_intvl | Sets net.ipv4.tcp_keepalive_intvl (seconds) | number | `30` | no | -| tcp\_keepalive\_probes | Sets net.ipv4.tcp_keepalive_probes (seconds) | number | `2` | no | -| ecs\_platform\_version | ECS Service Platform Version | `string` | `"LATEST"` -| ecs\_requires\_compatibilities | ECS task definition requires compatibilities. | `list(string)` | `["EC2", "FARGATE"]` | no | +| tcp\_keepalive\_probes | Sets net.ipv4.tcp_keepalive_probes (seconds) | number | `2` | no | +| ecs\_platform\_version | ECS Service Platform Version | `string` | `"LATEST"` | no | +| ecs\_requires\_compatibilities | ECS task definition requires compatibilities. | `list(string)` | `["EC2", "FARGATE"]` | no | +| hms\_ecs\_metrics\_readonly\_namespace | ECS readonly metrics namespace | `string` | `"hms_readonly_legacy"` | no | +| hms\_ecs\_metrics\_readwrite\_namespace | ECS readwrite metrics namespace | `string` | `"hms_readwrite_legacy"` | no | +| hms\_k8s\_metrics\_readonly\_namespace | K8s readonly metrics namespace | `string` | `"hms_readonly"` | no | +| hms\_k8s\_metrics\_readwrite\_namespace | K8s readwrite metrics namespace | `string` | `"hms_readwrite"` | no | ### apiary_assume_roles diff --git a/common.tf b/common.tf index 57df24d..3b058cc 100644 --- a/common.tf +++ b/common.tf @@ -9,8 +9,7 @@ locals { apiary_bucket_prefix = "${local.instance_alias}-${data.aws_caller_identity.current.account_id}-${data.aws_region.current.name}" apiary_assume_role_bucket_prefix = [for assumerole in var.apiary_assume_roles : "${local.instance_alias}-${data.aws_caller_identity.current.account_id}-${lookup(assumerole, "allow_cross_region_access", false) ? "*" : data.aws_region.current.name}"] enable_route53_records = var.apiary_domain_name == "" ? 
false : true - - datadog_tags = join(" ", formatlist("%s:%s", keys(var.apiary_tags), values(var.apiary_tags))) + datadog_tags = join(" ", formatlist("%s:%s", keys(var.apiary_tags), values(var.apiary_tags))) # # Create a new list of maps with some extra attributes needed later # @@ -63,6 +62,8 @@ locals { ro_ingress_cidr = var.ingress_cidr rw_ingress_cidr = length(var.rw_ingress_cidr) == 0 ? var.ingress_cidr : var.rw_ingress_cidr + hms_metrics = join("\\\",\\\"", var.datadog_metrics_hms_readwrite_readonly) + hms_metrics_type_overrides = join("\\\": \\\"gauge\\\",\\\"", var.datadog_metrics_hms_readwrite_readonly) } data "aws_iam_account_alias" "current" {} diff --git a/k8s-readonly.tf b/k8s-readonly.tf index c659db1..54ad743 100644 --- a/k8s-readonly.tf +++ b/k8s-readonly.tf @@ -38,7 +38,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_readonly" { annotations = { "ad.datadoghq.com/${local.hms_alias}-readonly.check_names" = var.datadog_metrics_enabled ? "[\"prometheus\"]" : null "ad.datadoghq.com/${local.hms_alias}-readonly.init_configs" = var.datadog_metrics_enabled ? "[{}]" : null - "ad.datadoghq.com/${local.hms_alias}-readonly.instances" = var.datadog_metrics_enabled ? "[{ \"prometheus_url\": \"http://%%host%%:${var.datadog_metrics_port}/actuator/prometheus\", \"namespace\": \"hms_readonly\", \"metrics\": [ \"${join("\",\"", var.datadog_metrics_hms_readwrite_readonly)}\" ] , \"type_overrides\": { \"${join("\": \"gauge\",\"", var.datadog_metrics_hms_readwrite_readonly)}\": \"gauge\"} }]" : null + "ad.datadoghq.com/${local.hms_alias}-readonly.instances" = var.datadog_metrics_enabled ? 
"[{ \"prometheus_url\": \"http://%%host%%:${var.datadog_metrics_port}/actuator/prometheus\", \"namespace\": \"${var.hms_k8s_metrics_readonly_namespace}\", \"metrics\": [ \"${join("\",\"", var.datadog_metrics_hms_readwrite_readonly)}\" ] , \"type_overrides\": { \"${join("\": \"gauge\",\"", var.datadog_metrics_hms_readwrite_readonly)}\": \"gauge\"} }]" : null "iam.amazonaws.com/role" = var.oidc_provider == "" ? aws_iam_role.apiary_hms_readonly.name : null "prometheus.io/path" = "/metrics" "prometheus.io/port" = "8080" diff --git a/k8s-readwrite.tf b/k8s-readwrite.tf index 02fcf03..0692c42 100644 --- a/k8s-readwrite.tf +++ b/k8s-readwrite.tf @@ -38,7 +38,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_readwrite" { annotations = { "ad.datadoghq.com/${local.hms_alias}-readwrite.check_names" = var.datadog_metrics_enabled ? "[\"prometheus\"]" : null "ad.datadoghq.com/${local.hms_alias}-readwrite.init_configs" = var.datadog_metrics_enabled ? "[{}]" : null - "ad.datadoghq.com/${local.hms_alias}-readwrite.instances" = var.datadog_metrics_enabled ? "[{ \"prometheus_url\": \"http://%%host%%:${var.datadog_metrics_port}/actuator/prometheus\", \"namespace\": \"hms_readwrite\", \"metrics\": [ \"${join("\",\"", var.datadog_metrics_hms_readwrite_readonly)}\" ] , \"type_overrides\": { \"${join("\": \"gauge\",\"", var.datadog_metrics_hms_readwrite_readonly)}\": \"gauge\"} }]" : null + "ad.datadoghq.com/${local.hms_alias}-readwrite.instances" = var.datadog_metrics_enabled ? "[{ \"prometheus_url\": \"http://%%host%%:${var.datadog_metrics_port}/actuator/prometheus\", \"namespace\": \"${var.hms_k8s_metrics_readwrite_namespace}\", \"metrics\": [ \"${join("\",\"", var.datadog_metrics_hms_readwrite_readonly)}\" ] , \"type_overrides\": { \"${join("\": \"gauge\",\"", var.datadog_metrics_hms_readwrite_readonly)}\": \"gauge\"} }]" : null "iam.amazonaws.com/role" = var.oidc_provider == "" ? 
aws_iam_role.apiary_hms_readwrite.name : null "prometheus.io/path" = "/metrics" "prometheus.io/port" = "8080" diff --git a/templates.tf b/templates.tf index af1cfdd..c8d5aa7 100644 --- a/templates.tf +++ b/templates.tf @@ -27,7 +27,7 @@ locals{ table_param_filter = var.enable_metadata_events ? var.table_param_filter : "" enable_gluesync = var.enable_gluesync ? "1" : "" gluedb_prefix = "${local.gluedb_prefix}" - + hms_metrics_namespace = "${var.hms_ecs_metrics_readwrite_namespace}" ranger_service_name = "${local.instance_alias}-metastore" ranger_policy_manager_url = "${var.ranger_policy_manager_url}" ranger_audit_solr_url = "${var.ranger_audit_solr_url}" @@ -69,9 +69,11 @@ locals{ datadog_agent_version = var.datadog_agent_version datadog_agent_enabled = var.datadog_agent_enabled datadog_tags = local.datadog_tags - tcp_keepalive_time = var.tcp_keepalive_time - tcp_keepalive_intvl = var.tcp_keepalive_intvl - tcp_keepalive_probes = var.tcp_keepalive_probes + tcp_keepalive_time = var.tcp_keepalive_time + tcp_keepalive_intvl = var.tcp_keepalive_intvl + tcp_keepalive_probes = var.tcp_keepalive_probes + hms_metrics = local.hms_metrics + hms_metrics_type_overrides = local.hms_metrics_type_overrides }) hms_readonly_template = templatefile("${path.module}/templates/apiary-hms-readonly.json", { @@ -117,14 +119,17 @@ locals{ mysql_user_cred_arn = data.aws_secretsmanager_secret.db_ro_user.arn # Datadog variables - datadog_agent_enabled = var.datadog_agent_enabled - datadog_secret_key = length(var.datadog_key_secret_name) > 0 ? 
chomp(data.external.datadog_key[0].result["api_key"]) : "" - wd_instance_type = var.hms_instance_type - metrics_port = var.datadog_metrics_port - datadog_agent_version = var.datadog_agent_version - datadog_tags = local.datadog_tags - tcp_keepalive_time = var.tcp_keepalive_time - tcp_keepalive_intvl = var.tcp_keepalive_intvl - tcp_keepalive_probes = var.tcp_keepalive_probes + datadog_agent_enabled = var.datadog_agent_enabled + datadog_secret_key = length(var.datadog_key_secret_name) > 0 ? chomp(data.external.datadog_key[0].result["api_key"]) : "" + wd_instance_type = var.hms_instance_type + metrics_port = var.datadog_metrics_port + datadog_agent_version = var.datadog_agent_version + datadog_tags = local.datadog_tags + tcp_keepalive_time = var.tcp_keepalive_time + tcp_keepalive_intvl = var.tcp_keepalive_intvl + tcp_keepalive_probes = var.tcp_keepalive_probes + hms_metrics_namespace = "${var.hms_ecs_metrics_readonly_namespace}" + hms_metrics = local.hms_metrics + hms_metrics_type_overrides = local.hms_metrics_type_overrides }) } diff --git a/templates/apiary-hms-readonly.json b/templates/apiary-hms-readonly.json index 20c4f82..ea7a36b 100644 --- a/templates/apiary-hms-readonly.json +++ b/templates/apiary-hms-readonly.json @@ -75,7 +75,7 @@ } }, "dockerLabels": { - "com.datadoghq.ad.instances": "[{ \"prometheus_url\": \"http://%%host%%:8080/actuator/prometheus\", \"namespace\": \"hms_readonly_legacy\", \"metrics\": [\"metrics_classloading_loaded_value\", \"metrics_threads_count_value\", \"metrics_memory_heap_max_value\", \"metrics_init_total_count_tables_value\", \"metrics_init_total_count_dbs_value\", \"metrics_memory_heap_used_value\", \"metrics_init_total_count_partitions_value\", \"jvm_threads_current\", \"jvm_threads_started_total\", \"jvm_memory_bytes_used\", \"jvm_memory_bytes_init\", \"jvm_gc_collection_seconds_count\", \"jvm_gc_collection_seconds\", \"process_cpu_seconds_total\", \"java_lang_operatingsystem_processcpuload\", 
\"java_lang_operatingsystem_processcputime\", \"metrics_threads_runnable_count_value\", \"metrics_threads_waiting_count_value\", \"java_lang_memory_heapmemoryusage_used\", \"metrics_memory_heap_init_value\", \"metrics_api_get_partition_by_name_count\", \"metrics_api_get_partitions_by_names_count\", \"metrics_api_get_partition_names_count\", \"metrics_api_get_partitions_by_expr_count\", \"metrics_api_get_partitions_count\", \"metrics_api_get_partition_count\", \"metrics_api_get_partitions_by_filter_count\", \"metrics_api_add_partitions_count\", \"metrics_api_add_partitions_req_count\", \"metrics_api_drop_partition_by_name_count\", \"metrics_api_add_partition_count\", \"metrics_api_alter_partitions_count\", \"metrics_api_create_table_count\", \"metrics_api_alter_table_with_cascade_count\", \"metrics_api_get_table_meta_count\", \"metrics_api_get_table_metas_count\", \"metrics_api_get_table_count\", \"metrics_api_alter_table_count\", \"metrics_api_get_tables_count\", \"metrics_api_get_all_tables_count\", \"metrics_api_drop_table_count\", \"metrics_api_get_multi_table_count\", \"metrics_api_get_database_count\", \"metrics_api_get_all_databases_count\", \"metrics_api_get_databases_count\", \"metrics_api_create_function_count\", \"metrics_api_getmetaconf_count\", \"metrics_api_alter_table_with_environment_context_count\", \"metrics_api_delete_column_statistics_by_table_count\", \"metrics_api_get_functions_count\", \"metrics_api_get_function_count\", \"metrics_api_shutdown_count\", \"metrics_api_flushcache_count\", \"metrics_api_get_indexes_count\", \"metrics_api_get_config_value_count\", \"metrics_api_set_ugi_count\", \"metrics_api_get_all_functions_count\", \"metrics_api_get_table_req_95thpercentile\", \"metrics_api_get_table_req_50thpercentile\", \"metrics_api_get_table_req_count\", \"metrics_api_get_table_req_max\", \"metrics_api_get_databases_count\", \"metrics_api_get_databases_95thpercentile\", \"metrics_api_get_databases_50thpercentile\", 
\"metrics_api_get_databases_max\", \"metrics_api_get_partitions_95thpercentile\", \"metrics_api_get_partitions_50thpercentile\", \"metrics_api_get_partitions_count\", \"metrics_api_get_partitions_max\", \"metrics_api_get_partitions_50thpercentile\", \"metrics_api_get_table_req_50thpercentile\", \"metrics_api_get_database_95thpercentile\", \"metrics_api_get_database_50thpercentile\", \"metrics_kafka_listener_failures_count\", \"metrics_kafka_listener_successes_count\", \"metrics_api_get_table_objects_by_name_req_max\" ], \"type_overrides\": { \"metrics_classloading_loaded_value\": \"gauge\", \"metrics_threads_count_value\": \"gauge\", \"metrics_memory_heap_max_value\": \"gauge\", \"metrics_init_total_count_tables_value\": \"gauge\", \"metrics_init_total_count_dbs_value\": \"gauge\", \"metrics_memory_heap_used_value\": \"gauge\", \"metrics_init_total_count_partitions_value\": \"gauge\", \"jvm_threads_current\": \"gauge\", \"jvm_threads_started_total\": \"gauge\", \"jvm_memory_bytes_used\": \"gauge\", \"jvm_memory_bytes_init\": \"gauge\", \"jvm_gc_collection_seconds_count\": \"gauge\", \"jvm_gc_collection_seconds\": \"gauge\", \"process_cpu_seconds_total\": \"gauge\", \"java_lang_operatingsystem_processcpuload\": \"gauge\", \"java_lang_operatingsystem_processcputime\": \"gauge\", \"metrics_threads_runnable_count_value\": \"gauge\", \"metrics_threads_waiting_count_value\": \"gauge\", \"java_lang_memory_heapmemoryusage_used\": \"gauge\", \"metrics_memory_heap_init_value\": \"gauge\", \"metrics_api_get_partition_by_name_count\": \"gauge\", \"metrics_api_get_partitions_by_names_count\": \"gauge\", \"metrics_api_get_partition_names_count\": \"gauge\", \"metrics_api_get_partitions_by_expr_count\": \"gauge\", \"metrics_api_get_partitions_count\": \"gauge\", \"metrics_api_get_partition_count\": \"gauge\", \"metrics_api_get_partitions_by_filter_count\": \"gauge\", \"metrics_api_add_partitions_count\": \"gauge\", \"metrics_api_add_partitions_req_count\": \"gauge\", 
\"metrics_api_drop_partition_by_name_count\": \"gauge\", \"metrics_api_add_partition_count\": \"gauge\", \"metrics_api_alter_partitions_count\": \"gauge\", \"metrics_api_create_table_count\": \"gauge\", \"metrics_api_alter_table_with_cascade_count\": \"gauge\", \"metrics_api_get_table_meta_count\": \"gauge\", \"metrics_api_get_table_metas_count\": \"gauge\", \"metrics_api_get_table_count\": \"gauge\", \"metrics_api_alter_table_count\": \"gauge\", \"metrics_api_get_tables_count\": \"gauge\", \"metrics_api_get_all_tables_count\": \"gauge\", \"metrics_api_drop_table_count\": \"gauge\", \"metrics_api_get_multi_table_count\": \"gauge\", \"metrics_api_get_database_count\": \"gauge\", \"metrics_api_get_all_databases_count\": \"gauge\", \"metrics_api_get_databases_count\": \"gauge\", \"metrics_api_create_function_count\": \"gauge\", \"metrics_api_getmetaconf_count\": \"gauge\", \"metrics_api_alter_table_with_environment_context_count\": \"gauge\", \"metrics_api_delete_column_statistics_by_table_count\": \"gauge\", \"metrics_api_get_functions_count\": \"gauge\", \"metrics_api_get_function_count\": \"gauge\", \"metrics_api_shutdown_count\": \"gauge\", \"metrics_api_flushcache_count\": \"gauge\", \"metrics_api_get_indexes_count\": \"gauge\", \"metrics_api_get_config_value_count\": \"gauge\", \"metrics_api_set_ugi_count\": \"gauge\", \"metrics_api_get_all_functions_count\": \"gauge\", \"metrics_api_get_table_req_95thpercentile\": \"gauge\", \"metrics_api_get_table_req_50thpercentile\": \"gauge\", \"metrics_api_get_table_req_count\": \"gauge\", \"metrics_api_get_table_req_max\": \"gauge\", \"metrics_api_get_databases_count\": \"gauge\", \"metrics_api_get_databases_95thpercentile\": \"gauge\", \"metrics_api_get_databases_50thpercentile\": \"gauge\", \"metrics_api_get_databases_max\": \"gauge\", \"metrics_api_get_partitions_95thpercentile\": \"gauge\", \"metrics_api_get_partitions_50thpercentile\": \"gauge\", \"metrics_api_get_partitions_count\": \"gauge\", 
\"metrics_api_get_partitions_max\": \"gauge\", \"metrics_api_get_partitions_50thpercentile\": \"gauge\", \"metrics_api_get_table_req_50thpercentile\": \"gauge\",\"metrics_api_get_database_95thpercentile\": \"gauge\",\"metrics_api_get_database_50thpercentile\": \"gauge\", \"metrics_kafka_listener_failures_count\": \"gauge\", \"metrics_kafka_listener_successes_count\": \"gauge\", \"metrics_api_get_table_objects_by_name_req_max\": \"gauge\"} }]", + "com.datadoghq.ad.instances": "[{ \"prometheus_url\": \"http://%%host%%:8080/actuator/prometheus\", \"namespace\": \"${hms_metrics_namespace}\", \"metrics\": [\"${hms_metrics}\"],\"type_overrides\":{\"${hms_metrics_type_overrides}\": \"gauge\"} }]", "com.datadoghq.ad.check_names": "[\"prometheus\"]", "com.datadoghq.ad.init_configs": "[{}]" }, diff --git a/templates/apiary-hms-readwrite.json b/templates/apiary-hms-readwrite.json index 029369b..64220d2 100644 --- a/templates/apiary-hms-readwrite.json +++ b/templates/apiary-hms-readwrite.json @@ -75,7 +75,7 @@ } }, "dockerLabels": { - "com.datadoghq.ad.instances": "[{ \"prometheus_url\": \"http://%%host%%:8080/actuator/prometheus\", \"namespace\": \"hms_readwrite_legacy\", \"metrics\": [\"metrics_classloading_loaded_value\", \"metrics_threads_count_value\", \"metrics_memory_heap_max_value\", \"metrics_init_total_count_tables_value\", \"metrics_init_total_count_dbs_value\", \"metrics_memory_heap_used_value\", \"metrics_init_total_count_partitions_value\", \"jvm_threads_current\", \"jvm_threads_started_total\", \"jvm_memory_bytes_used\", \"jvm_memory_bytes_init\", \"jvm_gc_collection_seconds_count\", \"jvm_gc_collection_seconds\", \"process_cpu_seconds_total\", \"java_lang_operatingsystem_processcpuload\", \"java_lang_operatingsystem_processcputime\", \"metrics_threads_runnable_count_value\", \"metrics_threads_waiting_count_value\", \"java_lang_memory_heapmemoryusage_used\", \"metrics_memory_heap_init_value\", \"metrics_api_get_partition_by_name_count\", 
\"metrics_api_get_partitions_by_names_count\", \"metrics_api_get_partition_names_count\", \"metrics_api_get_partitions_by_expr_count\", \"metrics_api_get_partitions_count\", \"metrics_api_get_partition_count\", \"metrics_api_get_partitions_by_filter_count\", \"metrics_api_add_partitions_count\", \"metrics_api_add_partitions_req_count\", \"metrics_api_drop_partition_by_name_count\", \"metrics_api_add_partition_count\", \"metrics_api_alter_partitions_count\", \"metrics_api_create_table_count\", \"metrics_api_alter_table_with_cascade_count\", \"metrics_api_get_table_meta_count\", \"metrics_api_get_table_metas_count\", \"metrics_api_get_table_count\", \"metrics_api_alter_table_count\", \"metrics_api_get_tables_count\", \"metrics_api_get_all_tables_count\", \"metrics_api_drop_table_count\", \"metrics_api_get_multi_table_count\", \"metrics_api_get_database_count\", \"metrics_api_get_all_databases_count\", \"metrics_api_get_databases_count\", \"metrics_api_create_function_count\", \"metrics_api_getmetaconf_count\", \"metrics_api_alter_table_with_environment_context_count\", \"metrics_api_delete_column_statistics_by_table_count\", \"metrics_api_get_functions_count\", \"metrics_api_get_function_count\", \"metrics_api_shutdown_count\", \"metrics_api_flushcache_count\", \"metrics_api_get_indexes_count\", \"metrics_api_get_config_value_count\", \"metrics_api_set_ugi_count\", \"metrics_api_get_all_functions_count\", \"metrics_api_get_table_req_95thpercentile\", \"metrics_api_get_table_req_50thpercentile\", \"metrics_api_get_table_req_count\", \"metrics_api_get_table_req_max\", \"metrics_api_get_databases_count\", \"metrics_api_get_databases_95thpercentile\", \"metrics_api_get_databases_50thpercentile\", \"metrics_api_get_databases_max\", \"metrics_api_get_partitions_95thpercentile\", \"metrics_api_get_partitions_50thpercentile\", \"metrics_api_get_partitions_count\", \"metrics_api_get_partitions_max\", \"metrics_api_get_partitions_50thpercentile\", 
\"metrics_api_get_table_req_50thpercentile\", \"metrics_api_get_database_95thpercentile\", \"metrics_api_get_database_50thpercentile\", \"metrics_kafka_listener_failures_count\", \"metrics_kafka_listener_successes_count\", \"metrics_api_get_table_objects_by_name_req_max\" ], \"type_overrides\": { \"metrics_classloading_loaded_value\": \"gauge\", \"metrics_threads_count_value\": \"gauge\", \"metrics_memory_heap_max_value\": \"gauge\", \"metrics_init_total_count_tables_value\": \"gauge\", \"metrics_init_total_count_dbs_value\": \"gauge\", \"metrics_memory_heap_used_value\": \"gauge\", \"metrics_init_total_count_partitions_value\": \"gauge\", \"jvm_threads_current\": \"gauge\", \"jvm_threads_started_total\": \"gauge\", \"jvm_memory_bytes_used\": \"gauge\", \"jvm_memory_bytes_init\": \"gauge\", \"jvm_gc_collection_seconds_count\": \"gauge\", \"jvm_gc_collection_seconds\": \"gauge\", \"process_cpu_seconds_total\": \"gauge\", \"java_lang_operatingsystem_processcpuload\": \"gauge\", \"java_lang_operatingsystem_processcputime\": \"gauge\", \"metrics_threads_runnable_count_value\": \"gauge\", \"metrics_threads_waiting_count_value\": \"gauge\", \"java_lang_memory_heapmemoryusage_used\": \"gauge\", \"metrics_memory_heap_init_value\": \"gauge\", \"metrics_api_get_partition_by_name_count\": \"gauge\", \"metrics_api_get_partitions_by_names_count\": \"gauge\", \"metrics_api_get_partition_names_count\": \"gauge\", \"metrics_api_get_partitions_by_expr_count\": \"gauge\", \"metrics_api_get_partitions_count\": \"gauge\", \"metrics_api_get_partition_count\": \"gauge\", \"metrics_api_get_partitions_by_filter_count\": \"gauge\", \"metrics_api_add_partitions_count\": \"gauge\", \"metrics_api_add_partitions_req_count\": \"gauge\", \"metrics_api_drop_partition_by_name_count\": \"gauge\", \"metrics_api_add_partition_count\": \"gauge\", \"metrics_api_alter_partitions_count\": \"gauge\", \"metrics_api_create_table_count\": \"gauge\", \"metrics_api_alter_table_with_cascade_count\": \"gauge\", 
\"metrics_api_get_table_meta_count\": \"gauge\", \"metrics_api_get_table_metas_count\": \"gauge\", \"metrics_api_get_table_count\": \"gauge\", \"metrics_api_alter_table_count\": \"gauge\", \"metrics_api_get_tables_count\": \"gauge\", \"metrics_api_get_all_tables_count\": \"gauge\", \"metrics_api_drop_table_count\": \"gauge\", \"metrics_api_get_multi_table_count\": \"gauge\", \"metrics_api_get_database_count\": \"gauge\", \"metrics_api_get_all_databases_count\": \"gauge\", \"metrics_api_get_databases_count\": \"gauge\", \"metrics_api_create_function_count\": \"gauge\", \"metrics_api_getmetaconf_count\": \"gauge\", \"metrics_api_alter_table_with_environment_context_count\": \"gauge\", \"metrics_api_delete_column_statistics_by_table_count\": \"gauge\", \"metrics_api_get_functions_count\": \"gauge\", \"metrics_api_get_function_count\": \"gauge\", \"metrics_api_shutdown_count\": \"gauge\", \"metrics_api_flushcache_count\": \"gauge\", \"metrics_api_get_indexes_count\": \"gauge\", \"metrics_api_get_config_value_count\": \"gauge\", \"metrics_api_set_ugi_count\": \"gauge\", \"metrics_api_get_all_functions_count\": \"gauge\", \"metrics_api_get_table_req_95thpercentile\": \"gauge\", \"metrics_api_get_table_req_50thpercentile\": \"gauge\", \"metrics_api_get_table_req_count\": \"gauge\", \"metrics_api_get_table_req_max\": \"gauge\", \"metrics_api_get_databases_count\": \"gauge\", \"metrics_api_get_databases_95thpercentile\": \"gauge\", \"metrics_api_get_databases_50thpercentile\": \"gauge\", \"metrics_api_get_databases_max\": \"gauge\", \"metrics_api_get_partitions_95thpercentile\": \"gauge\", \"metrics_api_get_partitions_50thpercentile\": \"gauge\", \"metrics_api_get_partitions_count\": \"gauge\", \"metrics_api_get_partitions_max\": \"gauge\", \"metrics_api_get_partitions_50thpercentile\": \"gauge\", \"metrics_api_get_table_req_50thpercentile\": \"gauge\",\"metrics_api_get_database_95thpercentile\": \"gauge\",\"metrics_api_get_database_50thpercentile\": \"gauge\", 
\"metrics_kafka_listener_failures_count\": \"gauge\", \"metrics_kafka_listener_successes_count\": \"gauge\", \"metrics_api_get_table_objects_by_name_req_max\": \"gauge\"} }]", + "com.datadoghq.ad.instances": "[{ \"prometheus_url\": \"http://%%host%%:8080/actuator/prometheus\", \"namespace\": \"${hms_metrics_namespace}\", \"metrics\": [\"${hms_metrics}\"],\"type_overrides\":{\"${hms_metrics_type_overrides}\": \"gauge\"} }]", "com.datadoghq.ad.check_names": "[\"prometheus\"]", "com.datadoghq.ad.init_configs": "[{}]" }, diff --git a/variables.tf b/variables.tf index 0326e0c..9151244 100644 --- a/variables.tf +++ b/variables.tf @@ -452,6 +452,30 @@ variable "hms_rw_k8s_pdb_settings" { } } +variable "hms_ecs_metrics_readwrite_namespace" { + description = "ECS readwrite metrics namespace" + type = string + default = "hms_readwrite_legacy" +} + +variable "hms_ecs_metrics_readonly_namespace" { + description = "ECS readonly metrics namespace" + type = string + default = "hms_readonly_legacy" +} + +variable "hms_k8s_metrics_readwrite_namespace" { + description = "K8s readwrite metrics namespace" + type = string + default = "hms_readwrite" +} + +variable "hms_k8s_metrics_readonly_namespace" { + description = "K8s readonly metrics namespace" + type = string + default = "hms_readonly" +} + variable "hms_rw_node_affinity" { description = <