diff --git a/CHANGELOG.md b/CHANGELOG.md index 130eb5d..cf71e81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [6.11.0] - 2021-10-28 +### Added +- `liveness_probe` and `readiness_probe` for HMS readwrite and HMS readonly. + ## [6.10.6] - 2021-10-26 ### Added - Add `restrict_public_buckets = true` to s3 bucket public access settings diff --git a/VARIABLES.md b/VARIABLES.md index bdba6db..cac6cb1 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -3,91 +3,98 @@ ## Inputs | Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| apiary_assume_roles | List of maps - each map describes an IAM role that can be assumed in this account to write data into the configured list of schemas. See section [`apiary_assume_roles`](#apiary_assume_roles) for more info. | list(map) | - | no | -| apiary_customer_accounts | AWS account IDs for clients of this Metastore. | list | - | no | -| apiary_customer_condition | IAM policy condition applied to customer account s3 access. | string | `` | no | -| apiary_database_name | Database name to create in RDS for Apiary. | string | `apiary` | no | -| apiary_deny_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | list | - | no | -| apiary_deny_iamrole_actions | List of S3 actions that `apiary_deny_iamroles` are not allowed to perform. | list | All actions except `Get*`, but not including `GetObject` | no | -| apiary_domain_name | Apiary domain name for Route 53. | string | `` | no | -| apiary_log_bucket | Bucket for Apiary logs. | string | - | yes | -| apiary_log_prefix | Prefix for Apiary logs. 
| string | `` | no | -| apiary_managed_schemas | List of maps - each map entry describes an Apiary schema, along with S3 storage properties for the schema. See section [`apiary_managed_schemas`](#apiary_managed_schemas) for more info. | list(map) | - | no | -| apiary_consumer_iamroles | AWS IAM roles allowed read access to managed Apiary S3 buckets. | map | `` | no | -| apiary_producer_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | map | `` | no | -| apiary_rds_additional_sg | Comma-separated string containing additional security groups to attach to RDS. | list | `` | no | -| apiary_shared_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | list | `` | no | -| apiary_tags | Common tags that get put on all resources. | map | - | yes | -| atlas_kafka_bootstrap_servers | Atlas kafka bootstrap servers. | string | `` | no | -| atlas_cluster_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | string | `` | no | -| aws_region | AWS region. | string | - | yes | -| dashboard_namespace | K8s namespace to deploy grafana dashboard as configmap. | string | `monitoring` | no | -| db_apply_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | bool | `false` | no | -| db_backup_retention | The number of days to retain backups for the RDS Metastore DB. | string | - | yes | -| db_backup_window | Preferred backup window for the RDS Metastore DB in UTC. | string | `02:00-03:00` | no | -| db_instance_class | Instance type for the RDS Metastore DB. | string | - | yes | -| db_instance_count | Desired count of database cluster instances. | string | `2` | no | -| db_maintenance_window | Preferred maintenance window for the RDS Metastore DB in UTC. | string | `wed:03:00-wed:04:00` | no | -| db_master_username | Aurora cluster MySQL master user name. 
| string | `apiary` | no | -| db_ro_secret_name | Aurora cluster MySQL read-only user SecretsManger secret name. | string | `` | no | -| db_rw_secret_name | Aurora cluster MySQL read/write user SecretsManager secret name. | string | `` | no | -| docker_registry_auth_secret_name | Docker Registry authentication SecretManager secret name. | string | `` | no | -| ecs_domain_extension | Domain name to use for hosted zone created by ECS service discovery. | string | `lcl` | no | -| elb_timeout | Idle timeout for Apiary ELB. | string | `1800` | no | -| enable_data_events | Enable managed buckets S3 event notifications. | bool | `false` | no | -| enable_gluesync | Enable metadata sync from Hive to the Glue catalog. | bool | `false` | no | -| enable_hive_metastore_metrics | Enable sending Hive Metastore metrics to CloudWatch. | bool | `false` | no | -| enable_metadata_events | Enable Hive Metastore SNS listener. | bool | `false` | no | -| enable_s3_paid_metrics | Enable managed S3 buckets request and data transfer metrics. | bool | `false` | no | -| enable\_vpc\_endpoint\_services | Enable metastore NLB,Route53 entries VPC access and VPC endpoint services,for cross-account access. | `bool` | `true` | no | -| encrypt\_db | Specifies whether the RDS cluster is encrypted. | `bool` | `false` | no | -| external_data_buckets | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access. | list | `` | no | -| external_database_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. | string | `` | no | -| hms_docker_image | Docker image ID for the Hive Metastore. | string | - | yes | -| hms_docker_version | Version of the Docker image for the Hive Metastore. | string | - | yes | -| hms_instance_type | Hive Metastore instance type, possible values ecs, k8s. | string | ecs | no | -| hms_log_level | Log level for the Hive Metastore. 
| string | `INFO` | no | -| hms_nofile_ulimit | Ulimit for the Hive Metastore container. | string | `32768` | no | -| hms_ro_cpu | CPU for the read only Hive Metastore ECS task. Valid values can be 256, 512, 1024, 2048 and 4096. Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | string | `512` | no | -| hms_ro_ecs_task_count | Desired ECS task count of the read only Hive Metastore service. | string | `3` | no | -| hms_ro_heapsize | Heapsize for the read only Hive Metastore. Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | string | - | yes | -| hms_rw_cpu | CPU for the read/write Hive Metastore ECS task. Valid values can be 256, 512, 1024, 2048 and 4096. Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | string | `512` | no | -| hms_rw_ecs_task_count | Desired ECS task count of the read/write Hive Metastore service. | string | `3` | no | -| hms_rw_heapsize | Heapsize for the read/write Hive Metastore. Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | string | - | yes | -| iam_name_root | Name to identify Hive Metastore IAM roles. | string | `hms` | no | -| ingress_cidr | Generally allowed ingress CIDR list. | list | - | yes | -| rw_ingress_cidr | Read-Write metastore ingress CIDR list. | list | `var.ingress_cidr` | no | -| instance_name | Apiary instance name to identify resources in multi-instance deployments. | string | `` | no | -| k8s_docker_registry_secret| Docker Registry authentication K8s secret name. | string | `` | no | -| kiam_arn | Kiam server IAM role ARN. | string | `` | no | -| ldap_base | Active directory LDAP base DN to search users and groups. | string | `` | no | -| ldap_ca_cert | Base64 encoded Certificate Authority bundle to validate LDAPS connections. 
| string | `` | no | -| ldap_secret_name | Active directory LDAP bind DN SecretsManager secret name. | string | `` | no | -| ldap_url | Active directory LDAP URL to configure Hadoop LDAP group mapping. | string | `` | no | -| metastore_namespace | K8s namespace to deploy Hive metastore containers. | string | `metastore` | no | -| oidc_provider | EKS cluster OIDC provider name, required for configuring IAM using IRSA. | string | `` | no | -| private_subnets | Private subnets. | list | - | yes | -| ranger_audit_db_url | Ranger DB audit provider configuration. | string | `` | no | -| ranger_audit_secret_name | Ranger DB audit secret name. | string | `` | no | -| ranger_audit_solr_url | Ranger Solr audit provider configuration. | string | `` | no | -| ranger_policy_manager_url | Ranger admin URL to synchronize policies. | string | `` | no | -| rds_max_allowed_packet | RDS/MySQL setting for parameter 'max_allowed_packet' in bytes. | number | `134217728` | no | -| s3_enable_inventory | Enable S3 inventory configuration. | bool | `false` | no | -| s3_inventory_customer_accounts | AWS account IDs allowed to access s3 inventory database. | list | - | no | -| s3_inventory_format | Output format for S3 inventory results. Can be Parquet, ORC, CSV | string | `ORC` | no | -| s3_inventory_update_schedule | Cron schedule to update S3 inventory tables (if enabled). Defaults to every 12 hours. | string | `0 */12 * * *` | no | -| s3_lifecycle_policy_transition_period | Number of days for transition to a different storage class using lifecycle policy. | string | `30` | no | -| s3_lifecycle_abort_incomplete_multipart_upload_days | Number of days after which incomplete multipart uploads will be deleted. | string | `7` | no | +|------|-------------|------|---------|:--------:| +| apiary\_assume\_roles | Cross account AWS IAM roles allowed write access to managed Apiary S3 buckets using assume policy. 
| `list(any)` | `[]` | no | +| apiary\_consumer\_iamroles | AWS IAM roles allowed read access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | +| apiary\_customer\_accounts | AWS account IDs for clients of this Metastore. | `list(string)` | `[]` | no | +| apiary\_customer\_condition | IAM policy condition applied to customer account s3 object access. | `string` | `""` | no | +| apiary\_database\_name | Database name to create in RDS for Apiary. | `string` | `"apiary"` | no | +| apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | +| apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | +| apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | +| apiary\_log\_bucket | Bucket for Apiary logs. If this is blank, module will create a bucket. | `string` | `""` | no | +| apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | +| apiary\_managed\_schemas | List of maps, each map contains schema name from which S3 bucket names will be derived, and various properties. The corresponding S3 bucket will be named as apiary\_instance-aws\_account-aws\_region-schema\_name. | `list(map(string))` | `[]` | no | +| apiary\_producer\_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | `map(any)` | `{}` | no | +| apiary\_rds\_additional\_sg | Comma-separated string containing additional security groups to attach to RDS. | `list(any)` | `[]` | no | +| apiary\_shared\_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | `list(any)` | `[]` | no | +| apiary\_tags | Common tags that get put on all resources. | `map(any)` | n/a | yes | +| atlas\_cluster\_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | `string` | `""` | no | +| atlas\_kafka\_bootstrap\_servers | Kafka instance url. | `string` | `""` | no | +| aws\_region | AWS region. | `string` | n/a | yes | +| dashboard\_namespace | k8s namespace to deploy grafana dashboard. | `string` | `"monitoring"` | no | +| db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | +| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. | `string` | n/a | yes | +| db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. 
| `string` | `"02:00-03:00"` | no | +| db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | n/a | yes | +| db\_instance\_count | Desired count of database cluster instances. | `string` | `"2"` | no | +| db\_maintenance\_window | Preferred maintenance window for the RDS Metastore DB in UTC. | `string` | `"wed:03:00-wed:04:00"` | no | +| db\_master\_username | Aurora cluster MySQL master user name. | `string` | `"apiary"` | no | +| db\_ro\_secret\_name | Aurora cluster MySQL read-only user SecretsManager secret name. | `string` | `""` | no | +| db\_rw\_secret\_name | Aurora cluster MySQL read/write user SecretsManager secret name. | `string` | `""` | no | +| disallow\_incompatible\_col\_type\_changes | Hive metastore setting to disallow validation when incompatible schema type changes. | `bool` | `true` | no | +| docker\_registry\_auth\_secret\_name | Docker Registry authentication SecretsManager secret name. | `string` | `""` | no | +| ecs\_domain\_extension | Domain name to use for hosted zone created by ECS service discovery. | `string` | `"lcl"` | no | +| elb\_timeout | Idle timeout for Apiary ELB. | `string` | `"1800"` | no | +| enable\_apiary\_s3\_log\_hive | Create hive database to archive s3 logs in parquet format. Only applicable when module manages logs S3 bucket. | `bool` | `true` | no | +| enable\_data\_events | Enable managed buckets S3 event notifications. | `bool` | `false` | no | +| enable\_gluesync | Enable metadata sync from Hive to the Glue catalog. | `bool` | `false` | no | +| enable\_hive\_metastore\_metrics | Enable sending Hive Metastore metrics to CloudWatch. | `bool` | `false` | no | +| enable\_metadata\_events | Enable Hive Metastore SNS listener. | `bool` | `false` | no | +| enable\_s3\_paid\_metrics | Enable managed S3 buckets request and data transfer metrics. 
| `bool` | `false` | no | +| enable\_vpc\_endpoint\_services | Enable metastore NLB, Route53 entries VPC access and VPC endpoint services, for cross-account access. | `bool` | `true` | no | +| encrypt\_db | Specifies whether the DB cluster is encrypted. | `bool` | `false` | no | +| external\_data\_buckets | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access. | `list(any)` | `[]` | no | +| external\_database\_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. | `string` | `""` | no | +| hive\_metastore\_port | Port on which both Hive Metastore readwrite and readonly will run. | `number` | `9083` | no | +| hms\_docker\_image | Docker image ID for the Hive Metastore. | `string` | n/a | yes | +| hms\_docker\_version | Version of the Docker image for the Hive Metastore. | `string` | n/a | yes | +| hms\_instance\_type | Hive Metastore instance type, possible values: ecs, k8s. | `string` | `"ecs"` | no | +| hms\_log\_level | Log level for the Hive Metastore. | `string` | `"INFO"` | no | +| hms\_nofile\_ulimit | Ulimit for the Hive Metastore container. | `string` | `"32768"` | no | +| hms\_ro\_cpu | CPU for the read only Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | +| hms\_ro\_ecs\_task\_count | Desired ECS task count of the read only Hive Metastore service. | `string` | `"3"` | no | +| hms\_ro\_heapsize | Heapsize for the read only Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | +| hms\_rw\_cpu | CPU for the read/write Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | +| hms\_rw\_ecs\_task\_count | Desired ECS task count of the read/write Hive Metastore service. | `string` | `"3"` | no | +| hms\_rw\_heapsize | Heapsize for the read/write Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | +| iam\_name\_root | Name to identify Hive Metastore IAM roles. | `string` | `"hms"` | no | +| ingress\_cidr | Generally allowed ingress CIDR list. | `list(string)` | n/a | yes | +| instance\_name | Apiary instance name to identify resources in multi-instance deployments. | `string` | `""` | no | +| k8s\_docker\_registry\_secret | Docker Registry authentication K8s secret name. | `string` | `""` | no | +| kafka\_bootstrap\_servers | Kafka bootstrap servers to send metastore events, setting this enables Hive Metastore Kafka listener. | `string` | `""` | no | +| kafka\_topic\_name | Kafka topic to send metastore events. | `string` | `""` | no | +| kiam\_arn | Kiam server IAM role ARN. | `string` | `""` | no | +| ldap\_base | Active directory LDAP base DN to search users and groups. | `string` | `""` | no | +| ldap\_ca\_cert | Base64 encoded Certificate Authority bundle to validate LDAPS connections. | `string` | `""` | no | +| ldap\_secret\_name | Active directory LDAP bind DN SecretsManager secret name. | `string` | `""` | no | +| ldap\_url | Active directory LDAP URL to configure Hadoop LDAP group mapping. | `string` | `""` | no | +| metastore\_namespace | k8s namespace to deploy metastore containers. | `string` | `"metastore"` | no | +| oidc\_provider | EKS cluster OIDC provider name, required for configuring IAM using IRSA. | `string` | `""` | no | +| private\_subnets | Private subnets. | `list(any)` | n/a | yes | +| ranger\_audit\_db\_url | Ranger DB audit provider configuration. | `string` | `""` | no | +| ranger\_audit\_secret\_name | Ranger DB audit secret name. | `string` | `""` | no | +| ranger\_audit\_solr\_url | Ranger Solr audit provider configuration. | `string` | `""` | no | +| ranger\_policy\_manager\_url | Ranger admin URL to synchronize policies. 
| `string` | `""` | no | +| rds\_max\_allowed\_packet | RDS/MySQL setting for parameter 'max\_allowed\_packet' in bytes. Default is 128MB (Note that MySQL default is 4MB). | `number` | `134217728` | no | +| rw\_ingress\_cidr | Read-Write metastore ingress CIDR list. If not set, defaults to `var.ingress_cidr`. | `list(string)` | `[]` | no | +| s3\_enable\_inventory | Enable S3 inventory configuration. | `bool` | `false` | no | +| s3\_inventory\_customer\_accounts | AWS account IDs allowed to access s3 inventory database. | `list(string)` | `[]` | no | +| s3\_inventory\_format | Output format for S3 inventory results. Can be Parquet, ORC, CSV | `string` | `"ORC"` | no | +| s3\_inventory\_update\_schedule | Cron schedule to update S3 inventory tables (if enabled). Defaults to every 12 hours. | `string` | `"0 */12 * * *"` | no | +| s3\_lifecycle\_abort\_incomplete\_multipart\_upload\_days | Number of days after which incomplete multipart uploads will be deleted. | `string` | `"7"` | no | +| s3\_lifecycle\_policy\_transition\_period | S3 Lifecycle Policy number of days for Transition rule | `string` | `"30"` | no | +| s3\_log\_expiry | Number of days after which Apiary S3 bucket logs expire. | `string` | `"365"` | no | | s3\_logs\_sqs\_delay\_seconds | The time in seconds that the delivery of all messages in the queue will be delayed. | `number` | `300` | no | | s3\_logs\_sqs\_message\_retention\_seconds | Time in seconds after which message will be deleted from the queue. | `number` | `345600` | no | | s3\_logs\_sqs\_receive\_wait\_time\_seconds | The time for which a ReceiveMessage call will wait for a message to arrive (long polling) before returning. | `number` | `10` | no | | s3\_logs\_sqs\_visibility\_timeout\_seconds | Time in seconds after which message will be returned to the queue if it is not deleted. | `number` | `3600` | no | -| s3_storage_class | Destination S3 storage class for transition in the lifecycle policy. 
| string | `INTELLIGENT_TIERING` | no | -| secondary_vpcs | List of VPCs to associate with Service Discovery namespace. | list | `` | no | -| system_schema_customer_accounts | AWS account IDs allowed to access system database. | list | - | no | -| table_param_filter | A regular expression for selecting necessary table parameters for the SNS listener. If the value isn't set, then no table parameters are selected. | string | `` | no | -| vpc_id | VPC ID. | string | - | yes | +| s3\_storage\_class | S3 storage class after transition using lifecycle policy | `string` | `"INTELLIGENT_TIERING"` | no | +| secondary\_vpcs | List of VPCs to associate with Service Discovery namespace. | `list(any)` | `[]` | no | +| system\_schema\_customer\_accounts | AWS account IDs allowed to access system database. | `list(string)` | `[]` | no | +| system\_schema\_name | Name for the internal system database | `string` | `"apiary_system"` | no | +| table\_param\_filter | A regular expression for selecting necessary table parameters for the SNS listener. If the value isn't set, then no table parameters are selected. | `string` | `""` | no | +| vpc\_id | VPC ID. 
| `string` | n/a | yes | ### apiary_assume_roles diff --git a/k8s-readonly.tf b/k8s-readonly.tf index 693da09..9260691 100644 --- a/k8s-readonly.tf +++ b/k8s-readonly.tf @@ -89,7 +89,7 @@ resource "kubernetes_deployment" "apiary_hms_readonly" { image = "${var.hms_docker_image}:${var.hms_docker_version}" name = "${local.hms_alias}-readonly" port { - container_port = 9083 + container_port = var.hive_metastore_port } env { name = "MYSQL_DB_HOST" @@ -172,6 +172,28 @@ resource "kubernetes_deployment" "apiary_hms_readonly" { value = local.hms_ro_maxthreads } + liveness_probe { + tcp_socket { + port = var.hive_metastore_port + } + timeout_seconds = 60 + failure_threshold = 3 + success_threshold = 1 + initial_delay_seconds = 60 + period_seconds = 20 + } + + readiness_probe { + tcp_socket { + port = var.hive_metastore_port + } + timeout_seconds = 60 + failure_threshold = 3 + success_threshold = 1 + initial_delay_seconds = 60 + period_seconds = 20 + } + resources { limits { memory = "${var.hms_ro_heapsize}Mi" diff --git a/k8s-readwrite.tf b/k8s-readwrite.tf index bebf3b8..3ed04a2 100644 --- a/k8s-readwrite.tf +++ b/k8s-readwrite.tf @@ -88,7 +88,7 @@ resource "kubernetes_deployment" "apiary_hms_readwrite" { image = "${var.hms_docker_image}:${var.hms_docker_version}" name = "${local.hms_alias}-readwrite" port { - container_port = 9083 + container_port = var.hive_metastore_port } env { name = "MYSQL_DB_HOST" @@ -212,6 +212,28 @@ resource "kubernetes_deployment" "apiary_hms_readwrite" { value = var.disallow_incompatible_col_type_changes } + liveness_probe { + tcp_socket { + port = var.hive_metastore_port + } + timeout_seconds = 60 + failure_threshold = 3 + success_threshold = 1 + initial_delay_seconds = 60 + period_seconds = 20 + } + + readiness_probe { + tcp_socket { + port = var.hive_metastore_port + } + timeout_seconds = 60 + failure_threshold = 3 + success_threshold = 1 + initial_delay_seconds = 60 + period_seconds = 20 + } + resources { limits { memory = 
"${var.hms_rw_heapsize}Mi" diff --git a/variables.tf b/variables.tf index ed36cf7..0472e6b 100644 --- a/variables.tf +++ b/variables.tf @@ -48,6 +48,12 @@ variable "aws_region" { type = string } +variable "hive_metastore_port" { + description = "Port on which both Hive Metastore readwrite and readonly will run." + type = number + default = 9083 +} + variable "apiary_log_bucket" { description = "Bucket for Apiary logs.If this is blank, module will create a bucket." type = string