From 2a758e3b383237c706c7f50b199f05db9e8a12ce Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Tue, 23 Jan 2024 15:42:09 +0000 Subject: [PATCH 01/14] Feature/datadog (#247) * Added datadog agent for BEX ECS * Added datadog agent for BEX ECS * Removed the health check * Added dockerLabel * Added changelog * Added the VARIABLES.md --------- Co-authored-by: Dhrubajyoti Sadhu --- CHANGELOG.md | 4 + VARIABLES.md | 227 ++++++++++++++-------------- cloudwatch.tf | 6 + common.tf | 20 +++ templates.tf | 14 ++ templates/apiary-hms-readonly.json | 34 ++++- templates/apiary-hms-readwrite.json | 30 ++++ variables.tf | 18 +++ 8 files changed, 239 insertions(+), 114 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8040db..e1da8d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.0.1] - 2024-01-22 ### Added - Added `datadog-agent` for HMS-Readonly and HMS-Readwrite in ECS.
+ ## [7.0.0] - 2023-11-16 ### Changed - Changed `k8s` API to work with provider 2.x diff --git a/VARIABLES.md b/VARIABLES.md index 31241a9..dcdee63 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -2,119 +2,122 @@ ## Inputs -| Name | Description | Type | Default | Required | -|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:| -| apiary\_assume\_roles | Cross account AWS IAM roles allowed write access to managed Apiary S3 buckets using assume policy. | `list(any)` | `[]` | no | -| apiary\_consumer\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to all data in managed Apiary S3 buckets. 
| `list(string)` | `[]` | no | +| Name | Description | Type | Default | Required | +|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:| +| apiary\_assume\_roles | Cross account AWS IAM roles allowed write access to managed Apiary S3 buckets using assume policy. | `list(any)` | `[]` | no | +| apiary\_consumer\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to all data in managed Apiary S3 buckets. | `list(string)` | `[]` | no | | apiary\_consumer\_prefix\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to certain prefixes in managed Apiary S3 buckets. See below section for more information and format. | `map(map(list(string)))` | `{}` | no | -| apiary\_customer\_accounts | AWS account IDs for clients of this Metastore. | `list(string)` | `[]` | no | -| apiary\_customer\_condition | IAM policy condition applied to customer account S3 object access. | `string` | `""` | no | -| apiary\_database\_name | Database name to create in RDS for Apiary. | `string` | `"apiary"` | no | -| apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | -| apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | -| apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | -| apiary\_governance\_iamroles | AWS IAM governance roles allowed read and tagging access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | -| apiary\_log\_bucket | Bucket for Apiary logs.If this is blank, module will create a bucket. | `string` | `""` | no | -| apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | -| apiary\_managed\_schemas | List of maps, each map contains schema name from which S3 bucket names will be derived, and various properties. The corresponding S3 bucket will be named as apiary\_instance-aws\_account-aws\_region-schema\_name. | `list(map(string))` | `[]` | no | -| apiary\_producer\_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | `map(any)` | `{}` | no | -| apiary\_rds\_additional\_sg | Comma-separated string containing additional security groups to attach to RDS. | `list(any)` | `[]` | no | -| apiary\_shared\_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | `list(any)` | `[]` | no | -| apiary\_tags | Common tags that get put on all resources. | `map(any)` | n/a | yes | -| atlas\_cluster\_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | `string` | `""` | no | -| atlas\_kafka\_bootstrap\_servers | Kafka instance url. | `string` | `""` | no | -| aws\_region | AWS region. | `string` | n/a | yes | -| dashboard\_namespace | k8s namespace to deploy grafana dashboard. | `string` | `"monitoring"` | no | -| db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | -| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. 
| `string` | n/a | yes | -| db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. | `string` | `"02:00-03:00"` | no | -| db\_enable\_performance\_insights | Enable RDS Performance Insights | `bool` | `false` | no | -| db\_enhanced\_monitoring\_interval | RDS monitoring interval (in seconds) for enhanced monitoring. Valid values are 0, 1, 5, 10, 15, 30, 60. Default is 0. | `number` | `0` | no | -| db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | n/a | yes | -| db\_instance\_count | Desired count of database cluster instances. | `string` | `"2"` | no | -| db\_maintenance\_window | Preferred maintenance window for the RDS Metastore DB in UTC. | `string` | `"wed:03:00-wed:04:00"` | no | -| db\_master\_username | Aurora cluster MySQL master user name. | `string` | `"apiary"` | no | -| db\_ro\_secret\_name | Aurora cluster MySQL read-only user SecretsManger secret name. | `string` | `""` | no | -| db\_rw\_secret\_name | Aurora cluster MySQL read/write user SecretsManager secret name. | `string` | `""` | no | -| disallow\_incompatible\_col\_type\_changes | Hive metastore setting to disallow validation when incompatible schema type changes. | `bool` | `true` | no | -| docker\_registry\_auth\_secret\_name | Docker Registry authentication SecretManager secret name. | `string` | `""` | no | -| ecs\_domain\_extension | Domain name to use for hosted zone created by ECS service discovery. | `string` | `"lcl"` | no | -| elb\_timeout | Idle timeout for Apiary ELB. | `string` | `"1800"` | no | -| enable\_apiary\_s3\_log\_hive | Create hive database to archive s3 logs in parquet format.Only applicable when module manages logs S3 bucket. | `bool` | `true` | no | -| enable\_autoscaling | Enable read only Hive Metastore k8s horizontal pod autoscaling. | `bool` | `true` | no | -| enable\_data\_events | Enable managed buckets S3 event notifications. 
| `bool` | `false` | no | -| enable\_gluesync | Enable metadata sync from Hive to the Glue catalog. | `bool` | `false` | no | -| enable\_hive\_metastore\_metrics | Enable sending Hive Metastore metrics to CloudWatch. | `bool` | `false` | no | -| enable\_metadata\_events | Enable Hive Metastore SNS listener. | `bool` | `false` | no | -| enable\_s3\_paid\_metrics | Enable managed S3 buckets request and data transfer metrics. | `bool` | `false` | no | -| enable\_vpc\_endpoint\_services | Enable metastore NLB, Route53 entries VPC access and VPC endpoint services, for cross-account access. | `bool` | `true` | no | -| encrypt\_db | Specifies whether the DB cluster is encrypted | `bool` | `false` | no | -| external\_data\_buckets | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access. | `list(any)` | `[]` | no | -| external\_database\_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. | `string` | `""` | no | -| hive\_metastore\_port | Port on which both Hive Metastore readwrite and readonly will run. | `number` | `9083` | no | -| hms\_additional\_environment\_variables | Additional environment variables for the Hive Metastore. | `map(any)` | `{}` | no | -| hms\_autogather\_stats | Read-write Hive metastore setting to enable/disable statistics auto-gather on table/partition creation. | `bool` | `true` | no | -| hms\_docker\_image | Docker image ID for the Hive Metastore. | `string` | n/a | yes | -| hms\_docker\_version | Version of the Docker image for the Hive Metastore. | `string` | n/a | yes | -| hms\_instance\_type | Hive Metastore instance type, possible values: ecs,k8s. | `string` | `"ecs"` | no | -| hms\_log\_level | Log level for the Hive Metastore. | `string` | `"INFO"` | no | -| hms\_nofile\_ulimit | Ulimit for the Hive Metastore container. | `string` | `"32768"` | no | -| hms\_ro\_cpu | CPU for the read only Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | -| hms\_ro\_db\_connection\_pool\_size | Read-only Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | -| hms\_ro\_ecs\_task\_count | Desired ECS task count of the read only Hive Metastore service. | `string` | `"3"` | no | -| hms\_ro\_heapsize | Heapsize for the read only Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | -| hms\_ro\_k8s\_replica\_count | Initial Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| hms\_ro\_k8s\_max\_replica\_count | Max Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| hms\_ro\_target\_cpu\_percentage | Read only Hive Metastore autoscaling threshold for CPU target usage. | `number` | `"2048"` | no | -| hms\_ro\_request\_partition\_limit | Read only Hive Metastore limits of request partitions. | `string` | n/a | no | -| hms\_rw\_request\_partition\_limit | Read Write Hive Metastore limits of request partitions. | `string` | n/a | no | -| hms\_rw\_cpu | CPU for the read/write Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | -| hms\_rw\_db\_connection\_pool\_size | Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | -| hms\_rw\_ecs\_task\_count | Desired ECS task count of the read/write Hive Metastore service. | `string` | `"3"` | no | -| hms\_rw\_heapsize | Heapsize for the read/write Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | -| hms\_rw\_k8s\_replica\_count | Initial Number of read/write Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| iam\_name\_root | Name to identify Hive Metastore IAM roles. | `string` | `"hms"` | no | -| ingress\_cidr | Generally allowed ingress CIDR list. | `list(string)` | n/a | yes | -| instance\_name | Apiary instance name to identify resources in multi-instance deployments. | `string` | `""` | no | -| k8s\_docker\_registry\_secret | Docker Registry authentication K8s secret name. | `string` | `""` | no | -| kafka\_bootstrap\_servers | Kafka bootstrap servers to send metastore events, setting this enables Hive Metastore Kafka listener. | `string` | `""` | no | -| kafka\_topic\_name | Kafka topic to send metastore events. | `string` | `""` | no | -| kiam\_arn | Kiam server IAM role ARN. | `string` | `""` | no | -| ldap\_base | Active directory LDAP base DN to search users and groups. | `string` | `""` | no | -| ldap\_ca\_cert | Base64 encoded Certificate Authority bundle to validate LDAPS connections. | `string` | `""` | no | -| ldap\_secret\_name | Active directory LDAP bind DN SecretsManager secret name. | `string` | `""` | no | -| ldap\_url | Active directory LDAP URL to configure Hadoop LDAP group mapping. | `string` | `""` | no | -| metastore\_namespace | k8s namespace to deploy metastore containers. | `string` | `"metastore"` | no | -| oidc\_provider | EKS cluster OIDC provider name, required for configuring IAM using IRSA. | `string` | `""` | no | -| private\_subnets | Private subnets. | `list(any)` | n/a | yes | -| ranger\_audit\_db\_url | Ranger DB audit provider configuration. | `string` | `""` | no | -| ranger\_audit\_secret\_name | Ranger DB audit secret name. | `string` | `""` | no | -| ranger\_audit\_solr\_url | Ranger Solr audit provider configuration. 
| `string` | `""` | no | -| ranger\_policy\_manager\_url | Ranger admin URL to synchronize policies. | `string` | `""` | no | -| rds\_max\_allowed\_packet | RDS/MySQL setting for parameter 'max\_allowed\_packet' in bytes. Default is 128MB (Note that MySQL default is 4MB). | `number` | `134217728` | no | -| rw\_ingress\_cidr | Read-Write metastore ingress CIDR list. If not set, defaults to `var.ingress_cidr`. | `list(string)` | `[]` | no | -| s3\_enable\_inventory | Enable S3 inventory configuration. | `bool` | `false` | no | -| s3\_inventory\_customer\_accounts | AWS account IDs allowed to access s3 inventory database. | `list(string)` | `[]` | no | -| s3\_inventory\_format | Output format for S3 inventory results. Can be Parquet, ORC, CSV | `string` | `"ORC"` | no | -| s3\_inventory\_update\_schedule | Cron schedule to update S3 inventory tables (if enabled). Defaults to every 12 hours. | `string` | `"0 */12 * * *"` | no | -| s3\_lifecycle\_abort\_incomplete\_multipart\_upload\_days | Number of days after which incomplete multipart uploads will be deleted. | `string` | `"7"` | no | -| s3\_lifecycle\_policy\_transition\_period | S3 Lifecycle Policy number of days for Transition rule | `string` | `"30"` | no | -| s3\_log\_expiry | Number of days after which Apiary S3 bucket logs expire. | `string` | `"365"` | no | -| s3\_logs\_sqs\_delay\_seconds | The time in seconds that the delivery of all messages in the queue will be delayed. | `number` | `300` | no | -| s3\_logs\_sqs\_message\_retention\_seconds | Time in seconds after which message will be deleted from the queue. | `number` | `345600` | no | -| s3\_logs\_sqs\_receive\_wait\_time\_seconds | The time for which a ReceiveMessage call will wait for a message to arrive (long polling) before returning. | `number` | `10` | no | -| s3\_logs\_sqs\_visibility\_timeout\_seconds | Time in seconds after which message will be returned to the queue if it is not deleted. 
| `number` | `3600` | no | -| s3\_storage\_class | S3 storage class after transition using lifecycle policy | `string` | `"INTELLIGENT_TIERING"` | no | -| secondary\_vpcs | List of VPCs to associate with Service Discovery namespace. | `list(any)` | `[]` | no | -| system\_schema\_customer\_accounts | AWS account IDs allowed to access system database. | `list(string)` | `[]` | no | -| system\_schema\_name | Name for the internal system database | `string` | `"apiary_system"` | no | -| table\_param\_filter | A regular expression for selecting necessary table parameters for the SNS listener. If the value isn't set, then no table parameters are selected. | `string` | `""` | no | -| vpc\_id | VPC ID. | `string` | n/a | yes | -| enable\_dashboard | make EKS & ECS dashboard optional | `bool` | true | no | -| rds\_family | RDS Family | `string` | aurora5.6 | no | -| datadog_metrics_enabled | Enable Datadog metrics for HMS | `bool` | false | no | -| datadog_metrics_hms_readwrite_readonly | Prometheus Metrics sent to datadog | list(string) | ["metrics_classloading_loaded_value","metrics_threads_count_value","metrics_memory_heap_max_value","metrics_init_total_count_tables_value","metrics_init_total_count_dbs_value","metrics_memory_heap_used_value","metrics_init_total_count_partitions_value"] | no | -| datadog_metrics_port | Port in which metrics will be send for Datadog | string | 8080 | no | +| apiary\_customer\_accounts | AWS account IDs for clients of this Metastore. | `list(string)` | `[]` | no | +| apiary\_customer\_condition | IAM policy condition applied to customer account S3 object access. | `string` | `""` | no | +| apiary\_database\_name | Database name to create in RDS for Apiary. | `string` | `"apiary"` | no | +| apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | +| apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | +| apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | +| apiary\_governance\_iamroles | AWS IAM governance roles allowed read and tagging access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | +| apiary\_log\_bucket | Bucket for Apiary logs.If this is blank, module will create a bucket. | `string` | `""` | no | +| apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | +| apiary\_managed\_schemas | List of maps, each map contains schema name from which S3 bucket names will be derived, and various properties. The corresponding S3 bucket will be named as apiary\_instance-aws\_account-aws\_region-schema\_name. | `list(map(string))` | `[]` | no | +| apiary\_producer\_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | `map(any)` | `{}` | no | +| apiary\_rds\_additional\_sg | Comma-separated string containing additional security groups to attach to RDS. | `list(any)` | `[]` | no | +| apiary\_shared\_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | `list(any)` | `[]` | no | +| apiary\_tags | Common tags that get put on all resources. | `map(any)` | n/a | yes | +| atlas\_cluster\_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | `string` | `""` | no | +| atlas\_kafka\_bootstrap\_servers | Kafka instance url. | `string` | `""` | no | +| aws\_region | AWS region. | `string` | n/a | yes | +| dashboard\_namespace | k8s namespace to deploy grafana dashboard. | `string` | `"monitoring"` | no | +| db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | +| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. 
| `string` | n/a | yes | +| db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. | `string` | `"02:00-03:00"` | no | +| db\_enable\_performance\_insights | Enable RDS Performance Insights | `bool` | `false` | no | +| db\_enhanced\_monitoring\_interval | RDS monitoring interval (in seconds) for enhanced monitoring. Valid values are 0, 1, 5, 10, 15, 30, 60. Default is 0. | `number` | `0` | no | +| db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | n/a | yes | +| db\_instance\_count | Desired count of database cluster instances. | `string` | `"2"` | no | +| db\_maintenance\_window | Preferred maintenance window for the RDS Metastore DB in UTC. | `string` | `"wed:03:00-wed:04:00"` | no | +| db\_master\_username | Aurora cluster MySQL master user name. | `string` | `"apiary"` | no | +| db\_ro\_secret\_name | Aurora cluster MySQL read-only user SecretsManger secret name. | `string` | `""` | no | +| db\_rw\_secret\_name | Aurora cluster MySQL read/write user SecretsManager secret name. | `string` | `""` | no | +| disallow\_incompatible\_col\_type\_changes | Hive metastore setting to disallow validation when incompatible schema type changes. | `bool` | `true` | no | +| docker\_registry\_auth\_secret\_name | Docker Registry authentication SecretManager secret name. | `string` | `""` | no | +| ecs\_domain\_extension | Domain name to use for hosted zone created by ECS service discovery. | `string` | `"lcl"` | no | +| elb\_timeout | Idle timeout for Apiary ELB. | `string` | `"1800"` | no | +| enable\_apiary\_s3\_log\_hive | Create hive database to archive s3 logs in parquet format.Only applicable when module manages logs S3 bucket. | `bool` | `true` | no | +| enable\_autoscaling | Enable read only Hive Metastore k8s horizontal pod autoscaling. | `bool` | `true` | no | +| enable\_data\_events | Enable managed buckets S3 event notifications. 
| `bool` | `false` | no | +| enable\_gluesync | Enable metadata sync from Hive to the Glue catalog. | `bool` | `false` | no | +| enable\_hive\_metastore\_metrics | Enable sending Hive Metastore metrics to CloudWatch. | `bool` | `false` | no | +| enable\_metadata\_events | Enable Hive Metastore SNS listener. | `bool` | `false` | no | +| enable\_s3\_paid\_metrics | Enable managed S3 buckets request and data transfer metrics. | `bool` | `false` | no | +| enable\_vpc\_endpoint\_services | Enable metastore NLB, Route53 entries VPC access and VPC endpoint services, for cross-account access. | `bool` | `true` | no | +| encrypt\_db | Specifies whether the DB cluster is encrypted | `bool` | `false` | no | +| external\_data\_buckets | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access. | `list(any)` | `[]` | no | +| external\_database\_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. | `string` | `""` | no | +| hive\_metastore\_port | Port on which both Hive Metastore readwrite and readonly will run. | `number` | `9083` | no | +| hms\_additional\_environment\_variables | Additional environment variables for the Hive Metastore. | `map(any)` | `{}` | no | +| hms\_autogather\_stats | Read-write Hive metastore setting to enable/disable statistics auto-gather on table/partition creation. | `bool` | `true` | no | +| hms\_docker\_image | Docker image ID for the Hive Metastore. | `string` | n/a | yes | +| hms\_docker\_version | Version of the Docker image for the Hive Metastore. | `string` | n/a | yes | +| hms\_instance\_type | Hive Metastore instance type, possible values: ecs,k8s. | `string` | `"ecs"` | no | +| hms\_log\_level | Log level for the Hive Metastore. | `string` | `"INFO"` | no | +| hms\_nofile\_ulimit | Ulimit for the Hive Metastore container. | `string` | `"32768"` | no | +| hms\_ro\_cpu | CPU for the read only Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | +| hms\_ro\_db\_connection\_pool\_size | Read-only Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | +| hms\_ro\_ecs\_task\_count | Desired ECS task count of the read only Hive Metastore service. | `string` | `"3"` | no | +| hms\_ro\_heapsize | Heapsize for the read only Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | +| hms\_ro\_k8s\_replica\_count | Initial Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| hms\_ro\_k8s\_max\_replica\_count | Max Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| hms\_ro\_target\_cpu\_percentage | Read only Hive Metastore autoscaling threshold for CPU target usage. | `number` | `"2048"` | no | +| hms\_ro\_request\_partition\_limit | Read only Hive Metastore limits of request partitions. | `string` | n/a | no | +| hms\_rw\_request\_partition\_limit | Read Write Hive Metastore limits of request partitions. | `string` | n/a | no | +| hms\_rw\_cpu | CPU for the read/write Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | +| hms\_rw\_db\_connection\_pool\_size | Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | +| hms\_rw\_ecs\_task\_count | Desired ECS task count of the read/write Hive Metastore service. | `string` | `"3"` | no | +| hms\_rw\_heapsize | Heapsize for the read/write Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | +| hms\_rw\_k8s\_replica\_count | Initial Number of read/write Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| iam\_name\_root | Name to identify Hive Metastore IAM roles. | `string` | `"hms"` | no | +| ingress\_cidr | Generally allowed ingress CIDR list. | `list(string)` | n/a | yes | +| instance\_name | Apiary instance name to identify resources in multi-instance deployments. | `string` | `""` | no | +| k8s\_docker\_registry\_secret | Docker Registry authentication K8s secret name. | `string` | `""` | no | +| kafka\_bootstrap\_servers | Kafka bootstrap servers to send metastore events, setting this enables Hive Metastore Kafka listener. | `string` | `""` | no | +| kafka\_topic\_name | Kafka topic to send metastore events. | `string` | `""` | no | +| kiam\_arn | Kiam server IAM role ARN. | `string` | `""` | no | +| ldap\_base | Active directory LDAP base DN to search users and groups. | `string` | `""` | no | +| ldap\_ca\_cert | Base64 encoded Certificate Authority bundle to validate LDAPS connections. | `string` | `""` | no | +| ldap\_secret\_name | Active directory LDAP bind DN SecretsManager secret name. | `string` | `""` | no | +| ldap\_url | Active directory LDAP URL to configure Hadoop LDAP group mapping. | `string` | `""` | no | +| metastore\_namespace | k8s namespace to deploy metastore containers. | `string` | `"metastore"` | no | +| oidc\_provider | EKS cluster OIDC provider name, required for configuring IAM using IRSA. | `string` | `""` | no | +| private\_subnets | Private subnets. | `list(any)` | n/a | yes | +| ranger\_audit\_db\_url | Ranger DB audit provider configuration. | `string` | `""` | no | +| ranger\_audit\_secret\_name | Ranger DB audit secret name. | `string` | `""` | no | +| ranger\_audit\_solr\_url | Ranger Solr audit provider configuration. 
| `string` | `""` | no | +| ranger\_policy\_manager\_url | Ranger admin URL to synchronize policies. | `string` | `""` | no | +| rds\_max\_allowed\_packet | RDS/MySQL setting for parameter 'max\_allowed\_packet' in bytes. Default is 128MB (Note that MySQL default is 4MB). | `number` | `134217728` | no | +| rw\_ingress\_cidr | Read-Write metastore ingress CIDR list. If not set, defaults to `var.ingress_cidr`. | `list(string)` | `[]` | no | +| s3\_enable\_inventory | Enable S3 inventory configuration. | `bool` | `false` | no | +| s3\_inventory\_customer\_accounts | AWS account IDs allowed to access s3 inventory database. | `list(string)` | `[]` | no | +| s3\_inventory\_format | Output format for S3 inventory results. Can be Parquet, ORC, CSV | `string` | `"ORC"` | no | +| s3\_inventory\_update\_schedule | Cron schedule to update S3 inventory tables (if enabled). Defaults to every 12 hours. | `string` | `"0 */12 * * *"` | no | +| s3\_lifecycle\_abort\_incomplete\_multipart\_upload\_days | Number of days after which incomplete multipart uploads will be deleted. | `string` | `"7"` | no | +| s3\_lifecycle\_policy\_transition\_period | S3 Lifecycle Policy number of days for Transition rule | `string` | `"30"` | no | +| s3\_log\_expiry | Number of days after which Apiary S3 bucket logs expire. | `string` | `"365"` | no | +| s3\_logs\_sqs\_delay\_seconds | The time in seconds that the delivery of all messages in the queue will be delayed. | `number` | `300` | no | +| s3\_logs\_sqs\_message\_retention\_seconds | Time in seconds after which message will be deleted from the queue. | `number` | `345600` | no | +| s3\_logs\_sqs\_receive\_wait\_time\_seconds | The time for which a ReceiveMessage call will wait for a message to arrive (long polling) before returning. | `number` | `10` | no | +| s3\_logs\_sqs\_visibility\_timeout\_seconds | Time in seconds after which message will be returned to the queue if it is not deleted. 
| `number` | `3600` | no | +| s3\_storage\_class | S3 storage class after transition using lifecycle policy | `string` | `"INTELLIGENT_TIERING"` | no | +| secondary\_vpcs | List of VPCs to associate with Service Discovery namespace. | `list(any)` | `[]` | no | +| system\_schema\_customer\_accounts | AWS account IDs allowed to access system database. | `list(string)` | `[]` | no | +| system\_schema\_name | Name for the internal system database | `string` | `"apiary_system"` | no | +| table\_param\_filter | A regular expression for selecting necessary table parameters for the SNS listener. If the value isn't set, then no table parameters are selected. | `string` | `""` | no | +| vpc\_id | VPC ID. | `string` | n/a | yes | +| enable\_dashboard | make EKS & ECS dashboard optional | `bool` | true | no | +| rds\_family | RDS Family | `string` | aurora5.6 | no | +| datadog_metrics_enabled | Enable Datadog metrics for HMS | `bool` | false | no | +| datadog_metrics_hms_readwrite_readonly | Prometheus Metrics sent to datadog | list(string) | ["metrics_classloading_loaded_value","metrics_threads_count_value","metrics_memory_heap_max_value","metrics_init_total_count_tables_value","metrics_init_total_count_dbs_value","metrics_memory_heap_used_value","metrics_init_total_count_partitions_value"] | no | +| datadog_metrics_port | Port on which metrics will be sent for Datadog | string | 8080 | no | +| datadog_key_secret_name | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments. | string | null | no | +| datadog_agent_version | Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments. | string | 7.50.3-jmx | no | +| datadog_agent_enabled | Whether to include the datadog-agent container. This is only applicable to ECS deployments.
| string | false | no | ### apiary_assume_roles diff --git a/cloudwatch.tf b/cloudwatch.tf index ff7d224..c96882c 100644 --- a/cloudwatch.tf +++ b/cloudwatch.tf @@ -4,6 +4,12 @@ * Licensed under the Apache License, Version 2.0 (the "License"); */ +resource "aws_cloudwatch_log_group" "ecs" { + count = var.hms_instance_type == "ecs" ? 1 : 0 + name = local.instance_alias + tags = var.apiary_tags +} + data "template_file" "s3_widgets" { count = length(local.schemas_info) diff --git a/common.tf b/common.tf index 63e22d7..3c6f4e6 100644 --- a/common.tf +++ b/common.tf @@ -78,3 +78,23 @@ data "aws_route53_zone" "apiary_zone" { name = var.apiary_domain_name vpc_id = var.vpc_id } + +data "aws_secretsmanager_secret" "datadog_key" { + count = length(var.datadog_key_secret_name) > 0 ? 1 : 0 + name = var.datadog_key_secret_name +} + +data "aws_secretsmanager_secret_version" "datadog_key" { + count = length(var.datadog_key_secret_name) > 0 ? 1 : 0 + secret_id = data.aws_secretsmanager_secret.datadog_key[0].id +} + +data "external" "datadog_key" { + count = length(var.datadog_key_secret_name) > 0 ? 1 : 0 + program = ["echo", "${data.aws_secretsmanager_secret_version.datadog_key[0].secret_string}"] +} + +provider "datadog" { + api_key = chomp(data.external.datadog_key[0].result["api_key"]) + app_key = chomp(data.external.datadog_key[0].result["app_key"]) +} diff --git a/templates.tf b/templates.tf index 985706e..f4b5941 100644 --- a/templates.tf +++ b/templates.tf @@ -61,6 +61,13 @@ locals{ mysql_permissions = "ALL" mysql_master_cred_arn = var.external_database_host == "" ? aws_secretsmanager_secret.apiary_mysql_master_credentials[0].arn : null mysql_user_cred_arn = data.aws_secretsmanager_secret.db_rw_user.arn + + # Datadog variables + datadog_secret_key = length(var.datadog_key_secret_name) > 0 ? 
chomp(data.external.datadog_key[0].result["api_key"]) : "" + wd_instance_type = var.hms_instance_type + metrics_port = var.datadog_metrics_port + datadog_agent_version = var.datadog_agent_version + datadog_agent_enabled = var.datadog_agent_enabled }) hms_readonly_template = templatefile("${path.module}/templates/apiary-hms-readonly.json", { @@ -104,5 +111,12 @@ locals{ mysql_write_db = "${var.external_database_host == "" ? join("", aws_rds_cluster.apiary_cluster.*.endpoint) : var.external_database_host}" mysql_master_cred_arn = var.external_database_host == "" ? aws_secretsmanager_secret.apiary_mysql_master_credentials[0].arn : null mysql_user_cred_arn = data.aws_secretsmanager_secret.db_ro_user.arn + + # Datadog variables + datadog_agent_enabled = var.datadog_agent_enabled + datadog_secret_key = length(var.datadog_key_secret_name) > 0 ? chomp(data.external.datadog_key[0].result["api_key"]) : "" + wd_instance_type = var.hms_instance_type + metrics_port = var.datadog_metrics_port + datadog_agent_version = var.datadog_agent_version }) } diff --git a/templates/apiary-hms-readonly.json b/templates/apiary-hms-readonly.json index df313cf..48d31ea 100644 --- a/templates/apiary-hms-readonly.json +++ b/templates/apiary-hms-readonly.json @@ -39,7 +39,7 @@ ], "command": ["sh", "/allow-grant.sh"] }, -%{ endif } +%{ endif } { "name": "apiary-hms-readonly", "image": "${hms_docker_image}:${hms_docker_version}", @@ -60,6 +60,11 @@ "awslogs-stream-prefix": "/" } }, + "dockerLabels": { + "com.datadoghq.ad.instances": "[{ \"openmetrics_endpoint\": \"http://%%host%%:8080/actuator/prometheus\", \"namespace\": \"hmsreadonlylegacy\", \"metrics\": [\"metrics_classloading_loaded_value\", \"metrics_threads_count_value\", \"metrics_memory_heap_max_value\", \"metrics_init_total_count_tables_value\", \"metrics_init_total_count_dbs_value\", \"metrics_memory_heap_used_value\", \"metrics_init_total_count_partitions_value\", \"jvm_threads_current\", \"jvm_threads_started_total\", 
\"jvm_memory_bytes_used\", \"jvm_memory_bytes_init\", \"jvm_gc_collection_seconds_count\", \"jvm_gc_collection_seconds\", \"process_cpu_seconds_total\", \"java_lang_operatingsystem_processcpuload\", \"java_lang_operatingsystem_processcputime\", \"metrics_threads_runnable_count_value\", \"metrics_threads_waiting_count_value\", \"java_lang_memory_heapmemoryusage_used\", \"metrics_memory_heap_init_value\", \"metrics_api_get_partition_by_name_count\", \"metrics_api_get_partitions_by_names_count\", \"metrics_api_get_partition_names_count\", \"metrics_api_get_partitions_by_expr_count\", \"metrics_api_get_partitions_count\", \"metrics_api_get_partition_count\", \"metrics_api_get_partitions_by_filter_count\", \"metrics_api_add_partitions_count\", \"metrics_api_add_partitions_req_count\", \"metrics_api_drop_partition_by_name_count\", \"metrics_api_add_partition_count\", \"metrics_api_alter_partitions_count\", \"metrics_api_create_table_count\", \"metrics_api_alter_table_with_cascade_count\", \"metrics_api_get_table_meta_count\", \"metrics_api_get_table_metas_count\", \"metrics_api_get_table_count\", \"metrics_api_alter_table_count\", \"metrics_api_get_tables_count\", \"metrics_api_get_all_tables_count\", \"metrics_api_drop_table_count\", \"metrics_api_get_multi_table_count\", \"metrics_api_get_database_count\", \"metrics_api_get_all_databases_count\", \"metrics_api_get_databases_count\", \"metrics_api_create_function_count\", \"metrics_api_getmetaconf_count\", \"metrics_api_alter_table_with_environment_context_count\", \"metrics_api_delete_column_statistics_by_table_count\", \"metrics_api_get_functions_count\", \"metrics_api_get_function_count\", \"metrics_api_shutdown_count\", \"metrics_api_flushcache_count\", \"metrics_api_get_indexes_count\", \"metrics_api_get_config_value_count\", \"metrics_api_set_ugi_count\", \"metrics_api_get_all_functions_count\", \"metrics_api_get_table_req_95thpercentile\", \"metrics_api_get_table_req_50thpercentile\", 
\"metrics_api_get_table_req_count\", \"metrics_api_get_table_req_max\", \"metrics_api_get_databases_count\", \"metrics_api_get_databases_95thpercentile\", \"metrics_api_get_databases_50thpercentile\", \"metrics_api_get_databases_max\", \"metrics_api_get_partitions_95thpercentile\", \"metrics_api_get_partitions_50thpercentile\", \"metrics_api_get_partitions_count\", \"metrics_api_get_partitions_max\", \"metrics_api_get_partitions_50thpercentile\", \"metrics_api_get_table_req_50thpercentile\", \"metrics_api_get_database_95thpercentile\", \"metrics_api_get_database_50thpercentile\", \"metrics_kafka_listener_failures_count\", \"metrics_kafka_listener_successes_count\", \"metrics_api_get_table_objects_by_name_req_max\" ], \"type_overrides\": { \"metrics_classloading_loaded_value\": \"gauge\", \"metrics_threads_count_value\": \"gauge\", \"metrics_memory_heap_max_value\": \"gauge\", \"metrics_init_total_count_tables_value\": \"gauge\", \"metrics_init_total_count_dbs_value\": \"gauge\", \"metrics_memory_heap_used_value\": \"gauge\", \"metrics_init_total_count_partitions_value\": \"gauge\", \"jvm_threads_current\": \"gauge\", \"jvm_threads_started_total\": \"gauge\", \"jvm_memory_bytes_used\": \"gauge\", \"jvm_memory_bytes_init\": \"gauge\", \"jvm_gc_collection_seconds_count\": \"gauge\", \"jvm_gc_collection_seconds\": \"gauge\", \"process_cpu_seconds_total\": \"gauge\", \"java_lang_operatingsystem_processcpuload\": \"gauge\", \"java_lang_operatingsystem_processcputime\": \"gauge\", \"metrics_threads_runnable_count_value\": \"gauge\", \"metrics_threads_waiting_count_value\": \"gauge\", \"java_lang_memory_heapmemoryusage_used\": \"gauge\", \"metrics_memory_heap_init_value\": \"gauge\", \"metrics_api_get_partition_by_name_count\": \"gauge\", \"metrics_api_get_partitions_by_names_count\": \"gauge\", \"metrics_api_get_partition_names_count\": \"gauge\", \"metrics_api_get_partitions_by_expr_count\": \"gauge\", \"metrics_api_get_partitions_count\": \"gauge\", 
\"metrics_api_get_partition_count\": \"gauge\", \"metrics_api_get_partitions_by_filter_count\": \"gauge\", \"metrics_api_add_partitions_count\": \"gauge\", \"metrics_api_add_partitions_req_count\": \"gauge\", \"metrics_api_drop_partition_by_name_count\": \"gauge\", \"metrics_api_add_partition_count\": \"gauge\", \"metrics_api_alter_partitions_count\": \"gauge\", \"metrics_api_create_table_count\": \"gauge\", \"metrics_api_alter_table_with_cascade_count\": \"gauge\", \"metrics_api_get_table_meta_count\": \"gauge\", \"metrics_api_get_table_metas_count\": \"gauge\", \"metrics_api_get_table_count\": \"gauge\", \"metrics_api_alter_table_count\": \"gauge\", \"metrics_api_get_tables_count\": \"gauge\", \"metrics_api_get_all_tables_count\": \"gauge\", \"metrics_api_drop_table_count\": \"gauge\", \"metrics_api_get_multi_table_count\": \"gauge\", \"metrics_api_get_database_count\": \"gauge\", \"metrics_api_get_all_databases_count\": \"gauge\", \"metrics_api_get_databases_count\": \"gauge\", \"metrics_api_create_function_count\": \"gauge\", \"metrics_api_getmetaconf_count\": \"gauge\", \"metrics_api_alter_table_with_environment_context_count\": \"gauge\", \"metrics_api_delete_column_statistics_by_table_count\": \"gauge\", \"metrics_api_get_functions_count\": \"gauge\", \"metrics_api_get_function_count\": \"gauge\", \"metrics_api_shutdown_count\": \"gauge\", \"metrics_api_flushcache_count\": \"gauge\", \"metrics_api_get_indexes_count\": \"gauge\", \"metrics_api_get_config_value_count\": \"gauge\", \"metrics_api_set_ugi_count\": \"gauge\", \"metrics_api_get_all_functions_count\": \"gauge\", \"metrics_api_get_table_req_95thpercentile\": \"gauge\", \"metrics_api_get_table_req_50thpercentile\": \"gauge\", \"metrics_api_get_table_req_count\": \"gauge\", \"metrics_api_get_table_req_max\": \"gauge\", \"metrics_api_get_databases_count\": \"gauge\", \"metrics_api_get_databases_95thpercentile\": \"gauge\", \"metrics_api_get_databases_50thpercentile\": \"gauge\", 
\"metrics_api_get_databases_max\": \"gauge\", \"metrics_api_get_partitions_95thpercentile\": \"gauge\", \"metrics_api_get_partitions_50thpercentile\": \"gauge\", \"metrics_api_get_partitions_count\": \"gauge\", \"metrics_api_get_partitions_max\": \"gauge\", \"metrics_api_get_partitions_50thpercentile\": \"gauge\", \"metrics_api_get_table_req_50thpercentile\": \"gauge\",\"metrics_api_get_database_95thpercentile\": \"gauge\",\"metrics_api_get_database_50thpercentile\": \"gauge\", \"metrics_kafka_listener_failures_count\": \"gauge\", \"metrics_kafka_listener_successes_count\": \"gauge\", \"metrics_api_get_table_objects_by_name_req_max\": \"gauge\"} }]", + "com.datadoghq.ad.check_names": "[\"openmetrics\"]", + "com.datadoghq.ad.init_configs": "[{}]" + }, "portMappings": [ { "containerPort": 9083, @@ -73,7 +78,7 @@ "condition": "SUCCESS" } ], -%{ endif } +%{ endif } "environment":[ { "name": "MYSQL_DB_HOST", @@ -179,4 +184,29 @@ %{ endfor } ] } +%{ if datadog_agent_enabled } + ,{ + "name": "datadog-agent", + "image": "public.ecr.aws/datadog/agent:${datadog_agent_version}", + "essential": true, + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "${loggroup}", + "awslogs-region": "${region}", + "awslogs-stream-prefix": "/" + } + }, + "environment": [ + { + "name": "DD_API_KEY", + "value": "${datadog_secret_key}" + }, + { + "name": "ECS_FARGATE", + "value": "true" + } + ] + } +%{ endif } ] diff --git a/templates/apiary-hms-readwrite.json b/templates/apiary-hms-readwrite.json index 0afae54..20a4071 100644 --- a/templates/apiary-hms-readwrite.json +++ b/templates/apiary-hms-readwrite.json @@ -60,6 +60,11 @@ "awslogs-stream-prefix": "/" } }, + "dockerLabels": { + "com.datadoghq.ad.instances": "[{ \"openmetrics_endpoint\": \"http://%%host%%:8080/actuator/prometheus\", \"namespace\": \"hmsreadwritelegacy\", \"metrics\": [\"metrics_classloading_loaded_value\", \"metrics_threads_count_value\", \"metrics_memory_heap_max_value\", 
\"metrics_init_total_count_tables_value\", \"metrics_init_total_count_dbs_value\", \"metrics_memory_heap_used_value\", \"metrics_init_total_count_partitions_value\", \"jvm_threads_current\", \"jvm_threads_started_total\", \"jvm_memory_bytes_used\", \"jvm_memory_bytes_init\", \"jvm_gc_collection_seconds_count\", \"jvm_gc_collection_seconds\", \"process_cpu_seconds_total\", \"java_lang_operatingsystem_processcpuload\", \"java_lang_operatingsystem_processcputime\", \"metrics_threads_runnable_count_value\", \"metrics_threads_waiting_count_value\", \"java_lang_memory_heapmemoryusage_used\", \"metrics_memory_heap_init_value\", \"metrics_api_get_partition_by_name_count\", \"metrics_api_get_partitions_by_names_count\", \"metrics_api_get_partition_names_count\", \"metrics_api_get_partitions_by_expr_count\", \"metrics_api_get_partitions_count\", \"metrics_api_get_partition_count\", \"metrics_api_get_partitions_by_filter_count\", \"metrics_api_add_partitions_count\", \"metrics_api_add_partitions_req_count\", \"metrics_api_drop_partition_by_name_count\", \"metrics_api_add_partition_count\", \"metrics_api_alter_partitions_count\", \"metrics_api_create_table_count\", \"metrics_api_alter_table_with_cascade_count\", \"metrics_api_get_table_meta_count\", \"metrics_api_get_table_metas_count\", \"metrics_api_get_table_count\", \"metrics_api_alter_table_count\", \"metrics_api_get_tables_count\", \"metrics_api_get_all_tables_count\", \"metrics_api_drop_table_count\", \"metrics_api_get_multi_table_count\", \"metrics_api_get_database_count\", \"metrics_api_get_all_databases_count\", \"metrics_api_get_databases_count\", \"metrics_api_create_function_count\", \"metrics_api_getmetaconf_count\", \"metrics_api_alter_table_with_environment_context_count\", \"metrics_api_delete_column_statistics_by_table_count\", \"metrics_api_get_functions_count\", \"metrics_api_get_function_count\", \"metrics_api_shutdown_count\", \"metrics_api_flushcache_count\", \"metrics_api_get_indexes_count\", 
\"metrics_api_get_config_value_count\", \"metrics_api_set_ugi_count\", \"metrics_api_get_all_functions_count\", \"metrics_api_get_table_req_95thpercentile\", \"metrics_api_get_table_req_50thpercentile\", \"metrics_api_get_table_req_count\", \"metrics_api_get_table_req_max\", \"metrics_api_get_databases_count\", \"metrics_api_get_databases_95thpercentile\", \"metrics_api_get_databases_50thpercentile\", \"metrics_api_get_databases_max\", \"metrics_api_get_partitions_95thpercentile\", \"metrics_api_get_partitions_50thpercentile\", \"metrics_api_get_partitions_count\", \"metrics_api_get_partitions_max\", \"metrics_api_get_partitions_50thpercentile\", \"metrics_api_get_table_req_50thpercentile\", \"metrics_api_get_database_95thpercentile\", \"metrics_api_get_database_50thpercentile\", \"metrics_kafka_listener_failures_count\", \"metrics_kafka_listener_successes_count\", \"metrics_api_get_table_objects_by_name_req_max\" ], \"type_overrides\": { \"metrics_classloading_loaded_value\": \"gauge\", \"metrics_threads_count_value\": \"gauge\", \"metrics_memory_heap_max_value\": \"gauge\", \"metrics_init_total_count_tables_value\": \"gauge\", \"metrics_init_total_count_dbs_value\": \"gauge\", \"metrics_memory_heap_used_value\": \"gauge\", \"metrics_init_total_count_partitions_value\": \"gauge\", \"jvm_threads_current\": \"gauge\", \"jvm_threads_started_total\": \"gauge\", \"jvm_memory_bytes_used\": \"gauge\", \"jvm_memory_bytes_init\": \"gauge\", \"jvm_gc_collection_seconds_count\": \"gauge\", \"jvm_gc_collection_seconds\": \"gauge\", \"process_cpu_seconds_total\": \"gauge\", \"java_lang_operatingsystem_processcpuload\": \"gauge\", \"java_lang_operatingsystem_processcputime\": \"gauge\", \"metrics_threads_runnable_count_value\": \"gauge\", \"metrics_threads_waiting_count_value\": \"gauge\", \"java_lang_memory_heapmemoryusage_used\": \"gauge\", \"metrics_memory_heap_init_value\": \"gauge\", \"metrics_api_get_partition_by_name_count\": \"gauge\", 
\"metrics_api_get_partitions_by_names_count\": \"gauge\", \"metrics_api_get_partition_names_count\": \"gauge\", \"metrics_api_get_partitions_by_expr_count\": \"gauge\", \"metrics_api_get_partitions_count\": \"gauge\", \"metrics_api_get_partition_count\": \"gauge\", \"metrics_api_get_partitions_by_filter_count\": \"gauge\", \"metrics_api_add_partitions_count\": \"gauge\", \"metrics_api_add_partitions_req_count\": \"gauge\", \"metrics_api_drop_partition_by_name_count\": \"gauge\", \"metrics_api_add_partition_count\": \"gauge\", \"metrics_api_alter_partitions_count\": \"gauge\", \"metrics_api_create_table_count\": \"gauge\", \"metrics_api_alter_table_with_cascade_count\": \"gauge\", \"metrics_api_get_table_meta_count\": \"gauge\", \"metrics_api_get_table_metas_count\": \"gauge\", \"metrics_api_get_table_count\": \"gauge\", \"metrics_api_alter_table_count\": \"gauge\", \"metrics_api_get_tables_count\": \"gauge\", \"metrics_api_get_all_tables_count\": \"gauge\", \"metrics_api_drop_table_count\": \"gauge\", \"metrics_api_get_multi_table_count\": \"gauge\", \"metrics_api_get_database_count\": \"gauge\", \"metrics_api_get_all_databases_count\": \"gauge\", \"metrics_api_get_databases_count\": \"gauge\", \"metrics_api_create_function_count\": \"gauge\", \"metrics_api_getmetaconf_count\": \"gauge\", \"metrics_api_alter_table_with_environment_context_count\": \"gauge\", \"metrics_api_delete_column_statistics_by_table_count\": \"gauge\", \"metrics_api_get_functions_count\": \"gauge\", \"metrics_api_get_function_count\": \"gauge\", \"metrics_api_shutdown_count\": \"gauge\", \"metrics_api_flushcache_count\": \"gauge\", \"metrics_api_get_indexes_count\": \"gauge\", \"metrics_api_get_config_value_count\": \"gauge\", \"metrics_api_set_ugi_count\": \"gauge\", \"metrics_api_get_all_functions_count\": \"gauge\", \"metrics_api_get_table_req_95thpercentile\": \"gauge\", \"metrics_api_get_table_req_50thpercentile\": \"gauge\", \"metrics_api_get_table_req_count\": \"gauge\", 
\"metrics_api_get_table_req_max\": \"gauge\", \"metrics_api_get_databases_count\": \"gauge\", \"metrics_api_get_databases_95thpercentile\": \"gauge\", \"metrics_api_get_databases_50thpercentile\": \"gauge\", \"metrics_api_get_databases_max\": \"gauge\", \"metrics_api_get_partitions_95thpercentile\": \"gauge\", \"metrics_api_get_partitions_50thpercentile\": \"gauge\", \"metrics_api_get_partitions_count\": \"gauge\", \"metrics_api_get_partitions_max\": \"gauge\", \"metrics_api_get_partitions_50thpercentile\": \"gauge\", \"metrics_api_get_table_req_50thpercentile\": \"gauge\",\"metrics_api_get_database_95thpercentile\": \"gauge\",\"metrics_api_get_database_50thpercentile\": \"gauge\", \"metrics_kafka_listener_failures_count\": \"gauge\", \"metrics_kafka_listener_successes_count\": \"gauge\", \"metrics_api_get_table_objects_by_name_req_max\": \"gauge\"} }]", + "com.datadoghq.ad.check_names": "[\"openmetrics\"]", + "com.datadoghq.ad.init_configs": "[{}]" + }, "portMappings": [ { "containerPort": 9083, @@ -231,4 +236,29 @@ %{ endfor } ] } +%{ if datadog_agent_enabled } + ,{ + "name": "datadog-agent", + "image": "public.ecr.aws/datadog/agent:${datadog_agent_version}", + "essential": true, + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "${loggroup}", + "awslogs-region": "${region}", + "awslogs-stream-prefix": "/" + } + }, + "environment": [ + { + "name": "DD_API_KEY", + "value": "${datadog_secret_key}" + }, + { + "name": "ECS_FARGATE", + "value": "true" + } + ] + } +%{ endif } ] diff --git a/variables.tf b/variables.tf index f6de153..d0b90fe 100644 --- a/variables.tf +++ b/variables.tf @@ -715,3 +715,21 @@ variable "hms_ro_request_partition_limit" { type = string default = "" } + +variable "datadog_key_secret_name" { + description = "Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments." 
+ type = string + default = null +} + +variable "datadog_agent_version" { + description = "Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments." + type = string + default = "7.50.3-jmx" +} + +variable "datadog_agent_enabled" { + description = "Whether to include the datadog-agent container. This is only applicable to ECS deployments." + type = bool + default = false +} From 510cf55d52fe5051fdb033670e83416a53547c0e Mon Sep 17 00:00:00 2001 From: Eddy Decena Date: Mon, 25 Mar 2024 12:59:40 -0400 Subject: [PATCH 02/14] fix: Enable Datadog filtering for HMS/WD metrics on ECS with Apiary tags (#252) Co-authored-by: Eddy Perez --- common.tf | 2 ++ templates.tf | 2 ++ templates/apiary-hms-readonly.json | 4 ++++ templates/apiary-hms-readwrite.json | 4 ++++ 4 files changed, 12 insertions(+) diff --git a/common.tf b/common.tf index 3c6f4e6..4990bcd 100644 --- a/common.tf +++ b/common.tf @@ -9,6 +9,8 @@ locals { apiary_bucket_prefix = "${local.instance_alias}-${data.aws_caller_identity.current.account_id}-${data.aws_region.current.name}" apiary_assume_role_bucket_prefix = [for assumerole in var.apiary_assume_roles : "${local.instance_alias}-${data.aws_caller_identity.current.account_id}-${lookup(assumerole, "allow_cross_region_access", false) ? "*" : data.aws_region.current.name}"] enable_route53_records = var.apiary_domain_name == "" ? 
false : true + + datadog_tags = join(" ", formatlist("%s:%s", keys(var.apiary_tags), values(var.apiary_tags))) # # Create a new list of maps with some extra attributes needed later # diff --git a/templates.tf b/templates.tf index f4b5941..8df9a3f 100644 --- a/templates.tf +++ b/templates.tf @@ -68,6 +68,7 @@ locals{ metrics_port = var.datadog_metrics_port datadog_agent_version = var.datadog_agent_version datadog_agent_enabled = var.datadog_agent_enabled + datadog_tags = local.datadog_tags }) hms_readonly_template = templatefile("${path.module}/templates/apiary-hms-readonly.json", { @@ -118,5 +119,6 @@ locals{ wd_instance_type = var.hms_instance_type metrics_port = var.datadog_metrics_port datadog_agent_version = var.datadog_agent_version + datadog_tags = local.datadog_tags }) } diff --git a/templates/apiary-hms-readonly.json b/templates/apiary-hms-readonly.json index 48d31ea..e2a8ab1 100644 --- a/templates/apiary-hms-readonly.json +++ b/templates/apiary-hms-readonly.json @@ -205,6 +205,10 @@ { "name": "ECS_FARGATE", "value": "true" + }, + { + "name": "DD_TAGS", + "value": "${datadog_tags}" } ] } diff --git a/templates/apiary-hms-readwrite.json b/templates/apiary-hms-readwrite.json index 20a4071..b9a98ec 100644 --- a/templates/apiary-hms-readwrite.json +++ b/templates/apiary-hms-readwrite.json @@ -257,6 +257,10 @@ { "name": "ECS_FARGATE", "value": "true" + }, + { + "name": "DD_TAGS", + "value": "${datadog_tags}" } ] } From 3e442345f1d18a51b490ec5e1120906c5d559cc7 Mon Sep 17 00:00:00 2001 From: dazou-exp <164045802+dazou-exp@users.noreply.github.com> Date: Mon, 25 Mar 2024 12:47:28 -0500 Subject: [PATCH 03/14] feature: Added common_producer_iamroles to s3 bucket policy (#251) * added common_producer_iamroles to grant read-write access to all apiary managed schemas * fix whitespace issues * update description for common_producer_iamroles * adjusted permissions granted to common_producer_iamroles * adjusted permissions granted to common_producer_iamroles * bump version 
to 7.1.0 * added permissions to read-write object tagging * fix typo missing comma * fix typo extra comma --------- Co-authored-by: David Zou --- CHANGELOG.md | 4 + VARIABLES.md | 255 +++++++++++++++------------- s3.tf | 1 + templates/apiary-bucket-policy.json | 31 ++++ variables.tf | 6 + 5 files changed, 177 insertions(+), 120 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1da8d0..ada126d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.0] - 2024-03-21 +### Added +- Added `common_producer_iamroles` to allow roles read-write access to all Apiary managed schemas. + ## [7.0.1] - 2024-01-22 ### Added - Added `datadog-agent` for HMS-Readonly and HMS-Readwrite in ECS. diff --git a/VARIABLES.md b/VARIABLES.md index dcdee63..3bd023c 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -2,122 +2,123 @@ ## Inputs -| Name | Description | Type | Default | Required | -|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:| -| apiary\_assume\_roles | Cross account AWS IAM roles allowed write access to managed Apiary S3 buckets using assume policy. 
| `list(any)` | `[]` | no | -| apiary\_consumer\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to all data in managed Apiary S3 buckets. | `list(string)` | `[]` | no | +| Name | Description | Type | Default | Required | +|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------:| +| apiary\_assume\_roles | Cross account AWS IAM roles allowed write access to managed Apiary S3 buckets using assume policy. | `list(any)` | `[]` | no | +| apiary\_consumer\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to all data in managed Apiary S3 buckets. | `list(string)` | `[]` | no | | apiary\_consumer\_prefix\_iamroles | AWS IAM roles allowed unrestricted (not subject to `apiary_customer_condition`) read access to certain prefixes in managed Apiary S3 buckets. See below section for more information and format. | `map(map(list(string)))` | `{}` | no | -| apiary\_customer\_accounts | AWS account IDs for clients of this Metastore. | `list(string)` | `[]` | no | -| apiary\_customer\_condition | IAM policy condition applied to customer account S3 object access. | `string` | `""` | no | -| apiary\_database\_name | Database name to create in RDS for Apiary. | `string` | `"apiary"` | no | -| apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | -| apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | -| apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | -| apiary\_governance\_iamroles | AWS IAM governance roles allowed read and tagging access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | -| apiary\_log\_bucket | Bucket for Apiary logs.If this is blank, module will create a bucket. | `string` | `""` | no | -| apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | -| apiary\_managed\_schemas | List of maps, each map contains schema name from which S3 bucket names will be derived, and various properties. The corresponding S3 bucket will be named as apiary\_instance-aws\_account-aws\_region-schema\_name. | `list(map(string))` | `[]` | no | -| apiary\_producer\_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | `map(any)` | `{}` | no | -| apiary\_rds\_additional\_sg | Comma-separated string containing additional security groups to attach to RDS. | `list(any)` | `[]` | no | -| apiary\_shared\_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | `list(any)` | `[]` | no | -| apiary\_tags | Common tags that get put on all resources. | `map(any)` | n/a | yes | -| atlas\_cluster\_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | `string` | `""` | no | -| atlas\_kafka\_bootstrap\_servers | Kafka instance url. | `string` | `""` | no | -| aws\_region | AWS region. | `string` | n/a | yes | -| dashboard\_namespace | k8s namespace to deploy grafana dashboard. | `string` | `"monitoring"` | no | -| db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | -| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. 
| `string` | n/a | yes | -| db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. | `string` | `"02:00-03:00"` | no | -| db\_enable\_performance\_insights | Enable RDS Performance Insights | `bool` | `false` | no | -| db\_enhanced\_monitoring\_interval | RDS monitoring interval (in seconds) for enhanced monitoring. Valid values are 0, 1, 5, 10, 15, 30, 60. Default is 0. | `number` | `0` | no | -| db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | n/a | yes | -| db\_instance\_count | Desired count of database cluster instances. | `string` | `"2"` | no | -| db\_maintenance\_window | Preferred maintenance window for the RDS Metastore DB in UTC. | `string` | `"wed:03:00-wed:04:00"` | no | -| db\_master\_username | Aurora cluster MySQL master user name. | `string` | `"apiary"` | no | -| db\_ro\_secret\_name | Aurora cluster MySQL read-only user SecretsManger secret name. | `string` | `""` | no | -| db\_rw\_secret\_name | Aurora cluster MySQL read/write user SecretsManager secret name. | `string` | `""` | no | -| disallow\_incompatible\_col\_type\_changes | Hive metastore setting to disallow validation when incompatible schema type changes. | `bool` | `true` | no | -| docker\_registry\_auth\_secret\_name | Docker Registry authentication SecretManager secret name. | `string` | `""` | no | -| ecs\_domain\_extension | Domain name to use for hosted zone created by ECS service discovery. | `string` | `"lcl"` | no | -| elb\_timeout | Idle timeout for Apiary ELB. | `string` | `"1800"` | no | -| enable\_apiary\_s3\_log\_hive | Create hive database to archive s3 logs in parquet format.Only applicable when module manages logs S3 bucket. | `bool` | `true` | no | -| enable\_autoscaling | Enable read only Hive Metastore k8s horizontal pod autoscaling. | `bool` | `true` | no | -| enable\_data\_events | Enable managed buckets S3 event notifications. 
| `bool` | `false` | no | -| enable\_gluesync | Enable metadata sync from Hive to the Glue catalog. | `bool` | `false` | no | -| enable\_hive\_metastore\_metrics | Enable sending Hive Metastore metrics to CloudWatch. | `bool` | `false` | no | -| enable\_metadata\_events | Enable Hive Metastore SNS listener. | `bool` | `false` | no | -| enable\_s3\_paid\_metrics | Enable managed S3 buckets request and data transfer metrics. | `bool` | `false` | no | -| enable\_vpc\_endpoint\_services | Enable metastore NLB, Route53 entries VPC access and VPC endpoint services, for cross-account access. | `bool` | `true` | no | -| encrypt\_db | Specifies whether the DB cluster is encrypted | `bool` | `false` | no | -| external\_data\_buckets | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access. | `list(any)` | `[]` | no | -| external\_database\_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. | `string` | `""` | no | -| hive\_metastore\_port | Port on which both Hive Metastore readwrite and readonly will run. | `number` | `9083` | no | -| hms\_additional\_environment\_variables | Additional environment variables for the Hive Metastore. | `map(any)` | `{}` | no | -| hms\_autogather\_stats | Read-write Hive metastore setting to enable/disable statistics auto-gather on table/partition creation. | `bool` | `true` | no | -| hms\_docker\_image | Docker image ID for the Hive Metastore. | `string` | n/a | yes | -| hms\_docker\_version | Version of the Docker image for the Hive Metastore. | `string` | n/a | yes | -| hms\_instance\_type | Hive Metastore instance type, possible values: ecs,k8s. | `string` | `"ecs"` | no | -| hms\_log\_level | Log level for the Hive Metastore. | `string` | `"INFO"` | no | -| hms\_nofile\_ulimit | Ulimit for the Hive Metastore container. | `string` | `"32768"` | no | -| hms\_ro\_cpu | CPU for the read only Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | -| hms\_ro\_db\_connection\_pool\_size | Read-only Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | -| hms\_ro\_ecs\_task\_count | Desired ECS task count of the read only Hive Metastore service. | `string` | `"3"` | no | -| hms\_ro\_heapsize | Heapsize for the read only Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | -| hms\_ro\_k8s\_replica\_count | Initial Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| hms\_ro\_k8s\_max\_replica\_count | Max Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| hms\_ro\_target\_cpu\_percentage | Read only Hive Metastore autoscaling threshold for CPU target usage. | `number` | `"2048"` | no | -| hms\_ro\_request\_partition\_limit | Read only Hive Metastore limits of request partitions. | `string` | n/a | no | -| hms\_rw\_request\_partition\_limit | Read Write Hive Metastore limits of request partitions. | `string` | n/a | no | -| hms\_rw\_cpu | CPU for the read/write Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | -| hms\_rw\_db\_connection\_pool\_size | Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | -| hms\_rw\_ecs\_task\_count | Desired ECS task count of the read/write Hive Metastore service. | `string` | `"3"` | no | -| hms\_rw\_heapsize | Heapsize for the read/write Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | -| hms\_rw\_k8s\_replica\_count | Initial Number of read/write Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | -| iam\_name\_root | Name to identify Hive Metastore IAM roles. | `string` | `"hms"` | no | -| ingress\_cidr | Generally allowed ingress CIDR list. | `list(string)` | n/a | yes | -| instance\_name | Apiary instance name to identify resources in multi-instance deployments. | `string` | `""` | no | -| k8s\_docker\_registry\_secret | Docker Registry authentication K8s secret name. | `string` | `""` | no | -| kafka\_bootstrap\_servers | Kafka bootstrap servers to send metastore events, setting this enables Hive Metastore Kafka listener. | `string` | `""` | no | -| kafka\_topic\_name | Kafka topic to send metastore events. | `string` | `""` | no | -| kiam\_arn | Kiam server IAM role ARN. | `string` | `""` | no | -| ldap\_base | Active directory LDAP base DN to search users and groups. | `string` | `""` | no | -| ldap\_ca\_cert | Base64 encoded Certificate Authority bundle to validate LDAPS connections. | `string` | `""` | no | -| ldap\_secret\_name | Active directory LDAP bind DN SecretsManager secret name. | `string` | `""` | no | -| ldap\_url | Active directory LDAP URL to configure Hadoop LDAP group mapping. | `string` | `""` | no | -| metastore\_namespace | k8s namespace to deploy metastore containers. | `string` | `"metastore"` | no | -| oidc\_provider | EKS cluster OIDC provider name, required for configuring IAM using IRSA. | `string` | `""` | no | -| private\_subnets | Private subnets. | `list(any)` | n/a | yes | -| ranger\_audit\_db\_url | Ranger DB audit provider configuration. | `string` | `""` | no | -| ranger\_audit\_secret\_name | Ranger DB audit secret name. | `string` | `""` | no | -| ranger\_audit\_solr\_url | Ranger Solr audit provider configuration. 
| `string` | `""` | no | -| ranger\_policy\_manager\_url | Ranger admin URL to synchronize policies. | `string` | `""` | no | -| rds\_max\_allowed\_packet | RDS/MySQL setting for parameter 'max\_allowed\_packet' in bytes. Default is 128MB (Note that MySQL default is 4MB). | `number` | `134217728` | no | -| rw\_ingress\_cidr | Read-Write metastore ingress CIDR list. If not set, defaults to `var.ingress_cidr`. | `list(string)` | `[]` | no | -| s3\_enable\_inventory | Enable S3 inventory configuration. | `bool` | `false` | no | -| s3\_inventory\_customer\_accounts | AWS account IDs allowed to access s3 inventory database. | `list(string)` | `[]` | no | -| s3\_inventory\_format | Output format for S3 inventory results. Can be Parquet, ORC, CSV | `string` | `"ORC"` | no | -| s3\_inventory\_update\_schedule | Cron schedule to update S3 inventory tables (if enabled). Defaults to every 12 hours. | `string` | `"0 */12 * * *"` | no | -| s3\_lifecycle\_abort\_incomplete\_multipart\_upload\_days | Number of days after which incomplete multipart uploads will be deleted. | `string` | `"7"` | no | -| s3\_lifecycle\_policy\_transition\_period | S3 Lifecycle Policy number of days for Transition rule | `string` | `"30"` | no | -| s3\_log\_expiry | Number of days after which Apiary S3 bucket logs expire. | `string` | `"365"` | no | -| s3\_logs\_sqs\_delay\_seconds | The time in seconds that the delivery of all messages in the queue will be delayed. | `number` | `300` | no | -| s3\_logs\_sqs\_message\_retention\_seconds | Time in seconds after which message will be deleted from the queue. | `number` | `345600` | no | -| s3\_logs\_sqs\_receive\_wait\_time\_seconds | The time for which a ReceiveMessage call will wait for a message to arrive (long polling) before returning. | `number` | `10` | no | -| s3\_logs\_sqs\_visibility\_timeout\_seconds | Time in seconds after which message will be returned to the queue if it is not deleted. 
| `number` | `3600` | no | -| s3\_storage\_class | S3 storage class after transition using lifecycle policy | `string` | `"INTELLIGENT_TIERING"` | no | -| secondary\_vpcs | List of VPCs to associate with Service Discovery namespace. | `list(any)` | `[]` | no | -| system\_schema\_customer\_accounts | AWS account IDs allowed to access system database. | `list(string)` | `[]` | no | -| system\_schema\_name | Name for the internal system database | `string` | `"apiary_system"` | no | -| table\_param\_filter | A regular expression for selecting necessary table parameters for the SNS listener. If the value isn't set, then no table parameters are selected. | `string` | `""` | no | -| vpc\_id | VPC ID. | `string` | n/a | yes | -| enable\_dashboard | make EKS & ECS dashboard optional | `bool` | true | no | -| rds\_family | RDS Family | `string` | aurora5.6 | no | -| datadog_metrics_enabled | Enable Datadog metrics for HMS | `bool` | false | no | -| datadog_metrics_hms_readwrite_readonly | Prometheus Metrics sent to datadog | list(string) | ["metrics_classloading_loaded_value","metrics_threads_count_value","metrics_memory_heap_max_value","metrics_init_total_count_tables_value","metrics_init_total_count_dbs_value","metrics_memory_heap_used_value","metrics_init_total_count_partitions_value"] | no | -| datadog_metrics_port | Port in which metrics will be send for Datadog | string | 8080 | no | -| datadog_key_secret_name | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments. | string | null | no | -| datadog_agent_version | Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments. | string | 7.50.3-jmx | no | -| datadog_agent_enabled | Whether to include the datadog-agent container. This is only applicable to ECS deployments. | string | false | no | +| apiary\_customer\_accounts | AWS account IDs for clients of this Metastore. 
| `list(string)` | `[]` | no | +| apiary\_customer\_condition | IAM policy condition applied to customer account S3 object access. | `string` | `""` | no | +| apiary\_database\_name | Database name to create in RDS for Apiary. | `string` | `"apiary"` | no | +| apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | +| apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | +| apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | +| apiary\_governance\_iamroles | AWS IAM governance roles allowed read and tagging access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | +| apiary\_log\_bucket | Bucket for Apiary logs.If this is blank, module will create a bucket. | `string` | `""` | no | +| apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | +| apiary\_managed\_schemas | List of maps, each map contains schema name from which S3 bucket names will be derived, and various properties. The corresponding S3 bucket will be named as apiary\_instance-aws\_account-aws\_region-schema\_name. | `list(map(string))` | `[]` | no | +| apiary\_producer\_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | `map(any)` | `{}` | no | +| apiary\_rds\_additional\_sg | Comma-separated string containing additional security groups to attach to RDS. | `list(any)` | `[]` | no | +| apiary\_shared\_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | `list(any)` | `[]` | no | +| apiary\_tags | Common tags that get put on all resources. | `map(any)` | n/a | yes | +| atlas\_cluster\_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | `string` | `""` | no | +| atlas\_kafka\_bootstrap\_servers | Kafka instance url. | `string` | `""` | no | +| aws\_region | AWS region. | `string` | n/a | yes | +| common\_producer\_iamroles | AWS IAM roles allowed general (not tied to schema) write access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | +| dashboard\_namespace | k8s namespace to deploy grafana dashboard. 
| `string` | `"monitoring"` | no |
+| db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no |
+| db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. | `string` | n/a | yes |
+| db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. | `string` | `"02:00-03:00"` | no |
+| db\_enable\_performance\_insights | Enable RDS Performance Insights | `bool` | `false` | no |
+| db\_enhanced\_monitoring\_interval | RDS monitoring interval (in seconds) for enhanced monitoring. Valid values are 0, 1, 5, 10, 15, 30, 60. Default is 0. | `number` | `0` | no |
+| db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | n/a | yes |
+| db\_instance\_count | Desired count of database cluster instances. | `string` | `"2"` | no |
+| db\_maintenance\_window | Preferred maintenance window for the RDS Metastore DB in UTC. | `string` | `"wed:03:00-wed:04:00"` | no |
+| db\_master\_username | Aurora cluster MySQL master user name. | `string` | `"apiary"` | no |
+| db\_ro\_secret\_name | Aurora cluster MySQL read-only user SecretsManager secret name. | `string` | `""` | no |
+| db\_rw\_secret\_name | Aurora cluster MySQL read/write user SecretsManager secret name. | `string` | `""` | no |
+| disallow\_incompatible\_col\_type\_changes | Hive metastore setting to disallow validation when incompatible schema type changes. | `bool` | `true` | no |
+| docker\_registry\_auth\_secret\_name | Docker Registry authentication SecretManager secret name. | `string` | `""` | no |
+| ecs\_domain\_extension | Domain name to use for hosted zone created by ECS service discovery. | `string` | `"lcl"` | no |
+| elb\_timeout | Idle timeout for Apiary ELB. | `string` | `"1800"` | no |
+| enable\_apiary\_s3\_log\_hive | Create hive database to archive s3 logs in parquet format. Only applicable when module manages logs S3 bucket. 
| `bool` | `true` | no | +| enable\_autoscaling | Enable read only Hive Metastore k8s horizontal pod autoscaling. | `bool` | `true` | no | +| enable\_data\_events | Enable managed buckets S3 event notifications. | `bool` | `false` | no | +| enable\_gluesync | Enable metadata sync from Hive to the Glue catalog. | `bool` | `false` | no | +| enable\_hive\_metastore\_metrics | Enable sending Hive Metastore metrics to CloudWatch. | `bool` | `false` | no | +| enable\_metadata\_events | Enable Hive Metastore SNS listener. | `bool` | `false` | no | +| enable\_s3\_paid\_metrics | Enable managed S3 buckets request and data transfer metrics. | `bool` | `false` | no | +| enable\_vpc\_endpoint\_services | Enable metastore NLB, Route53 entries VPC access and VPC endpoint services, for cross-account access. | `bool` | `true` | no | +| encrypt\_db | Specifies whether the DB cluster is encrypted | `bool` | `false` | no | +| external\_data\_buckets | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access. | `list(any)` | `[]` | no | +| external\_database\_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. | `string` | `""` | no | +| hive\_metastore\_port | Port on which both Hive Metastore readwrite and readonly will run. | `number` | `9083` | no | +| hms\_additional\_environment\_variables | Additional environment variables for the Hive Metastore. | `map(any)` | `{}` | no | +| hms\_autogather\_stats | Read-write Hive metastore setting to enable/disable statistics auto-gather on table/partition creation. | `bool` | `true` | no | +| hms\_docker\_image | Docker image ID for the Hive Metastore. | `string` | n/a | yes | +| hms\_docker\_version | Version of the Docker image for the Hive Metastore. | `string` | n/a | yes | +| hms\_instance\_type | Hive Metastore instance type, possible values: ecs,k8s. 
| `string` | `"ecs"` | no | +| hms\_log\_level | Log level for the Hive Metastore. | `string` | `"INFO"` | no | +| hms\_nofile\_ulimit | Ulimit for the Hive Metastore container. | `string` | `"32768"` | no | +| hms\_ro\_cpu | CPU for the read only Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | +| hms\_ro\_db\_connection\_pool\_size | Read-only Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | +| hms\_ro\_ecs\_task\_count | Desired ECS task count of the read only Hive Metastore service. | `string` | `"3"` | no | +| hms\_ro\_heapsize | Heapsize for the read only Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | +| hms\_ro\_k8s\_replica\_count | Initial Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| hms\_ro\_k8s\_max\_replica\_count | Max Number of read only Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| hms\_ro\_target\_cpu\_percentage | Read only Hive Metastore autoscaling threshold for CPU target usage. | `number` | `"2048"` | no | +| hms\_ro\_request\_partition\_limit | Read only Hive Metastore limits of request partitions. | `string` | n/a | no | +| hms\_rw\_request\_partition\_limit | Read Write Hive Metastore limits of request partitions. | `string` | n/a | no | +| hms\_rw\_cpu | CPU for the read/write Hive Metastore ECS task.
Valid values can be 256, 512, 1024, 2048 and 4096.
Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"512"` | no | +| hms\_rw\_db\_connection\_pool\_size | Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10. | `number` | `10` | no | +| hms\_rw\_ecs\_task\_count | Desired ECS task count of the read/write Hive Metastore service. | `string` | `"3"` | no | +| hms\_rw\_heapsize | Heapsize for the read/write Hive Metastore.
Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | `string` | `"2048"` | no | +| hms\_rw\_k8s\_replica\_count | Initial Number of read/write Hive Metastore k8s pod replicas to create. | `number` | `"2048"` | no | +| iam\_name\_root | Name to identify Hive Metastore IAM roles. | `string` | `"hms"` | no | +| ingress\_cidr | Generally allowed ingress CIDR list. | `list(string)` | n/a | yes | +| instance\_name | Apiary instance name to identify resources in multi-instance deployments. | `string` | `""` | no | +| k8s\_docker\_registry\_secret | Docker Registry authentication K8s secret name. | `string` | `""` | no | +| kafka\_bootstrap\_servers | Kafka bootstrap servers to send metastore events, setting this enables Hive Metastore Kafka listener. | `string` | `""` | no | +| kafka\_topic\_name | Kafka topic to send metastore events. | `string` | `""` | no | +| kiam\_arn | Kiam server IAM role ARN. | `string` | `""` | no | +| ldap\_base | Active directory LDAP base DN to search users and groups. | `string` | `""` | no | +| ldap\_ca\_cert | Base64 encoded Certificate Authority bundle to validate LDAPS connections. | `string` | `""` | no | +| ldap\_secret\_name | Active directory LDAP bind DN SecretsManager secret name. | `string` | `""` | no | +| ldap\_url | Active directory LDAP URL to configure Hadoop LDAP group mapping. | `string` | `""` | no | +| metastore\_namespace | k8s namespace to deploy metastore containers. | `string` | `"metastore"` | no | +| oidc\_provider | EKS cluster OIDC provider name, required for configuring IAM using IRSA. | `string` | `""` | no | +| private\_subnets | Private subnets. | `list(any)` | n/a | yes | +| ranger\_audit\_db\_url | Ranger DB audit provider configuration. | `string` | `""` | no | +| ranger\_audit\_secret\_name | Ranger DB audit secret name. | `string` | `""` | no | +| ranger\_audit\_solr\_url | Ranger Solr audit provider configuration. 
| `string` | `""` | no | +| ranger\_policy\_manager\_url | Ranger admin URL to synchronize policies. | `string` | `""` | no | +| rds\_max\_allowed\_packet | RDS/MySQL setting for parameter 'max\_allowed\_packet' in bytes. Default is 128MB (Note that MySQL default is 4MB). | `number` | `134217728` | no | +| rw\_ingress\_cidr | Read-Write metastore ingress CIDR list. If not set, defaults to `var.ingress_cidr`. | `list(string)` | `[]` | no | +| s3\_enable\_inventory | Enable S3 inventory configuration. | `bool` | `false` | no | +| s3\_inventory\_customer\_accounts | AWS account IDs allowed to access s3 inventory database. | `list(string)` | `[]` | no | +| s3\_inventory\_format | Output format for S3 inventory results. Can be Parquet, ORC, CSV | `string` | `"ORC"` | no | +| s3\_inventory\_update\_schedule | Cron schedule to update S3 inventory tables (if enabled). Defaults to every 12 hours. | `string` | `"0 */12 * * *"` | no | +| s3\_lifecycle\_abort\_incomplete\_multipart\_upload\_days | Number of days after which incomplete multipart uploads will be deleted. | `string` | `"7"` | no | +| s3\_lifecycle\_policy\_transition\_period | S3 Lifecycle Policy number of days for Transition rule | `string` | `"30"` | no | +| s3\_log\_expiry | Number of days after which Apiary S3 bucket logs expire. | `string` | `"365"` | no | +| s3\_logs\_sqs\_delay\_seconds | The time in seconds that the delivery of all messages in the queue will be delayed. | `number` | `300` | no | +| s3\_logs\_sqs\_message\_retention\_seconds | Time in seconds after which message will be deleted from the queue. | `number` | `345600` | no | +| s3\_logs\_sqs\_receive\_wait\_time\_seconds | The time for which a ReceiveMessage call will wait for a message to arrive (long polling) before returning. | `number` | `10` | no | +| s3\_logs\_sqs\_visibility\_timeout\_seconds | Time in seconds after which message will be returned to the queue if it is not deleted. 
| `number` | `3600` | no |
+| s3\_storage\_class | S3 storage class after transition using lifecycle policy | `string` | `"INTELLIGENT_TIERING"` | no |
+| secondary\_vpcs | List of VPCs to associate with Service Discovery namespace. | `list(any)` | `[]` | no |
+| system\_schema\_customer\_accounts | AWS account IDs allowed to access system database. | `list(string)` | `[]` | no |
+| system\_schema\_name | Name for the internal system database | `string` | `"apiary_system"` | no |
+| table\_param\_filter | A regular expression for selecting necessary table parameters for the SNS listener. If the value isn't set, then no table parameters are selected. | `string` | `""` | no |
+| vpc\_id | VPC ID. | `string` | n/a | yes |
+| enable\_dashboard | make EKS & ECS dashboard optional | `bool` | true | no |
+| rds\_family | RDS Family | `string` | aurora5.6 | no |
+| datadog_metrics_enabled | Enable Datadog metrics for HMS | `bool` | false | no |
+| datadog_metrics_hms_readwrite_readonly | Prometheus Metrics sent to datadog | list(string) | ["metrics_classloading_loaded_value","metrics_threads_count_value","metrics_memory_heap_max_value","metrics_init_total_count_tables_value","metrics_init_total_count_dbs_value","metrics_memory_heap_used_value","metrics_init_total_count_partitions_value"] | no |
+| datadog_metrics_port | Port on which metrics will be sent to Datadog | string | 8080 | no |
+| datadog_key_secret_name | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments. | string | null | no |
+| datadog_agent_version | Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments. | string | 7.50.3-jmx | no |
+| datadog_agent_enabled | Whether to include the datadog-agent container. This is only applicable to ECS deployments. 
| string | false | no | ### apiary_assume_roles @@ -230,15 +231,15 @@ apiary_consumer_prefix_iamroles = { ### apiary_customer_condition A string that defines a list of conditions that restrict which objects in an Apiary schema's S3 bucket may be read cross-account by accounts in the `customer_accounts` list. -The string is a semicolon-delimited list of comma-delimited strings that specify conditions that are valid in AWS S3 bucket policy +The string is a semicolon-delimited list of comma-delimited strings that specify conditions that are valid in AWS S3 bucket policy [Condition](https://docs.aws.amazon.com/AmazonS3/latest/userguide/amazon-s3-policy-keys.html) sections. This condition is applied to every Apiary schema's S3 bucket policy. An example entry to limit access to: - Only requests from certain VPC CIDR blocks - And only to objects that have: - - Either an S3 tag of `data-sensitivity=false` or - - An S3 tag of `data-type=image*` -looks like: + - Either an S3 tag of `data-sensitivity=false` or + - An S3 tag of `data-type=image*` + looks like: ``` apiary_customer_condition = <:role/", + "arn:aws:iam:::role/", + ... 
+] +``` \ No newline at end of file diff --git a/s3.tf b/s3.tf index 5c8ca14..c4bf078 100644 --- a/s3.tf +++ b/s3.tf @@ -25,6 +25,7 @@ locals { client_roles = replace(lookup(schema, "client_roles", ""), ",", "\",\"") governance_iamroles = join("\",\"", var.apiary_governance_iamroles) consumer_prefix_roles = lookup(var.apiary_consumer_prefix_iamroles, schema["schema_name"], {}) + common_producer_iamroles = join("\",\"", var.common_producer_iamroles) }) } } diff --git a/templates/apiary-bucket-policy.json b/templates/apiary-bucket-policy.json index cf75053..da4422d 100644 --- a/templates/apiary-bucket-policy.json +++ b/templates/apiary-bucket-policy.json @@ -150,6 +150,37 @@ ] }, %{endif} +%{if common_producer_iamroles != ""} + { + "Sid": "General read-write iamrole permissions", + "Effect": "Allow", + "Principal": "*", + "Action": [ + "s3:GetBucketLocation", + "s3:GetObject", + "s3:GetObjectAcl", + "s3:GetBucketAcl", + "s3:GetObjectTagging", + "s3:ListBucket", + "s3:PutObject", + "s3:PutObjectAcl", + "s3:PutObjectTagging", + "s3:DeleteObject", + "s3:GetBucketVersioning", + "s3:PutBucketVersioning", + "s3:ObjectOwnerOverrideToBucketOwner" + ], + "Resource": [ + "arn:aws:s3:::${bucket_name}", + "arn:aws:s3:::${bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:PrincipalArn": [ "${common_producer_iamroles}" ] + } + } + }, +%{endif} %{if governance_iamroles != ""} { "Sid": "Apiary governance iamrole permissions", diff --git a/variables.tf b/variables.tf index d0b90fe..c426ab7 100644 --- a/variables.tf +++ b/variables.tf @@ -733,3 +733,9 @@ variable "datadog_agent_enabled" { type = bool default = false } + +variable "common_producer_iamroles" { + description = "AWS IAM roles allowed read-write access to managed Apiary S3 buckets." 
+ type = list(string) + default = [] +} \ No newline at end of file From 90939aa1729bd2ea0bafedb5fb22e21a9298bd19 Mon Sep 17 00:00:00 2001 From: githubjianli <51385385+githubjianli@users.noreply.github.com> Date: Wed, 3 Apr 2024 07:33:22 -0700 Subject: [PATCH 04/14] fix: fixed datadog secret key default value (#253) * fix: changed datadog_key_secret_name default value * fix: fixed typo * Update CHANGELOG.md * Update CHANGELOG.md --------- Co-authored-by: janli Co-authored-by: Jay Green-Stevens --- CHANGELOG.md | 5 +++++ VARIABLES.md | 2 +- s3.tf | 2 +- variables.tf | 6 +++--- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ada126d..ab20c34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.1] - 2024-04-03 +### Fixed +- Renamed variable from `common_producer_iamroles` to `apiary_common_producer_iamroles` to make the name consistent. +- Change default value for `datadog_key_secret_name` from `null` to `""`. + ## [7.1.0] - 2024-03-21 ### Added - Added `common_producer_iamroles` to allow roles read-write access to all Apiary managed schemas. diff --git a/VARIABLES.md b/VARIABLES.md index 3bd023c..dc25a7c 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -24,7 +24,7 @@ | atlas\_cluster\_name | Name of the Atlas cluster where metastore plugin will send DDL events. Defaults to `var.instance_name` if not set. | `string` | `""` | no | | atlas\_kafka\_bootstrap\_servers | Kafka instance url. | `string` | `""` | no | | aws\_region | AWS region. | `string` | n/a | yes | -| common\_producer\_iamroles | AWS IAM roles allowed general (not tied to schema) write access to managed Apiary S3 buckets. 
| `list(string)` | `[]` | no | +| apiary\_common\_producer\_iamroles | AWS IAM roles allowed general (not tied to schema) write access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | | dashboard\_namespace | k8s namespace to deploy grafana dashboard. | `string` | `"monitoring"` | no | | db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | | db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. | `string` | n/a | yes | diff --git a/s3.tf b/s3.tf index c4bf078..3c5211f 100644 --- a/s3.tf +++ b/s3.tf @@ -25,7 +25,7 @@ locals { client_roles = replace(lookup(schema, "client_roles", ""), ",", "\",\"") governance_iamroles = join("\",\"", var.apiary_governance_iamroles) consumer_prefix_roles = lookup(var.apiary_consumer_prefix_iamroles, schema["schema_name"], {}) - common_producer_iamroles = join("\",\"", var.common_producer_iamroles) + common_producer_iamroles = join("\",\"", var.apiary_common_producer_iamroles) }) } } diff --git a/variables.tf b/variables.tf index c426ab7..1f48a34 100644 --- a/variables.tf +++ b/variables.tf @@ -719,7 +719,7 @@ variable "hms_ro_request_partition_limit" { variable "datadog_key_secret_name" { description = "Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments." type = string - default = null + default = "" } variable "datadog_agent_version" { @@ -734,8 +734,8 @@ variable "datadog_agent_enabled" { default = false } -variable "common_producer_iamroles" { +variable "apiary_common_producer_iamroles" { description = "AWS IAM roles allowed read-write access to managed Apiary S3 buckets." 
type = list(string) default = [] -} \ No newline at end of file +} From 68ce35d4f251b2458d6fa7b24a2ea1cd6e32cd5c Mon Sep 17 00:00:00 2001 From: githubjianli <51385385+githubjianli@users.noreply.github.com> Date: Wed, 3 Apr 2024 12:39:45 -0700 Subject: [PATCH 05/14] fix: added datadog provider source to avoid conflicts (#254) * fix: set datadog provider source * fix: update change log --------- Co-authored-by: janli --- CHANGELOG.md | 4 ++++ s3-other.tf | 2 +- version.tf | 4 ++++ vpc-endpoint-service.tf | 4 ++-- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab20c34..6ee0e14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.2] - 2024-04-03 +### Fixed +- Added provider source for `datadog`. + ## [7.1.1] - 2024-04-03 ### Fixed - Renamed variable from `common_producer_iamroles` to `apiary_common_producer_iamroles` to make the name consistent. diff --git a/s3-other.tf b/s3-other.tf index ef4a60d..1c79b36 100644 --- a/s3-other.tf +++ b/s3-other.tf @@ -7,7 +7,7 @@ resource "aws_s3_bucket" "apiary_inventory_bucket" { count = var.s3_enable_inventory == true ? 
1 : 0 bucket = local.s3_inventory_bucket - tags = merge(tomap({"Name"="${local.s3_inventory_bucket}"}), "${var.apiary_tags}") + tags = merge(tomap({"Name"="${local.s3_inventory_bucket}"}), var.apiary_tags) policy = < Date: Wed, 1 May 2024 17:29:12 -0400 Subject: [PATCH 06/14] fix: Add tags to ecs services (#257) Co-authored-by: Eddy Perez --- ecs.tf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ecs.tf b/ecs.tf index f8804df..694ef7f 100644 --- a/ecs.tf +++ b/ecs.tf @@ -65,6 +65,8 @@ resource "aws_ecs_service" "apiary_hms_readwrite_service" { service_registries { registry_arn = aws_service_discovery_service.hms_readwrite[0].arn } + + tags = var.apiary_tags } resource "aws_ecs_service" "apiary_hms_readonly_service" { @@ -90,4 +92,6 @@ resource "aws_ecs_service" "apiary_hms_readonly_service" { service_registries { registry_arn = aws_service_discovery_service.hms_readonly[0].arn } + + tags = var.apiary_tags } From af9efa1aee984f7d1a972f7b72995f2ebafdf955 Mon Sep 17 00:00:00 2001 From: Eddy Decena Date: Thu, 2 May 2024 08:31:36 -0400 Subject: [PATCH 07/14] fix: Update changelog (#258) --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ee0e14..3054d23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.3] - 2024-04-03 +### Fixed +- Add tags to ecs services + ## [7.1.2] - 2024-04-03 ### Fixed - Added provider source for `datadog`. 
From 35fa9adf8856b98e8df6e3d5713a47c31836ca36 Mon Sep 17 00:00:00 2001 From: ninhomilton Date: Tue, 7 May 2024 10:30:46 -0500 Subject: [PATCH 08/14] Update kubernetes provider version to 2.13.0 (#259) * Update K8s version to 2.29.0 * updated CHANGELOG * Update kubernetes version --------- Co-authored-by: Milton Ortegon --- CHANGELOG.md | 4 ++++ version.tf | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3054d23..789c2f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.4] - 2024-05-06 +### Fixed +- Change provider version for `kubernetes`. + ## [7.1.3] - 2024-04-03 ### Fixed - Add tags to ecs services diff --git a/version.tf b/version.tf index bbddd25..20ca73b 100644 --- a/version.tf +++ b/version.tf @@ -9,7 +9,7 @@ terraform { required_providers { kubernetes = { source = "hashicorp/kubernetes" - version = "~> 2.7.0" + version = "~> 2.13.0" } aws = { source = "hashicorp/aws" From cddd0a5fff0d83ab5d906c2423494762c893e4e3 Mon Sep 17 00:00:00 2001 From: Patrick Duin Date: Wed, 22 May 2024 16:03:50 +0200 Subject: [PATCH 09/14] added var to set copy_tag_to_snapshot. Fixes issue #17 (#260) --- CHANGELOG.md | 4 ++++ VARIABLES.md | 1 + db.tf | 1 + variables.tf | 6 ++++++ 4 files changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 789c2f6..703e049 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.5] - 2024-05-22 +### Fixed +- Add `copy_tags_to_snapshot` to aws_rds_cluster. 
+ ## [7.1.4] - 2024-05-06 ### Fixed - Change provider version for `kubernetes`. diff --git a/VARIABLES.md b/VARIABLES.md index dc25a7c..7b1164b 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -29,6 +29,7 @@ | db\_apply\_immediately | Specifies whether any cluster modifications are applied immediately, or during the next maintenance window. | `bool` | `false` | no | | db\_backup\_retention | The number of days to retain backups for the RDS Metastore DB. | `string` | n/a | yes | | db\_backup\_window | Preferred backup window for the RDS Metastore DB in UTC. | `string` | `"02:00-03:00"` | no | +| db\_copy\_tags\_to\_snapshot | Copy all Cluster tags to snapshots. | `bool` | `true` | no | | db\_enable\_performance\_insights | Enable RDS Performance Insights | `bool` | `false` | no | | db\_enhanced\_monitoring\_interval | RDS monitoring interval (in seconds) for enhanced monitoring. Valid values are 0, 1, 5, 10, 15, 30, 60. Default is 0. | `number` | `0` | no | | db\_instance\_class | Instance type for the RDS Metastore DB. | `string` | n/a | yes | diff --git a/db.tf b/db.tf index 7b080d7..2d9de3f 100644 --- a/db.tf +++ b/db.tf @@ -79,6 +79,7 @@ resource "aws_rds_cluster" "apiary_cluster" { apply_immediately = var.db_apply_immediately db_cluster_parameter_group_name = aws_rds_cluster_parameter_group.apiary_rds_param_group.name storage_encrypted = var.encrypt_db + copy_tags_to_snapshot = var.db_copy_tags_to_snapshot lifecycle { create_before_destroy = true } diff --git a/variables.tf b/variables.tf index 1f48a34..9a3516b 100644 --- a/variables.tf +++ b/variables.tf @@ -268,6 +268,12 @@ variable "db_maintenance_window" { default = "wed:03:00-wed:04:00" } +variable "db_copy_tags_to_snapshot" { + description = "Copy all Cluster tags to snapshots." 
+ type = bool + default = true +} + variable "encrypt_db" { description = "Specifies whether the DB cluster is encrypted" type = bool From 5a30c71d4d80373f9e98d7ea139799e11eb1004c Mon Sep 17 00:00:00 2001 From: githubjianli <51385385+githubjianli@users.noreply.github.com> Date: Fri, 31 May 2024 09:45:11 -0700 Subject: [PATCH 10/14] =?UTF-8?q?feat:=20add=20new=20variable=20apiary=5Fd?= =?UTF-8?q?omain=5Fprivate=5Fzone=20to=20support=20zone=20pri=E2=80=A6=20(?= =?UTF-8?q?#261)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add new variable apiary_domain_private_zone to support zone private or public * fix: remove vpc_id filter * fix: fix hms alias route53 records * fix: fix typo --------- Co-authored-by: janli --- CHANGELOG.md | 4 ++++ VARIABLES.md | 1 + common.tf | 6 +++--- route53.tf | 20 ++++++-------------- variables.tf | 6 ++++++ 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 703e049..03f8197 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.6] - 2024-05-31 +### Added +- Add `apiary_domain_private_zone` to provide option to use private or public zone. + ## [7.1.5] - 2024-05-22 ### Fixed - Add `copy_tags_to_snapshot` to aws_rds_cluster. diff --git a/VARIABLES.md b/VARIABLES.md index 7b1164b..305d8da 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -13,6 +13,7 @@ | apiary\_deny\_iamrole\_actions | List of S3 actions that 'apiary\_deny\_iamroles' are not allowed to perform. | `list(string)` |
[
"s3:Abort*",
"s3:Bypass*",
"s3:Delete*",
"s3:GetObject",
"s3:GetObjectTorrent",
"s3:GetObjectVersion",
"s3:GetObjectVersionTorrent",
"s3:ObjectOwnerOverrideToBucketOwner",
"s3:Put*",
"s3:Replicate*",
"s3:Restore*"
]
| no | | apiary\_deny\_iamroles | AWS IAM roles denied access to Apiary managed S3 buckets. | `list(string)` | `[]` | no | | apiary\_domain\_name | Apiary domain name for Route 53. | `string` | `""` | no | +| apiary\_domain\_private\_zone | Apiary domain private zone 53. | `bool` | `true` | no | | apiary\_governance\_iamroles | AWS IAM governance roles allowed read and tagging access to managed Apiary S3 buckets. | `list(string)` | `[]` | no | | apiary\_log\_bucket | Bucket for Apiary logs.If this is blank, module will create a bucket. | `string` | `""` | no | | apiary\_log\_prefix | Prefix for Apiary logs. | `string` | `""` | no | diff --git a/common.tf b/common.tf index 4990bcd..57df24d 100644 --- a/common.tf +++ b/common.tf @@ -76,9 +76,9 @@ data "aws_vpc" "apiary_vpc" { } data "aws_route53_zone" "apiary_zone" { - count = local.enable_route53_records ? 1 : 0 - name = var.apiary_domain_name - vpc_id = var.vpc_id + count = local.enable_route53_records ? 1 : 0 + name = var.apiary_domain_name + private_zone = var.apiary_domain_private_zone } data "aws_secretsmanager_secret" "datadog_key" { diff --git a/route53.tf b/route53.tf index a994b71..814d8f8 100644 --- a/route53.tf +++ b/route53.tf @@ -8,26 +8,18 @@ resource "aws_route53_record" "hms_readwrite_alias" { count = local.enable_route53_records ? 1 : 0 zone_id = data.aws_route53_zone.apiary_zone[0].zone_id name = "${local.instance_alias}-hms-readwrite" - type = "A" - - alias { - name = aws_lb.apiary_hms_rw_lb[0].dns_name - zone_id = aws_lb.apiary_hms_rw_lb[0].zone_id - evaluate_target_health = true - } + type = "CNAME" + ttl = "300" + records = var.hms_instance_type == "ecs" ? aws_lb.apiary_hms_rw_lb[0].dns_name : kubernetes_service.hms_readwrite[0].status.0.load_balancer.0.ingress.*.hostname } resource "aws_route53_record" "hms_readonly_alias" { count = local.enable_route53_records ? 
1 : 0 zone_id = data.aws_route53_zone.apiary_zone[0].zone_id name = "${local.instance_alias}-hms-readonly" - type = "A" - - alias { - name = aws_lb.apiary_hms_ro_lb[0].dns_name - zone_id = aws_lb.apiary_hms_ro_lb[0].zone_id - evaluate_target_health = true - } + type = "CNAME" + ttl = "300" + records = var.hms_instance_type == "ecs" ? aws_lb.apiary_hms_ro_lb[0].dns_name : kubernetes_service.hms_readonly[0].status.0.load_balancer.0.ingress.*.hostname } resource "aws_route53_zone" "apiary" { diff --git a/variables.tf b/variables.tf index 9a3516b..efe75d8 100644 --- a/variables.tf +++ b/variables.tf @@ -21,6 +21,12 @@ variable "apiary_domain_name" { default = "" } +variable "apiary_domain_private_zone" { + description = "Apiary domain zone private" + type = bool + default = true +} + variable "ecs_domain_extension" { description = "Domain name to use for hosted zone created by ECS service discovery." type = string From 39d29ecdf4701b976b0f9d5f0b636541365675a6 Mon Sep 17 00:00:00 2001 From: githubjianli <51385385+githubjianli@users.noreply.github.com> Date: Tue, 4 Jun 2024 10:08:28 -0700 Subject: [PATCH 11/14] fix: fixed iam and service account (#262) * fix: fixing irsa working mode * fix: fix kubernates api * fix: fix api * feat: added aud:sts.amazonaws.com --------- Co-authored-by: janli --- CHANGELOG.md | 5 ++++ iam.tf | 9 ++++--- k8s-cronjobs.tf | 4 +-- k8s-housekeeper.tf | 4 +-- k8s-readonly.tf | 4 +-- k8s-readwrite.tf | 4 +-- k8s-service-accounts.tf | 57 ++++++++++++++++++++++++++++++++++++----- 7 files changed, 70 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03f8197..c6309c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.7] - 2024-06-04 +### Fixed +- Fixed k8s IRSA. 
+- Changed k8s service account creation to compatible with newer version kubernetes provider.(eks 1.24 and later, create service account no longer create account token automatically) + ## [7.1.6] - 2024-05-31 ### Added - Add `apiary_domain_private_zone` to provide option to use private or public zone. diff --git a/iam.tf b/iam.tf index 903c43d..25df815 100644 --- a/iam.tf +++ b/iam.tf @@ -59,7 +59,8 @@ resource "aws_iam_role" "apiary_hms_readonly" { "Action": "sts:AssumeRoleWithWebIdentity", "Condition": { "StringEquals": { - "${var.oidc_provider}:sub": "system:serviceaccount:${var.metastore_namespace}:${local.hms_alias}-readonly" + "${var.oidc_provider}:sub": "system:serviceaccount:${var.metastore_namespace}:${local.hms_alias}-readonly", + "${var.oidc_provider}:aud": "sts.amazonaws.com" } } }, @@ -109,7 +110,8 @@ resource "aws_iam_role" "apiary_hms_readwrite" { "Action": "sts:AssumeRoleWithWebIdentity", "Condition": { "StringEquals": { - "${var.oidc_provider}:sub": "system:serviceaccount:${var.metastore_namespace}:${local.hms_alias}-readwrite" + "${var.oidc_provider}:sub": "system:serviceaccount:${var.metastore_namespace}:${local.hms_alias}-readwrite", + "${var.oidc_provider}:aud": "sts.amazonaws.com" } } }, @@ -159,7 +161,8 @@ resource "aws_iam_role" "apiary_s3_inventory" { "Action": "sts:AssumeRoleWithWebIdentity", "Condition": { "StringEquals": { - "${var.oidc_provider}:sub": "system:serviceaccount:${var.metastore_namespace}:${local.instance_alias}-s3-inventory" + "${var.oidc_provider}:sub": "system:serviceaccount:${var.metastore_namespace}:${local.instance_alias}-s3-inventory", + "${var.oidc_provider}:aud": "sts.amazonaws.com" } } }, diff --git a/k8s-cronjobs.tf b/k8s-cronjobs.tf index dfc9fee..10c9aa1 100644 --- a/k8s-cronjobs.tf +++ b/k8s-cronjobs.tf @@ -29,12 +29,12 @@ resource "kubernetes_cron_job" "apiary_inventory" { name = "${local.instance_alias}-s3-inventory" } annotations = { - "iam.amazonaws.com/role" = aws_iam_role.apiary_s3_inventory.name + 
"iam.amazonaws.com/role" = var.oidc_provider == "" ? aws_iam_role.apiary_s3_inventory.name : null } } spec { - service_account_name = kubernetes_service_account.s3_inventory[0].metadata.0.name + service_account_name = kubernetes_service_account_v1.s3_inventory[0].metadata.0.name automount_service_account_token = true container { image = "${var.hms_docker_image}:${var.hms_docker_version}" diff --git a/k8s-housekeeper.tf b/k8s-housekeeper.tf index 55209c5..0f63843 100644 --- a/k8s-housekeeper.tf +++ b/k8s-housekeeper.tf @@ -32,7 +32,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_housekeeper" { "ad.datadoghq.com/${local.hms_alias}-housekeeper.check_names" = var.datadog_metrics_enabled ? "[\"prometheus\"]" : null "ad.datadoghq.com/${local.hms_alias}-housekeeper.init_configs" = var.datadog_metrics_enabled ? "[{}]" : null "ad.datadoghq.com/${local.hms_alias}-housekeeper.instances" = var.datadog_metrics_enabled ? "[{ \"prometheus_url\": \"http://%%host%%:${var.datadog_metrics_port}/actuator/prometheus\", \"namespace\": \"hms_readwrite\", \"metrics\": [ \"${join("\",\"", var.datadog_metrics_hms_readwrite_readonly)}\" ] , \"type_overrides\": { \"${join("\": \"gauge\",\"", var.datadog_metrics_hms_readwrite_readonly)}\": \"gauge\"} }]" : null - "iam.amazonaws.com/role" = aws_iam_role.apiary_hms_readwrite.name + "iam.amazonaws.com/role" = var.oidc_provider == "" ? aws_iam_role.apiary_hms_readwrite.name : null "prometheus.io/path" = "/metrics" "prometheus.io/port" = "8080" "prometheus.io/scrape" = "true" @@ -40,7 +40,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_housekeeper" { } spec { - service_account_name = kubernetes_service_account.hms_readwrite[0].metadata.0.name + service_account_name = kubernetes_service_account_v1.hms_readwrite[0].metadata.0.name automount_service_account_token = true dynamic "init_container" { for_each = var.external_database_host == "" ? 
["enabled"] : [] diff --git a/k8s-readonly.tf b/k8s-readonly.tf index e4b4a2a..437b282 100644 --- a/k8s-readonly.tf +++ b/k8s-readonly.tf @@ -32,7 +32,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_readonly" { "ad.datadoghq.com/${local.hms_alias}-readonly.check_names" = var.datadog_metrics_enabled ? "[\"prometheus\"]" : null "ad.datadoghq.com/${local.hms_alias}-readonly.init_configs" = var.datadog_metrics_enabled ? "[{}]" : null "ad.datadoghq.com/${local.hms_alias}-readonly.instances" = var.datadog_metrics_enabled ? "[{ \"prometheus_url\": \"http://%%host%%:${var.datadog_metrics_port}/actuator/prometheus\", \"namespace\": \"hms_readonly\", \"metrics\": [ \"${join("\",\"", var.datadog_metrics_hms_readwrite_readonly)}\" ] , \"type_overrides\": { \"${join("\": \"gauge\",\"", var.datadog_metrics_hms_readwrite_readonly)}\": \"gauge\"} }]" : null - "iam.amazonaws.com/role" = aws_iam_role.apiary_hms_readonly.name + "iam.amazonaws.com/role" = var.oidc_provider == "" ? aws_iam_role.apiary_hms_readonly.name : null "prometheus.io/path" = "/metrics" "prometheus.io/port" = "8080" "prometheus.io/scrape" = "true" @@ -40,7 +40,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_readonly" { } spec { - service_account_name = kubernetes_service_account.hms_readonly[0].metadata.0.name + service_account_name = kubernetes_service_account_v1.hms_readonly[0].metadata.0.name automount_service_account_token = true dynamic "init_container" { for_each = var.external_database_host == "" ? ["enabled"] : [] diff --git a/k8s-readwrite.tf b/k8s-readwrite.tf index 14cce7f..c8fc9d6 100644 --- a/k8s-readwrite.tf +++ b/k8s-readwrite.tf @@ -32,7 +32,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_readwrite" { "ad.datadoghq.com/${local.hms_alias}-readwrite.check_names" = var.datadog_metrics_enabled ? "[\"prometheus\"]" : null "ad.datadoghq.com/${local.hms_alias}-readwrite.init_configs" = var.datadog_metrics_enabled ? 
"[{}]" : null "ad.datadoghq.com/${local.hms_alias}-readwrite.instances" = var.datadog_metrics_enabled ? "[{ \"prometheus_url\": \"http://%%host%%:${var.datadog_metrics_port}/actuator/prometheus\", \"namespace\": \"hms_readwrite\", \"metrics\": [ \"${join("\",\"", var.datadog_metrics_hms_readwrite_readonly)}\" ] , \"type_overrides\": { \"${join("\": \"gauge\",\"", var.datadog_metrics_hms_readwrite_readonly)}\": \"gauge\"} }]" : null - "iam.amazonaws.com/role" = aws_iam_role.apiary_hms_readwrite.name + "iam.amazonaws.com/role" = var.oidc_provider == "" ? aws_iam_role.apiary_hms_readwrite.name : null "prometheus.io/path" = "/metrics" "prometheus.io/port" = "8080" "prometheus.io/scrape" = "true" @@ -40,7 +40,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_readwrite" { } spec { - service_account_name = kubernetes_service_account.hms_readwrite[0].metadata.0.name + service_account_name = kubernetes_service_account_v1.hms_readwrite[0].metadata.0.name automount_service_account_token = true dynamic "init_container" { for_each = var.external_database_host == "" ? ["enabled"] : [] diff --git a/k8s-service-accounts.tf b/k8s-service-accounts.tf index 4921e9b..c1dba95 100644 --- a/k8s-service-accounts.tf +++ b/k8s-service-accounts.tf @@ -1,4 +1,4 @@ -resource "kubernetes_service_account" "hms_readwrite" { +resource "kubernetes_service_account_v1" "hms_readwrite" { count = var.hms_instance_type == "k8s" ? 1 : 0 metadata { name = "${local.hms_alias}-readwrite" @@ -7,10 +7,25 @@ resource "kubernetes_service_account" "hms_readwrite" { "eks.amazonaws.com/role-arn" = var.oidc_provider == "" ? 
"" : aws_iam_role.apiary_hms_readwrite.arn } } - automount_service_account_token = true } -resource "kubernetes_service_account" "hms_readonly" { +resource "kubernetes_secret_v1" "hms_readwrite" { + metadata { + name = "${local.hms_alias}-readwrite" + namespace = var.metastore_namespace + annotations = { + "kubernetes.io/service-account.name" ="${local.hms_alias}-readwrite" + "kubernetes.io/service-account.namespace" = var.metastore_namespace + } + } + type = "kubernetes.io/service-account-token" + + depends_on = [ + kubernetes_service_account_v1.hms_readwrite + ] +} + +resource "kubernetes_service_account_v1" "hms_readonly" { count = var.hms_instance_type == "k8s" ? 1 : 0 metadata { name = "${local.hms_alias}-readonly" @@ -19,10 +34,25 @@ resource "kubernetes_service_account" "hms_readonly" { "eks.amazonaws.com/role-arn" = var.oidc_provider == "" ? "" : aws_iam_role.apiary_hms_readonly.arn } } - automount_service_account_token = true } -resource "kubernetes_service_account" "s3_inventory" { +resource "kubernetes_secret_v1" "hms_readonly" { + metadata { + name = "${local.hms_alias}-readonly" + namespace = var.metastore_namespace + annotations = { + "kubernetes.io/service-account.name" ="${local.hms_alias}-readonly" + "kubernetes.io/service-account.namespace" = var.metastore_namespace + } + } + type = "kubernetes.io/service-account-token" + + depends_on = [ + kubernetes_service_account_v1.hms_readonly + ] +} + +resource "kubernetes_service_account_v1" "s3_inventory" { count = var.hms_instance_type == "k8s" ? 1 : 0 metadata { name = "${local.instance_alias}-s3-inventory" @@ -31,5 +61,20 @@ resource "kubernetes_service_account" "s3_inventory" { "eks.amazonaws.com/role-arn" = var.oidc_provider == "" ? 
"" : aws_iam_role.apiary_s3_inventory.arn } } - automount_service_account_token = true +} + +resource "kubernetes_secret_v1" "s3_inventory" { + metadata { + name = "${local.hms_alias}-s3-inventory" + namespace = var.metastore_namespace + annotations = { + "kubernetes.io/service-account.name" ="${local.hms_alias}-s3-inventory" + "kubernetes.io/service-account.namespace" = var.metastore_namespace + } + } + type = "kubernetes.io/service-account-token" + + depends_on = [ + kubernetes_service_account_v1.s3_inventory + ] } From 885df768d908fce70514f389ddd4ae399c123a25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20S=C3=A1nchez=20Beltr=C3=A1n?= <36443689+javsanbel2@users.noreply.github.com> Date: Wed, 19 Jun 2024 17:43:52 +0200 Subject: [PATCH 12/14] feat: Add additional environment variables in hms-housekeeper (#263) * feat: Add additional environment variables in hms-housekeeper * version --- CHANGELOG.md | 4 ++++ VARIABLES.md | 1 + k8s-housekeeper.tf | 8 ++++++++ variables.tf | 6 ++++++ 4 files changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6309c6..93a697c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.8] - 2024-06-19 +### Added +- `hms_housekeeper_additional_environment_variables` variable to provide ability to add a list of environment variables in `hms-housekeeper` deployment. + ## [7.1.7] - 2024-06-04 ### Fixed - Fixed k8s IRSA. diff --git a/VARIABLES.md b/VARIABLES.md index 305d8da..733345a 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -56,6 +56,7 @@ | external\_database\_host | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified. 
| `string` | `""` | no | | hive\_metastore\_port | Port on which both Hive Metastore readwrite and readonly will run. | `number` | `9083` | no | | hms\_additional\_environment\_variables | Additional environment variables for the Hive Metastore. | `map(any)` | `{}` | no | +| hms\_housekeeper\_additional\_environment\_variables | Additional environment variables for Hive Housekeeper. | `map(any)` | `{}` | no | | hms\_autogather\_stats | Read-write Hive metastore setting to enable/disable statistics auto-gather on table/partition creation. | `bool` | `true` | no | | hms\_docker\_image | Docker image ID for the Hive Metastore. | `string` | n/a | yes | | hms\_docker\_version | Version of the Docker image for the Hive Metastore. | `string` | n/a | yes | diff --git a/k8s-housekeeper.tf b/k8s-housekeeper.tf index 0f63843..99f3568 100644 --- a/k8s-housekeeper.tf +++ b/k8s-housekeeper.tf @@ -133,6 +133,14 @@ resource "kubernetes_deployment_v1" "apiary_hms_housekeeper" { name = "ENABLE_HIVE_LOCK_HOUSE_KEEPER" value = var.enable_hms_housekeeper ? "true" : "" } + dynamic "env" { + for_each = var.hms_housekeeper_additional_environment_variables + + content { + name = env.key + value = env.value + } + } liveness_probe { tcp_socket { diff --git a/variables.tf b/variables.tf index efe75d8..0da134f 100644 --- a/variables.tf +++ b/variables.tf @@ -690,6 +690,12 @@ variable "hms_additional_environment_variables" { default = {} } +variable "hms_housekeeper_additional_environment_variables" { + description = "Additional environment variables for Hive metastore." + type = map(any) + default = {} +} + variable "datadog_metrics_hms_readwrite_readonly" { description = "HMS metrics to be sent to Datadog." 
type = list(string) From 5bc8655294855adc2d81735e14b67e984ee72c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20S=C3=A1nchez=20Beltr=C3=A1n?= <36443689+javsanbel2@users.noreply.github.com> Date: Thu, 20 Jun 2024 13:01:52 +0200 Subject: [PATCH 13/14] Changed hms-housekeeper heapsize since it is low memory container (#264) --- CHANGELOG.md | 4 ++++ k8s-housekeeper.tf | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93a697c..48325de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.1.9] - 2024-06-20 +### Fixed +- Housekeeper deployment should not use common `HADOOP_HEAPSIZE` variable since it is a low memory container. + ## [7.1.8] - 2024-06-19 ### Added - `hms_housekeeper_additional_environment_variables` variable to provide ability to add a list of environment variables in `hms-housekeeper` deployment. 
diff --git a/k8s-housekeeper.tf b/k8s-housekeeper.tf index 99f3568..c2f2f94 100644 --- a/k8s-housekeeper.tf +++ b/k8s-housekeeper.tf @@ -111,7 +111,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_housekeeper" { } env { name = "HADOOP_HEAPSIZE" - value = local.hms_rw_heapsize + value = "1740" } env { name = "AWS_REGION" From f695b43ae9139c59bbb3d659c0d14cc476b0d2e8 Mon Sep 17 00:00:00 2001 From: Georgi Ivanov Date: Wed, 26 Jun 2024 16:59:55 +0100 Subject: [PATCH 14/14] Added capability to choose Connection Pooling driver and specify it's configuration (#265) * Added `hms_ro_datanucleus_connection_pooling_type`, `hms_rw_datanucleus_connection_pooling_type`, `hms_ro_datanucleus_connection_pool_config`, `hms_rw_datanucleus_connection_pool_config`, `hms_housekeeper_db_connection_pool_size` variables to allow specifying the pooling driver and its config * remove check for null for var.hms_ro_datanucleus_connection_pool_config and var.hms_rw_datanucleus_connection_pool_config * updated Changelog --------- Co-authored-by: Georgi Ivanov --- CHANGELOG.md | 4 ++++ k8s-housekeeper.tf | 11 +++++++++++ k8s-readonly.tf | 22 ++++++++++++++++++---- k8s-readwrite.tf | 22 ++++++++++++++++++---- variables.tf | 34 ++++++++++++++++++++++++++++++++-- 5 files changed, 83 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 48325de..4569688 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
+## [7.2.0] - 2024-06-26 +### Added +- Added `hms_ro_datanucleus_connection_pooling_type`, `hms_rw_datanucleus_connection_pooling_type`, `hms_ro_datanucleus_connection_pool_config`, `hms_rw_datanucleus_connection_pool_config`, `hms_housekeeper_db_connection_pool_size` variables to allow specifying the pooling driver and its config + ## [7.1.9] - 2024-06-20 ### Fixed - Housekeeper deployment should not use common `HADOOP_HEAPSIZE` variable since it is a low memory container. diff --git a/k8s-housekeeper.tf b/k8s-housekeeper.tf index c2f2f94..1fe2259 100644 --- a/k8s-housekeeper.tf +++ b/k8s-housekeeper.tf @@ -133,6 +133,17 @@ resource "kubernetes_deployment_v1" "apiary_hms_housekeeper" { name = "ENABLE_HIVE_LOCK_HOUSE_KEEPER" value = var.enable_hms_housekeeper ? "true" : "" } + + env { + name = "DATANUCLEUS_CONNECTION_POOLING_TYPE" + value = var.hms_rw_datanucleus_connection_pooling_type + } + + env { + name = "DATANUCLEUS_CONNECTION_POOL_MAX_POOLSIZE" + value = var.hms_housekeeper_db_connection_pool_size + } + dynamic "env" { for_each = var.hms_housekeeper_additional_environment_variables diff --git a/k8s-readonly.tf b/k8s-readonly.tf index 437b282..9d5e438 100644 --- a/k8s-readonly.tf +++ b/k8s-readonly.tf @@ -174,10 +174,6 @@ resource "kubernetes_deployment_v1" "apiary_hms_readonly" { name = "HMS_MAX_THREADS" value = local.hms_ro_maxthreads } - env { - name = "MYSQL_CONNECTION_POOL_SIZE" - value = var.hms_ro_db_connection_pool_size - } env { name = "HMS_AUTOGATHER_STATS" value = "false" @@ -186,6 +182,15 @@ resource "kubernetes_deployment_v1" "apiary_hms_readonly" { name = "LIMIT_PARTITION_REQUEST_NUMBER" value = var.hms_ro_request_partition_limit == "" ? 
"" : var.hms_ro_request_partition_limit } + env { + name = "DATANUCLEUS_CONNECTION_POOLING_TYPE" + value = var.hms_ro_datanucleus_connection_pooling_type + } + env { + name = "DATANUCLEUS_CONNECTION_POOL_MAX_POOLSIZE" + value = var.hms_ro_db_connection_pool_size + } + dynamic "env" { for_each = var.hms_additional_environment_variables @@ -195,6 +200,15 @@ resource "kubernetes_deployment_v1" "apiary_hms_readonly" { } } + dynamic "env" { + for_each = var.hms_ro_datanucleus_connection_pool_config + + content { + name = env.key + value = env.value + } + } + liveness_probe { tcp_socket { port = var.hive_metastore_port diff --git a/k8s-readwrite.tf b/k8s-readwrite.tf index c8fc9d6..fca0bf5 100644 --- a/k8s-readwrite.tf +++ b/k8s-readwrite.tf @@ -214,10 +214,6 @@ resource "kubernetes_deployment_v1" "apiary_hms_readwrite" { name = "DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES" value = var.disallow_incompatible_col_type_changes } - env { - name = "MYSQL_CONNECTION_POOL_SIZE" - value = var.hms_rw_db_connection_pool_size - } env { name = "HMS_AUTOGATHER_STATS" value = var.hms_autogather_stats @@ -226,6 +222,15 @@ resource "kubernetes_deployment_v1" "apiary_hms_readwrite" { name = "LIMIT_PARTITION_REQUEST_NUMBER" value = var.hms_rw_request_partition_limit == "" ? 
"" : var.hms_rw_request_partition_limit } + env { + name = "DATANUCLEUS_CONNECTION_POOLING_TYPE" + value = var.hms_rw_datanucleus_connection_pooling_type + } + env { + name = "DATANUCLEUS_CONNECTION_POOL_MAX_POOLSIZE" + value = var.hms_rw_db_connection_pool_size + } + dynamic "env" { for_each = var.hms_additional_environment_variables @@ -235,6 +240,15 @@ resource "kubernetes_deployment_v1" "apiary_hms_readwrite" { } } + dynamic "env" { + for_each = var.hms_rw_datanucleus_connection_pool_config + + content { + name = env.key + value = env.value + } + } + liveness_probe { tcp_socket { port = var.hive_metastore_port diff --git a/variables.tf b/variables.tf index 0da134f..d11bc30 100644 --- a/variables.tf +++ b/variables.tf @@ -661,17 +661,23 @@ variable "hms_autogather_stats" { } variable "hms_ro_db_connection_pool_size" { - description = "Read-only Hive metastore setting for size of the MySQL connection pool. Default is 10." + description = "Read-only Hive metastore setting for max size of the MySQL connection pool. Default is 10." type = number default = 10 } variable "hms_rw_db_connection_pool_size" { - description = "Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10." + description = "Read-write Hive metastore setting for max size of the MySQL connection pool. Default is 10." type = number default = 10 } +variable "hms_housekeeper_db_connection_pool_size" { + description = "HMS Housekeeper setting for max size of the MySQL connection pool. Default is 5." 
+ type = number + default = 5 +} + variable "db_enable_performance_insights" { description = "Enable RDS Performance Insights" type = bool @@ -757,3 +763,27 @@ variable "apiary_common_producer_iamroles" { type = list(string) default = [] } + +variable "hms_ro_datanucleus_connection_pooling_type" { + description = "The Datanucleus connection pool type: Valid types are BoneCP, HikariCP, c3p0, dbcp, dbcp2" + type = string + default = "HikariCP" +} + +variable "hms_rw_datanucleus_connection_pooling_type" { + description = "The Datanucleus connection pool type: Valid types are BoneCP, HikariCP, c3p0, dbcp, dbcp2" + type = string + default = "HikariCP" +} + +variable "hms_ro_datanucleus_connection_pool_config" { + description = "A map of env vars supported by Apiary docker image that can configure the chosen Datanucleus connection pool" + type = map(any) + default = {} +} + +variable "hms_rw_datanucleus_connection_pool_config" { + description = "A map of env vars supported by Apiary docker image that can configure the chosen Datanucleus connection pool" + type = map(any) + default = {} +} \ No newline at end of file