diff --git a/CHANGELOG.md b/CHANGELOG.md index 4454601..e1fc022 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [1.14.0] - 2020-05-11 +### Added +- Enable caller to set min and max size of the Hive metastore thread pool. If not set, defaults to 200/1000 (Hive defaults). + ## [1.13.0] - 2020-04-21 ### Added - If S3 access logs are enabled in `apiary-data-lake`, create Hive database `s3_logs_hive` on startup. Raw logs go to bucket `-s3-logs` and Hive Parquet data to bucket `-s3-logs-hive`. This is pre-work to prepare for S3 access-log Hive tables in a future version of Apiary. diff --git a/README.md b/README.md index 5111a8b..43b7416 100644 --- a/README.md +++ b/README.md @@ -8,31 +8,33 @@ For more information please refer to the main [Apiary](https://github.com/Expedi |----|----|----| |APIARY_S3_INVENTORY_PREFIX|No (defaults to `EntireBucketDaily`)|Prefix used by S3 Inventory when creating data in the inventory bucket.| |APIARY_S3_INVENTORY_TABLE_FORMAT|No (defaults to `ORC`)|Format of S3 inventory data - `ORC`, `Parquet`, or `CSV`| -|AWS_REGION|Yes|AWS region to configure various AWS clients.| |ATLAS_KAFKA_BOOTSTRAP_SERVERS|No|Atlas hive-bridge kafka bootstrap servers.| -|ENABLE_METRICS|No|Option to enable sending Hive Metastore metrics to CloudWatch.| +|AWS_REGION|Yes|AWS region to configure various AWS clients.| |ENABLE_GLUESYNC|No|Option to turn on GlueSync Hive Metastore listener.| +|ENABLE_METRICS|No|Option to enable sending Hive Metastore metrics to CloudWatch.| |ENABLE_S3_INVENTORY|No|Option to create Hive tables on top of S3 inventory data if enabled in `apiary-data-lake`. 
Enabled if value is not null/empty.| |ENABLE_S3_LOGS|No|Option to create Hive tables on top of S3 access logs data if enabled in `apiary-data-lake`. Enabled if value is not null/empty.| |EXTERNAL_DATABASE|No|Option to enable external database mode, when specified it disables managing Hive Metastore MySQL database schema.| |GLUE_PREFIX|No|Prefix added to Glue databases to handle database name collisions when synchronizing multiple Hive Metastores to the Glue catalog.| |HADOOP_HEAPSIZE|No|Hive Metastore Java process heapsize.| -|RANGER_POLICY_MANAGER_URL|No|Ranger admin URL from where policies will be downloaded.| -|RANGER_SERVICE_NAME|No|Ranger service name used to configure RangerAuth plugin.| -|RANGER_AUDIT_DB_URL|No|Ranger audit database JDBC URL.| -|RANGER_AUDIT_SECRET_ARN|No|Ranger audit database secret ARN.| -|RANGER_AUDIT_SOLR_URL|No|Ranger Solr audit URL.| -|LDAP_URL|No|Active Directory URL to enable group mapping in metastore.| -|LDAP_BASE|No|LDAP base DN used to search for user groups.| -|LDAP_SECRET_ARN|No|LDAP bind DN SecretsManager secret ARN.| -|LDAP_CA_CERT|Base64 encoded Certificate Authority Bundle to validate LDAP SSL connection.| |HIVE_METASTORE_ACCESS_MODE|No|Hive Metastore access mode, applicable values are: readwrite, readonly| |HIVE_DB_NAMES|No|comma separated list of Hive database names, when specified Hive databases will be created and mapped to corresponding S3 buckets.| |HIVE_METASTORE_LOG_LEVEL|No|Hive Metastore service Log4j log level.| +|HMS_MIN_THREADS|No (defaults to `200`)|Minimum size of the Hive metastore thread pool.| +|HMS_MAX_THREADS|No (defaults to `1000`)|Maximum size of the Hive metastore thread pool.| |INSTANCE_NAME|Yes|Apiary instance name, will be used as prefix on most AWS resources to allow multiple Apiary instance deployments.| +|LDAP_BASE|No|LDAP base DN used to search for user groups.| +|LDAP_CA_CERT|No|Base64 encoded Certificate Authority Bundle to validate LDAP SSL connection.| +|LDAP_SECRET_ARN|No|LDAP bind DN 
SecretsManager secret ARN.| +|LDAP_URL|No|Active Directory URL to enable group mapping in metastore.| |MYSQL_DB_HOST|Yes|Hive Metastore MySQL database hostname.| |MYSQL_DB_NAME|Yes|Hive Metastore MySQL database name.| |MYSQL_SECRET_ARN|Yes|Hive Metastore MySQL SecretsManager secret ARN.| +|RANGER_AUDIT_DB_URL|No|Ranger audit database JDBC URL.| +|RANGER_AUDIT_SECRET_ARN|No|Ranger audit database secret ARN.| +|RANGER_AUDIT_SOLR_URL|No|Ranger Solr audit URL.| +|RANGER_POLICY_MANAGER_URL|No|Ranger admin URL from where policies will be downloaded.| +|RANGER_SERVICE_NAME|No|Ranger service name used to configure RangerAuth plugin.| |SNS_ARN|No|The SNS topic ARN to which metadata updates will be sent.| |TABLE_PARAM_FILTER|No|A regular expression for selecting necessary table parameters. If the value isn't set, then no table parameters are selected.| diff --git a/files/hive-site.xml b/files/hive-site.xml index c833408..8070ab1 100644 --- a/files/hive-site.xml +++ b/files/hive-site.xml @@ -48,6 +48,16 @@ false + + hive.metastore.server.min.threads + 200 + + + + hive.metastore.server.max.threads + 1000 + + hive.service.metrics.class org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics diff --git a/files/startup.sh b/files/startup.sh index bf15077..c5a580e 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -7,6 +7,15 @@ set -x [[ -z "$MYSQL_DB_USERNAME" ]] && export MYSQL_DB_USERNAME=$(aws secretsmanager get-secret-value --secret-id ${MYSQL_SECRET_ARN}|jq .SecretString -r|jq .username -r) [[ -z "$MYSQL_DB_PASSWORD" ]] && export MYSQL_DB_PASSWORD=$(aws secretsmanager get-secret-value --secret-id ${MYSQL_SECRET_ARN}|jq .SecretString -r|jq .password -r) + +#config Hive min/max thread pool size. 
Terraform will set the env var based on size of memory +if [[ -n ${HMS_MIN_THREADS} ]]; then + update_property.py hive.metastore.server.min.threads "${HMS_MIN_THREADS}" /etc/hive/conf/hive-site.xml +fi +if [[ -n ${HMS_MAX_THREADS} ]]; then + update_property.py hive.metastore.server.max.threads "${HMS_MAX_THREADS}" /etc/hive/conf/hive-site.xml +fi + #configure LDAP group mapping, required for ranger authorization if [[ -n $LDAP_URL ]] ; then update_property.py hadoop.security.group.mapping.ldap.bind.user "$(aws secretsmanager get-secret-value --secret-id ${LDAP_SECRET_ARN}|jq .SecretString -r|jq .username -r)" /etc/hadoop/conf/core-site.xml