From 50decbfa4302a720ddb9a8141c4e28ddfe731d70 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Thu, 1 Feb 2024 02:29:37 +0000 Subject: [PATCH 01/18] Sending it via JMX --- files/hive-site.xml | 2 +- files/startup.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/files/hive-site.xml b/files/hive-site.xml index c4e271c..bb0c835 100644 --- a/files/hive-site.xml +++ b/files/hive-site.xml @@ -50,7 +50,7 @@ hive.metastore.metrics.enabled - false + true diff --git a/files/startup.sh b/files/startup.sh index c888e73..a085e13 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -89,7 +89,7 @@ if [ -n "$ENABLE_METRICS" ]; then if [ -n "$ECS_CONTAINER_METADATA_URI" ]; then export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') - update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml + #update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml fi #enable prometheus jmx agent when running on kubernetes if [ -n "$KUBERNETES_SERVICE_HOST" ]; then From 0a7ad4e49b093fb93bd9faeb4697daddb77efa95 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Thu, 1 Feb 2024 02:43:24 +0000 Subject: [PATCH 02/18] Sending it via JMX --- files/startup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/files/startup.sh b/files/startup.sh index e8259b5..e0bfd02 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -94,6 +94,7 @@ if [ -n "$ENABLE_METRICS" ]; then export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') #update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml + fi #enable 
prometheus jmx agent when running on kubernetes if [ -n "$KUBERNETES_SERVICE_HOST" ]; then From 0cc7b30025f519e638f96fa6768a0ccb55f52158 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Thu, 1 Feb 2024 02:58:27 +0000 Subject: [PATCH 03/18] Sending it via JMX --- files/startup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/startup.sh b/files/startup.sh index e0bfd02..7bdc1c8 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -93,7 +93,7 @@ if [ -n "$ENABLE_METRICS" ]; then export EXPORTER_OPTS="-javaagent:/usr/lib/apiary/jmx_prometheus_javaagent-${EXPORTER_VERSION}.jar=8080:/etc/hive/conf/jmx-exporter.yaml" export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') - #update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml + update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml fi #enable prometheus jmx agent when running on kubernetes From fd8b906f432f575103c716dd328e79a6fdc4660b Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Mon, 5 Feb 2024 15:03:58 +0000 Subject: [PATCH 04/18] Added rules * and printing output of curl --- files/jmx-exporter.yaml | 2 ++ files/startup.sh | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/files/jmx-exporter.yaml b/files/jmx-exporter.yaml index c956187..7f9ee67 100644 --- a/files/jmx-exporter.yaml +++ b/files/jmx-exporter.yaml @@ -4,3 +4,5 @@ ssl: false lowercaseOutputName: true lowercaseOutputLabelNames: true attrNameSnakeCase: true +rules: + - pattern: ".*" \ No newline at end of file diff --git a/files/startup.sh b/files/startup.sh index 7bdc1c8..ed4125f 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -93,6 +93,8 @@ if [ -n "$ENABLE_METRICS" ]; then export 
EXPORTER_OPTS="-javaagent:/usr/lib/apiary/jmx_prometheus_javaagent-${EXPORTER_VERSION}.jar=8080:/etc/hive/conf/jmx-exporter.yaml" export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') + + #this is populating something in 8080 update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml fi @@ -215,3 +217,8 @@ fi export HADOOP_OPTS="-XshowSettings:vm -Xms${HADOOP_HEAPSIZE}m $EXPORTER_OPTS" su hive -s/bin/bash -c "/usr/lib/hive/bin/hive --service metastore --hiveconf javax.jdo.option.ConnectionURL=jdbc:mysql://${MYSQL_DB_HOST}:3306/${MYSQL_DB_NAME} --hiveconf javax.jdo.option.ConnectionUserName='${MYSQL_DB_USERNAME}' --hiveconf javax.jdo.option.ConnectionPassword='${MYSQL_DB_PASSWORD}'" + + +curl -v localhost:8080/metrics + + From 589464231d138a7f454dd631cddea3a5221207bd Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Mon, 5 Feb 2024 15:04:18 +0000 Subject: [PATCH 05/18] Added rules * and printing output of curl --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index ca72ea3..a7ef369 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,7 @@ RUN yum -y install java-1.8.0-openjdk \ wget \ zip \ unzip \ + curl \ jq \ tar \ net-tools \ From 50144df4143025b66fe7ef812e2fc39578570866 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Mon, 5 Feb 2024 15:22:03 +0000 Subject: [PATCH 06/18] Added rules * and printing output of curl --- files/startup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/startup.sh b/files/startup.sh index ed4125f..befb685 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -218,7 +218,7 @@ fi export HADOOP_OPTS="-XshowSettings:vm -Xms${HADOOP_HEAPSIZE}m $EXPORTER_OPTS" su hive -s/bin/bash -c "/usr/lib/hive/bin/hive --service metastore --hiveconf 
javax.jdo.option.ConnectionURL=jdbc:mysql://${MYSQL_DB_HOST}:3306/${MYSQL_DB_NAME} --hiveconf javax.jdo.option.ConnectionUserName='${MYSQL_DB_USERNAME}' --hiveconf javax.jdo.option.ConnectionPassword='${MYSQL_DB_PASSWORD}'" - +echo "This is the curl command" curl -v localhost:8080/metrics From a4d50761aa6104fcda84919fd7392b3d58dab658 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Mon, 5 Feb 2024 15:57:31 +0000 Subject: [PATCH 07/18] 9083 container port scraping --- files/startup.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/files/startup.sh b/files/startup.sh index befb685..3fd424e 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -96,7 +96,8 @@ if [ -n "$ENABLE_METRICS" ]; then #this is populating something in 8080 update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml - + echo "This is the curl command" + curl -v localhost:8080/metrics fi #enable prometheus jmx agent when running on kubernetes if [ -n "$KUBERNETES_SERVICE_HOST" ]; then @@ -218,7 +219,4 @@ fi export HADOOP_OPTS="-XshowSettings:vm -Xms${HADOOP_HEAPSIZE}m $EXPORTER_OPTS" su hive -s/bin/bash -c "/usr/lib/hive/bin/hive --service metastore --hiveconf javax.jdo.option.ConnectionURL=jdbc:mysql://${MYSQL_DB_HOST}:3306/${MYSQL_DB_NAME} --hiveconf javax.jdo.option.ConnectionUserName='${MYSQL_DB_USERNAME}' --hiveconf javax.jdo.option.ConnectionPassword='${MYSQL_DB_PASSWORD}'" -echo "This is the curl command" -curl -v localhost:8080/metrics - From 573e2dd0074270e566bb1a81f519bb6de9bdedcc Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Mon, 5 Feb 2024 19:31:13 +0000 Subject: [PATCH 08/18] HADOOP_CLIENT_OPTS added on 8080 --- files/startup.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/files/startup.sh b/files/startup.sh index 3fd424e..a40491a 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -89,6 +89,8 @@ if [ -n "$ENABLE_METRICS" ]; then #configure to send 
metrics to cloudwatch when running on ECS if [ -n "$ECS_CONTAINER_METADATA_URI" ]; then + export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=8088" + # enable prometheus jmx agent when running on ECS export EXPORTER_OPTS="-javaagent:/usr/lib/apiary/jmx_prometheus_javaagent-${EXPORTER_VERSION}.jar=8080:/etc/hive/conf/jmx-exporter.yaml" export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" From 2e936aa230d4abdc2d14372d32b749b5a6ddaa5a Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Mon, 5 Feb 2024 20:05:38 +0000 Subject: [PATCH 09/18] not updating the property for cloudwatch --- files/startup.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/files/startup.sh b/files/startup.sh index a40491a..db03722 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -99,7 +99,10 @@ if [ -n "$ENABLE_METRICS" ]; then #this is populating something in 8080 update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml echo "This is the curl command" - curl -v localhost:8080/metrics + curl -v localhost:8080/metrics 2>&1 > curl_output.txt + cat curl_output.txt + echo "cat curl_output.txt" + fi #enable prometheus jmx agent when running on kubernetes if [ -n "$KUBERNETES_SERVICE_HOST" ]; then From ec5c84b2e64503e330e508c15919471c3b5ab704 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Mon, 5 Feb 2024 20:06:01 +0000 Subject: [PATCH 10/18] not updating the property for cloudwatch --- files/startup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/startup.sh b/files/startup.sh index db03722..46efd56 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -97,7 +97,7 @@ if [ -n "$ENABLE_METRICS" ]; then export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ 
print $NF }') #this is populating something in 8080 - update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml + #update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml echo "This is the curl command" curl -v localhost:8080/metrics 2>&1 > curl_output.txt cat curl_output.txt From f1f7857180ba66098af9e58133c330f978c07058 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Mon, 5 Feb 2024 23:47:36 +0000 Subject: [PATCH 11/18] Pushing metrics now --- Dockerfile | 1 - files/jmx-exporter.yaml | 2 +- files/startup.sh | 14 ++------------ 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/Dockerfile b/Dockerfile index a7ef369..ca72ea3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,6 @@ RUN yum -y install java-1.8.0-openjdk \ wget \ zip \ unzip \ - curl \ jq \ tar \ net-tools \ diff --git a/files/jmx-exporter.yaml b/files/jmx-exporter.yaml index 7f9ee67..005cda6 100644 --- a/files/jmx-exporter.yaml +++ b/files/jmx-exporter.yaml @@ -5,4 +5,4 @@ lowercaseOutputName: true lowercaseOutputLabelNames: true attrNameSnakeCase: true rules: - - pattern: ".*" \ No newline at end of file + - pattern: ".*" diff --git a/files/startup.sh b/files/startup.sh index 46efd56..0c8412c 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -86,23 +86,15 @@ fi if [ -n "$ENABLE_METRICS" ]; then update_property.py hive.metastore.metrics.enabled true /etc/hive/conf/hive-site.xml - #configure to send metrics to cloudwatch when running on ECS if [ -n "$ECS_CONTAINER_METADATA_URI" ]; then - - export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=8088" - # enable prometheus jmx agent when running on ECS + export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS 
-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=8088" export EXPORTER_OPTS="-javaagent:/usr/lib/apiary/jmx_prometheus_javaagent-${EXPORTER_VERSION}.jar=8080:/etc/hive/conf/jmx-exporter.yaml" export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') - #this is populating something in 8080 + # Enable it to send metrics to Cloudwatch #update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml - echo "This is the curl command" - curl -v localhost:8080/metrics 2>&1 > curl_output.txt - cat curl_output.txt - echo "cat curl_output.txt" - fi #enable prometheus jmx agent when running on kubernetes if [ -n "$KUBERNETES_SERVICE_HOST" ]; then @@ -223,5 +215,3 @@ fi export HADOOP_OPTS="-XshowSettings:vm -Xms${HADOOP_HEAPSIZE}m $EXPORTER_OPTS" su hive -s/bin/bash -c "/usr/lib/hive/bin/hive --service metastore --hiveconf javax.jdo.option.ConnectionURL=jdbc:mysql://${MYSQL_DB_HOST}:3306/${MYSQL_DB_NAME} --hiveconf javax.jdo.option.ConnectionUserName='${MYSQL_DB_USERNAME}' --hiveconf javax.jdo.option.ConnectionPassword='${MYSQL_DB_PASSWORD}'" - - From 330fe1b29e3f3d778ef020d98eee15bb0937d3d2 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Tue, 6 Feb 2024 00:05:09 +0000 Subject: [PATCH 12/18] all metrics populated --- files/hive-site.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/hive-site.xml b/files/hive-site.xml index bb0c835..c4e271c 100644 --- a/files/hive-site.xml +++ b/files/hive-site.xml @@ -50,7 +50,7 @@ hive.metastore.metrics.enabled - true + false From 21b0915fc27dac0f2d964545751ea8ee1eb8799f Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Tue, 6 Feb 2024 00:11:02 +0000 Subject: [PATCH 13/18] Changelog and comments. 
--- CHANGELOG.md | 4 ++++ files/startup.sh | 9 ++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 602f13c..ff8fb28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -142,6 +142,10 @@ This is pre-work to prepare for Ranger access-log Hive tables in a future versio inventory tables in the inventory database (if S3 inventory is enabled). The intent is to run the image this way on a scheduled basis in Kubernetes after AWS creates new inventory partition files in S3 each day. +## [1.11.0] - 2024-02-06 +### Added +- Enable Prometheus exporter when running on ECS instead of sending metrics to CloudWatch. + ## [1.10.0] - 2020-03-16 ### Changed - Updated `apiary-metastore-listener` and `kafka-metastore-listener` versions to `6.0.0` (was `5.0.2`). diff --git a/files/startup.sh b/files/startup.sh index 0c8412c..453f0bb 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -87,13 +87,16 @@ fi if [ -n "$ENABLE_METRICS" ]; then update_property.py hive.metastore.metrics.enabled true /etc/hive/conf/hive-site.xml if [ -n "$ECS_CONTAINER_METADATA_URI" ]; then - # enable prometheus jmx agent when running on ECS + # This line enables JMX (Java Management Extensions) on Hadoop clients, allowing for remote monitoring and management of JVM-related metrics and operations. + # source: https://docs.datadoghq.com/integrations/hive/?tab=containerized export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=8088" + + # This line configures the JMX Prometheus exporter, enabling the collection of JMX metrics from the JVM and their exposure in Prometheus format for integration with monitoring systems. 
export EXPORTER_OPTS="-javaagent:/usr/lib/apiary/jmx_prometheus_javaagent-${EXPORTER_VERSION}.jar=8080:/etc/hive/conf/jmx-exporter.yaml" - export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" - export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') # Enable it to send metrics to Cloudwatch + export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" + export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') #update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml fi #enable prometheus jmx agent when running on kubernetes From 1aea508afa30383aec491a9637f45b71822d1017 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Tue, 6 Feb 2024 09:46:27 +0000 Subject: [PATCH 14/18] Changelog and comments. --- CHANGELOG.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff8fb28..9f57d02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [3.1.0] - 2024-02-06 +### Added +- Enables JMX (Java Management Extensions) on Hadoop clients, allowing for remote monitoring and management of JVM-related metrics +- Stop sending metrics to CloudWatch. + ## [3.0.17] - 2024-01-31 ### Added - Enable prometheus jmx agent when running on ECS by exporting `EXPORTER_OPTS` @@ -142,10 +147,6 @@ This is pre-work to prepare for Ranger access-log Hive tables in a future versio inventory tables in the inventory database (if S3 inventory is enabled). The intent is to run the image this way on a scheduled basis in Kubernetes after AWS creates new inventory partition files in S3 each day. 
-## [1.11.0] - 2024-02-06 -### Added -- Enable Prometheus exporter when running on ECS instead of sending metrics to CloudWatch. - ## [1.10.0] - 2020-03-16 ### Changed - Updated `apiary-metastore-listener` and `kafka-metastore-listener` versions to `6.0.0` (was `5.0.2`). From d008d1db6d101e4e365b456948991f24287c9296 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Tue, 6 Feb 2024 14:21:21 +0000 Subject: [PATCH 15/18] Removed cloudwatch completely --- files/startup.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/files/startup.sh b/files/startup.sh index 453f0bb..771e9a0 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -94,9 +94,9 @@ if [ -n "$ENABLE_METRICS" ]; then # This line configures the JMX Prometheus exporter, enabling the collection of JMX metrics from the JVM and their exposure in Prometheus format for integration with monitoring systems. export EXPORTER_OPTS="-javaagent:/usr/lib/apiary/jmx_prometheus_javaagent-${EXPORTER_VERSION}.jar=8080:/etc/hive/conf/jmx-exporter.yaml" - # Enable it to send metrics to Cloudwatch - export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" - export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') + # Uncomment the below lines to send metrics to Cloudwatch + #export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" + #export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') #update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml fi #enable prometheus jmx agent when running on kubernetes From 81c530e8f5fb068b819f7b16f3df763fa922ed49 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Tue, 6 Feb 2024 14:54:01 +0000 Subject: [PATCH 16/18] Update as per comments --- files/startup.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/files/startup.sh b/files/startup.sh index 
771e9a0..0f01587 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -83,7 +83,7 @@ if [[ -n $RANGER_AUDIT_DB_URL ]]; then fi fi - +# Enables metrics for Hive Metastore and configures Prometheus JMX exporter for monitoring in ECS and Kubernetes environments. if [ -n "$ENABLE_METRICS" ]; then update_property.py hive.metastore.metrics.enabled true /etc/hive/conf/hive-site.xml if [ -n "$ECS_CONTAINER_METADATA_URI" ]; then @@ -93,11 +93,6 @@ if [ -n "$ENABLE_METRICS" ]; then # This line configures the JMX Prometheus exporter, enabling the collection of JMX metrics from the JVM and their exposure in Prometheus format for integration with monitoring systems. export EXPORTER_OPTS="-javaagent:/usr/lib/apiary/jmx_prometheus_javaagent-${EXPORTER_VERSION}.jar=8080:/etc/hive/conf/jmx-exporter.yaml" - - # Uncomment the below lines to send metrics to Cloudwatch - #export CLOUDWATCH_NAMESPACE="${INSTANCE_NAME}-metastore" - #export ECS_TASK_ID=$(wget -q -O - ${ECS_CONTAINER_METADATA_URI}/task|jq -r .TaskARN|awk -F/ '{ print $NF }') - #update_property.py hive.service.metrics.class com.expediagroup.apiary.extensions.metastore.metrics.CodahaleMetrics /etc/hive/conf/hive-site.xml fi #enable prometheus jmx agent when running on kubernetes if [ -n "$KUBERNETES_SERVICE_HOST" ]; then From de0f21ea7692cd9f48b833e1a01efa6fc1b854d1 Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Tue, 6 Feb 2024 15:28:34 +0000 Subject: [PATCH 17/18] Update changelog --- CHANGELOG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f57d02..5e67559 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
-## [3.1.0] - 2024-02-06 +## [4.0.0] - 2024-02-06 ### Added -- Enables JMX (Java Management Extensions) on Hadoop clients, allowing for remote monitoring and management of JVM-related metrics -- Stop sending metrics to CloudWatch. +- Enables JMX (Java Management Extensions) on Hadoop clients, allowing for remote monitoring and management of JVM-related metrics +### Removed +- CloudWatch metrics in favour of JMX Prometheus Exporter. ## [3.0.17] - 2024-01-31 ### Added From 3a9baf9d603af2da851eb32679a771950cde716b Mon Sep 17 00:00:00 2001 From: Dhrubajyoti Sadhu Date: Tue, 6 Feb 2024 16:18:49 +0000 Subject: [PATCH 18/18] Update Readme.md file --- README.md | 86 +++++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index e6a01e4..1d5956e 100644 --- a/README.md +++ b/README.md @@ -4,50 +4,50 @@ For more information please refer to the main [Apiary](https://github.com/ExpediaGroup/apiary) project page. ## Environment Variables -|Environment Variable|Required|Description| -|----|----|----| -|APIARY_S3_INVENTORY_PREFIX|No (defaults to `EntireBucketDaily`)|Prefix used by S3 Inventory when creating data in the inventory bucket.| -|APIARY_S3_INVENTORY_TABLE_FORMAT|No (defaults to `ORC`)|Format of S3 inventory data - `ORC`, `Parquet`, or `CSV`| -|APIARY_SYSTEM_SCHEMA|No (defaults to `apiary_system`)|Name for internal system database.| -|AWS_REGION|Yes|AWS region to configure various AWS clients.| -|AWS_WEB_IDENTITY_TOKEN_FILE|No|Path of the AWS Web Identity Token File for IRSA/OIDC AWS authentication.| -|DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES|No|`true`/`false` value for hive.metastore.disallow.incompatible.col.type.changes, default `true`.| -|ENABLE_GLUESYNC|No|Option to turn on GlueSync Hive Metastore listener.| -|ENABLE_HIVE_LOCK_HOUSE_KEEPER|No|Option to turn on Hive Metastore Hive Lock House Keeper.| -|ENABLE_METRICS|No|Option to enable sending Hive Metastore metrics to CloudWatch.| 
-|ENABLE_S3_INVENTORY|No|Option to create Hive tables on top of S3 inventory data if enabled in `apiary-data-lake`. Enabled if value is not null/empty.| -|ENABLE_S3_LOGS|No|Option to create Hive tables on top of S3 access logs data if enabled in `apiary-data-lake`. Enabled if value is not null/empty.| -|EXTERNAL_DATABASE|No|Option to enable external database mode, when specified it disables managing Hive Metastore MySQL database schema.| -|GLUE_PREFIX|No|Prefix added to Glue databases to handle database name collisions when synchronizing multiple Hive Metastores to the Glue catalog.| -|HADOOP_HEAPSIZE|No|Hive Metastore Java process heapsize.| -|HMS_AUTOGATHER_STATS|No (default is `true`)|Whether or not to create basic statistics on table/partition creation. Valid values are `true` or `false`.| -|LIMIT_PARTITION_REQUEST_NUMBER|No (default is `-1`)|To protect the cluster, this controls how many partitions can be scanned for each partitioned table. The default value "-1" means no limit. The limit on partitions does not affect metadata-only queries.| -|HIVE_METASTORE_ACCESS_MODE|No|Hive Metastore access mode, applicable values are: readwrite, readonly| -|HIVE_DB_NAMES|No|comma separated list of Hive database names, when specified Hive databases will be created and mapped to corresponding S3 buckets.| -|HIVE_METASTORE_LOG_LEVEL|No|Hive Metastore service Log4j log level.| -|HMS_MIN_THREADS|No (defaults to `200`)|Minimum size of the Hive metastore thread pool.| -|HMS_MAX_THREADS|No (defaults to `1000`)|Maximum size of the Hive metastore thread pool.| -|INSTANCE_NAME|Yes|Apiary instance name, will be used as prefix on most AWS resources to allow multiple Apiary instance deployments.| -|KAFKA_BOOTSTRAP_SERVERS|No|Kafka Bootstrap Servers to enable Kafka Metastore listener and send Metastore events to Kafka.| -|KAFKA_CLIENT_ID|No|Kafka label you define that names the Kafka producer.| -|KAFKA_COMPRESSION_TYPE|No (defaults to `1048576`)|The maximum size of a request in bytes. 
This setting will limit the number of record batches the producer will send in a single request to avoid sending huge requests. This is also effectively a cap on the maximum uncompressed record batch size.| -|KAFKA_MAX_REQUEST_SIZE|No|Kafka Compression type, if none is specified there is no compression enabled. Values available are gzip, lz4 and snappy.| -|LDAP_BASE|No|LDAP base DN used to search for user groups.| +|Environment Variable|Required| Description | +|----|----|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +|APIARY_S3_INVENTORY_PREFIX|No (defaults to `EntireBucketDaily`)| Prefix used by S3 Inventory when creating data in the inventory bucket. | +|APIARY_S3_INVENTORY_TABLE_FORMAT|No (defaults to `ORC`)| Format of S3 inventory data - `ORC`, `Parquet`, or `CSV` | +|APIARY_SYSTEM_SCHEMA|No (defaults to `apiary_system`)| Name for internal system database. | +|AWS_REGION|Yes| AWS region to configure various AWS clients. | +|AWS_WEB_IDENTITY_TOKEN_FILE|No| Path of the AWS Web Identity Token File for IRSA/OIDC AWS authentication. | +|DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES|No| `true`/`false` value for hive.metastore.disallow.incompatible.col.type.changes, default `true`. | +|ENABLE_GLUESYNC|No| Option to turn on GlueSync Hive Metastore listener. | +|ENABLE_HIVE_LOCK_HOUSE_KEEPER|No| Option to turn on Hive Metastore Hive Lock House Keeper. | +|ENABLE_METRICS|No| Option to enable sending Hive Metastore and JMX metrics to Prometheus. | +|ENABLE_S3_INVENTORY|No| Option to create Hive tables on top of S3 inventory data if enabled in `apiary-data-lake`. Enabled if value is not null/empty. | +|ENABLE_S3_LOGS|No| Option to create Hive tables on top of S3 access logs data if enabled in `apiary-data-lake`. Enabled if value is not null/empty. 
| +|EXTERNAL_DATABASE|No| Option to enable external database mode, when specified it disables managing Hive Metastore MySQL database schema. | +|GLUE_PREFIX|No| Prefix added to Glue databases to handle database name collisions when synchronizing multiple Hive Metastores to the Glue catalog. | +|HADOOP_HEAPSIZE|No| Hive Metastore Java process heapsize. | +|HMS_AUTOGATHER_STATS|No (default is `true`)| Whether or not to create basic statistics on table/partition creation. Valid values are `true` or `false`. | +|LIMIT_PARTITION_REQUEST_NUMBER|No (default is `-1`)| To protect the cluster, this controls how many partitions can be scanned for each partitioned table. The default value "-1" means no limit. The limit on partitions does not affect metadata-only queries. | +|HIVE_METASTORE_ACCESS_MODE|No| Hive Metastore access mode, applicable values are: readwrite, readonly | +|HIVE_DB_NAMES|No| comma separated list of Hive database names, when specified Hive databases will be created and mapped to corresponding S3 buckets. | +|HIVE_METASTORE_LOG_LEVEL|No| Hive Metastore service Log4j log level. | +|HMS_MIN_THREADS|No (defaults to `200`)| Minimum size of the Hive metastore thread pool. | +|HMS_MAX_THREADS|No (defaults to `1000`)| Maximum size of the Hive metastore thread pool. | +|INSTANCE_NAME|Yes| Apiary instance name, will be used as prefix on most AWS resources to allow multiple Apiary instance deployments. | +|KAFKA_BOOTSTRAP_SERVERS|No| Kafka Bootstrap Servers to enable Kafka Metastore listener and send Metastore events to Kafka. | +|KAFKA_CLIENT_ID|No| Kafka label you define that names the Kafka producer. | +|KAFKA_COMPRESSION_TYPE|No| Kafka Compression type, if none is specified there is no compression enabled. Values available are gzip, lz4 and snappy. | +|KAFKA_MAX_REQUEST_SIZE|No (defaults to `1048576`)| The maximum size of a request in bytes. This setting will limit the number of record batches the producer will send in a single request to avoid sending huge requests. This is also effectively a cap on the maximum uncompressed record batch size. 
| +|LDAP_BASE|No| LDAP base DN used to search for user groups. | |LDAP_CA_CERT|Base64 encoded Certificate Authority Bundle to validate LDAP SSL connection.| -|LDAP_SECRET_ARN|No|LDAP bind DN SecretsManager secret ARN.| -|LDAP_URL|No|Active Directory URL to enable group mapping in metastore.| -|MYSQL_CONNECTION_POOL_SIZE|No (defaults to `10`)|MySQL Connection pool size for Hive Metastore. See [here](https://github.com/apache/hive/blob/master/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java#L1181) for more info.| -|MYSQL_DB_HOST|Yes|Hive Metastore MySQL database hostname.| -|MYSQL_DB_NAME|Yes|Hive Metastore MySQL database name.| -|MYSQL_SECRET_ARN|Yes|Hive Metastore MySQL SecretsManager secret ARN.| -|MYSQL_SECRET_USERNAME_KEY|No (defaults to `username`)|Hive Metastore MySQL SecretsManager secret username key.| -|RANGER_AUDIT_DB_URL|No|Ranger audit database JDBC URL.| -|RANGER_AUDIT_SECRET_ARN|No|Ranger audit database secret ARN.| -|RANGER_AUDIT_SOLR_URL|No|Ranger Solr audit URL.| -|RANGER_POLICY_MANAGER_URL|No|Ranger admin URL from where policies will be downloaded.| -|RANGER_SERVICE_NAME|No|Ranger service name used to configure RangerAuth plugin.| -|SNS_ARN|No|The SNS topic ARN to which metadata updates will be sent.| -|TABLE_PARAM_FILTER|No|A regular expression for selecting necessary table parameters. If the value isn't set, then no table parameters are selected.| +|LDAP_SECRET_ARN|No| LDAP bind DN SecretsManager secret ARN. | +|LDAP_URL|No| Active Directory URL to enable group mapping in metastore. | +|MYSQL_CONNECTION_POOL_SIZE|No (defaults to `10`)| MySQL Connection pool size for Hive Metastore. See [here](https://github.com/apache/hive/blob/master/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java#L1181) for more info. | +|MYSQL_DB_HOST|Yes| Hive Metastore MySQL database hostname. 
| +|MYSQL_DB_NAME|Yes| Hive Metastore MySQL database name. | +|MYSQL_SECRET_ARN|Yes| Hive Metastore MySQL SecretsManager secret ARN. | +|MYSQL_SECRET_USERNAME_KEY|No (defaults to `username`)| Hive Metastore MySQL SecretsManager secret username key. | +|RANGER_AUDIT_DB_URL|No| Ranger audit database JDBC URL. | +|RANGER_AUDIT_SECRET_ARN|No| Ranger audit database secret ARN. | +|RANGER_AUDIT_SOLR_URL|No| Ranger Solr audit URL. | +|RANGER_POLICY_MANAGER_URL|No| Ranger admin URL from where policies will be downloaded. | +|RANGER_SERVICE_NAME|No| Ranger service name used to configure RangerAuth plugin. | +|SNS_ARN|No| The SNS topic ARN to which metadata updates will be sent. | +|TABLE_PARAM_FILTER|No| A regular expression for selecting necessary table parameters. If the value isn't set, then no table parameters are selected. | # Contact