From dae86aa5bb8d6d1c50fd0888ee1b857bba993156 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20S=C3=A1nchez=20Beltr=C3=A1n?= <36443689+javsanbel2@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:01:41 +0200 Subject: [PATCH] Added variable MAX_REQUEST_SIZE to optionally increase the request size when sending records to Kafka (#116) * Added MAX_REQUEST_SIZE Kafka producer variable for Metastore events * Added MAX_REQUEST_SIZE Kafka producer variable for Metastore events * changelog --- CHANGELOG.md | 6 ++++++ Dockerfile | 4 ++-- README.md | 3 ++- files/hive-site.xml | 5 +++++ files/startup.sh | 1 + 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59f88c8..7b233a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [3.0.12] - 2023-08-02 +### Added +- Added variable `KAFKA_MAX_REQUEST_SIZE` to optionally increase the request size when sending records to Kafka. +- Upgraded `APIARY_EXTENSIONS_VERSION` to `7.3.8` (was `7.3.7`). +- Upgraded `APIARY_GLUESYNC_LISTENER_VERSION` to `7.3.8` (was `7.3.7`). + ## [3.0.11] - 2023-07-25 ### Added - Added variable `KAFKA_COMPRESSION_TYPE` to optionally add compression type when sending Metastore events to Kafka through apiary-metastore-listener library. 
diff --git a/Dockerfile b/Dockerfile index f138f7d..b6ef04d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,8 +4,8 @@ from amazonlinux:2 ENV RANGER_VERSION 2.0.0 -ENV APIARY_EXTENSIONS_VERSION 7.3.7 -ENV APIARY_GLUESYNC_LISTENER_VERSION 7.3.7 +ENV APIARY_EXTENSIONS_VERSION 7.3.8 +ENV APIARY_GLUESYNC_LISTENER_VERSION 7.3.8 ENV APIARY_RANGER_PLUGIN_VERSION 5.0.1 ENV APIARY_METASTORE_METRICS_VERSION 4.2.0 ENV APIARY_METASTORE_AUTH_VERSION 4.2.0 diff --git a/README.md b/README.md index 58c741c..fe3731f 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,8 @@ For more information please refer to the main [Apiary](https://github.com/Expedi |INSTANCE_NAME|Yes|Apiary instance name, will be used as prefix on most AWS resources to allow multiple Apiary instance deployments.| |KAFKA_BOOTSTRAP_SERVERS|No|Kafka Bootstrap Servers to enable Kafka Metastore listener and send Metastore events to Kafka.| |KAFKA_CLIENT_ID|No|Kafka label you define that names the Kafka producer.| -|KAFKA_COMPRESSION_TYPE|No|Kafka Compression type, if none is specified there is no compression enabled. Values available are gzip, lz4 and snappy.| +|KAFKA_COMPRESSION_TYPE|No|Kafka Compression type, if none is specified there is no compression enabled. Values available are gzip, lz4 and snappy.| +|KAFKA_MAX_REQUEST_SIZE|No (defaults to `1048576`)|The maximum size of a request in bytes. This setting will limit the number of record batches the producer will send in a single request to avoid sending huge requests. This is also effectively a cap on the maximum uncompressed record batch size.|
|LDAP_BASE|No|LDAP base DN used to search for user groups.| |LDAP_CA_CERT|Base64 encoded Certificate Authority Bundle to validate LDAP SSL connection.| |LDAP_SECRET_ARN|No|LDAP bind DN SecretsManager secret ARN.| diff --git a/files/hive-site.xml b/files/hive-site.xml index 0f536b5..c4e271c 100644 --- a/files/hive-site.xml +++ b/files/hive-site.xml @@ -93,6 +93,11 @@ none + + com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.max.request.size + 1048576 + + hive.metastore.disallow.incompatible.col.type.changes true diff --git a/files/startup.sh b/files/startup.sh index 23e6ad3..d4d408d 100755 --- a/files/startup.sh +++ b/files/startup.sh @@ -105,6 +105,7 @@ if [[ ! -z $KAFKA_BOOTSTRAP_SERVERS ]]; then [[ -n $KUBERNETES_SERVICE_HOST ]] && export KAFKA_CLIENT_ID="$HOSTNAME" [[ -n $KAFKA_CLIENT_ID ]] && sed "s/KAFKA_CLIENT_ID/$KAFKA_CLIENT_ID/" -i /etc/hive/conf/hive-site.xml [[ -n $KAFKA_COMPRESSION_TYPE ]] && update_property.py com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.compression.type "$KAFKA_COMPRESSION_TYPE" /etc/hive/conf/hive-site.xml + [[ -n $KAFKA_MAX_REQUEST_SIZE ]] && update_property.py com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.max.request.size "$KAFKA_MAX_REQUEST_SIZE" /etc/hive/conf/hive-site.xml fi APIARY_S3_INVENTORY_SCHEMA=s3_inventory