From dae86aa5bb8d6d1c50fd0888ee1b857bba993156 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20S=C3=A1nchez=20Beltr=C3=A1n?=
<36443689+javsanbel2@users.noreply.github.com>
Date: Wed, 2 Aug 2023 14:01:41 +0200
Subject: [PATCH] Added variable MAX_REQUEST_SIZE to optionally increase the
request size when sending records to Kafka (#116)
* Added MAX_REQUEST_SIZE Kafka producer variable for Metastore events
* Added MAX_REQUEST_SIZE Kafka producer variable for Metastore events
* changelog
---
CHANGELOG.md | 6 ++++++
Dockerfile | 4 ++--
README.md | 3 ++-
files/hive-site.xml | 5 +++++
files/startup.sh | 1 +
5 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 59f88c8..7b233a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
+## [3.0.12] - 2023-08-02
+### Added
+- Added variable `KAFKA_MAX_REQUEST_SIZE` to optionally increase the maximum request size when sending records to Kafka.
+- Upgraded `APIARY_EXTENSIONS_VERSION` to `7.3.8` (was `7.3.7`).
+- Upgraded `APIARY_GLUESYNC_LISTENER_VERSION` to `7.3.8` (was `7.3.7`).
+
## [3.0.11] - 2023-07-25
### Added
- Added variable `KAFKA_COMPRESSION_TYPE` to optionally add compression type when sending Metastore events to Kafka through apiary-metastore-listener library.
diff --git a/Dockerfile b/Dockerfile
index f138f7d..b6ef04d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,8 +4,8 @@
from amazonlinux:2
ENV RANGER_VERSION 2.0.0
-ENV APIARY_EXTENSIONS_VERSION 7.3.7
-ENV APIARY_GLUESYNC_LISTENER_VERSION 7.3.7
+ENV APIARY_EXTENSIONS_VERSION 7.3.8
+ENV APIARY_GLUESYNC_LISTENER_VERSION 7.3.8
ENV APIARY_RANGER_PLUGIN_VERSION 5.0.1
ENV APIARY_METASTORE_METRICS_VERSION 4.2.0
ENV APIARY_METASTORE_AUTH_VERSION 4.2.0
diff --git a/README.md b/README.md
index 58c741c..fe3731f 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,8 @@ For more information please refer to the main [Apiary](https://github.com/Expedi
|INSTANCE_NAME|Yes|Apiary instance name, will be used as prefix on most AWS resources to allow multiple Apiary instance deployments.|
|KAFKA_BOOTSTRAP_SERVERS|No|Kafka Bootstrap Servers to enable Kafka Metastore listener and send Metastore events to Kafka.|
|KAFKA_CLIENT_ID|No|Kafka label you define that names the Kafka producer.|
-|KAFKA_COMPRESSION_TYPE|No|Kafka Compression type, if none is specified there is no compression enabled. Values available are gzip, lz4 and snappy.|
+|KAFKA_COMPRESSION_TYPE|No|Kafka Compression type, if none is specified there is no compression enabled. Values available are gzip, lz4 and snappy.|
+|KAFKA_MAX_REQUEST_SIZE|No (defaults to `1048576`)|The maximum size of a request in bytes. This setting will limit the number of record batches the producer will send in a single request to avoid sending huge requests. This is also effectively a cap on the maximum uncompressed record batch size.|
|LDAP_BASE|No|LDAP base DN used to search for user groups.|
|LDAP_CA_CERT|Base64 encoded Certificate Authority Bundle to validate LDAP SSL connection.|
|LDAP_SECRET_ARN|No|LDAP bind DN SecretsManager secret ARN.|
diff --git a/files/hive-site.xml b/files/hive-site.xml
index 0f536b5..c4e271c 100644
--- a/files/hive-site.xml
+++ b/files/hive-site.xml
@@ -93,6 +93,11 @@
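     <value>none</value>
   </property>
 
+  <property>
+    <name>com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.max.request.size</name>
+    <value>1048576</value>
+  </property>
+
   <property>
     <name>hive.metastore.disallow.incompatible.col.type.changes</name>
     <value>true</value>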
diff --git a/files/startup.sh b/files/startup.sh
index 23e6ad3..d4d408d 100755
--- a/files/startup.sh
+++ b/files/startup.sh
@@ -105,6 +105,7 @@ if [[ ! -z $KAFKA_BOOTSTRAP_SERVERS ]]; then
[[ -n $KUBERNETES_SERVICE_HOST ]] && export KAFKA_CLIENT_ID="$HOSTNAME"
[[ -n $KAFKA_CLIENT_ID ]] && sed "s/KAFKA_CLIENT_ID/$KAFKA_CLIENT_ID/" -i /etc/hive/conf/hive-site.xml
[[ -n $KAFKA_COMPRESSION_TYPE ]] && update_property.py com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.compression.type "$KAFKA_COMPRESSION_TYPE" /etc/hive/conf/hive-site.xml
+ [[ -n $KAFKA_MAX_REQUEST_SIZE ]] && update_property.py com.expediagroup.apiary.extensions.events.metastore.kafka.messaging.max.request.size "$KAFKA_MAX_REQUEST_SIZE" /etc/hive/conf/hive-site.xml
fi
APIARY_S3_INVENTORY_SCHEMA=s3_inventory