From 99effb0406e87081b1aea3ebb0fb5e99afa00366 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:15:23 +0100 Subject: [PATCH] feat: run containerdebug in the background (#605) * feat: run containerdebug in the background * cargo update * remove unused env var * reintroduce the CONTAINERDEBUG_LOG_DIRECTORY env var * factor out ctnrdebug test * up test memory from @nightkr --- CHANGELOG.md | 2 ++ rust/operator-binary/src/hbase_controller.rs | 6 ++++++ tests/templates/kuttl/kerberos/30-install-hbase.yaml.j2 | 3 +++ tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 | 3 +++ tests/templates/kuttl/smoke/31-assert.yaml | 9 +++++++++ 5 files changed, 23 insertions(+) create mode 100644 tests/templates/kuttl/smoke/31-assert.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d634da3..beb855b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - The lifetime of auto generated TLS certificates is now configurable with the role and roleGroup config property `requestedSecretLifetime`. This helps reducing frequent Pod restarts ([#598]). +- Run a `containerdebug` process in the background of each HBase container to collect debugging information ([#605]). 
### Fixed @@ -15,6 +16,7 @@ [#594]: https://github.com/stackabletech/hbase-operator/pull/594 [#598]: https://github.com/stackabletech/hbase-operator/pull/598 +[#605]: https://github.com/stackabletech/hbase-operator/pull/605 ## [24.11.0] - 2024-11-18 diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 99077dcc..97f022fc 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -881,6 +881,7 @@ fn build_rolegroup_statefulset( {COMMON_BASH_TRAP_FUNCTIONS} {remove_vector_shutdown_file_command} prepare_signal_handlers + containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & bin/hbase {hbase_role_name_in_command} start & wait_for_termination $! {create_vector_shutdown_file_command} @@ -893,6 +894,11 @@ fn build_rolegroup_statefulset( create_vector_shutdown_file_command(STACKABLE_LOG_DIR), }]) .add_env_vars(merged_env) + // Needed for the `containerdebug` process to log its tracing information to. + .add_env_var( + "CONTAINERDEBUG_LOG_DIRECTORY", + format!("{STACKABLE_LOG_DIR}/containerdebug"), + ) .add_volume_mount("hbase-config", HBASE_CONFIG_TMP_DIR) .context(AddVolumeMountSnafu)? 
.add_volume_mount("hdfs-discovery", HDFS_DISCOVERY_TMP_DIR) diff --git a/tests/templates/kuttl/kerberos/30-install-hbase.yaml.j2 b/tests/templates/kuttl/kerberos/30-install-hbase.yaml.j2 index f862651c..31a27a68 100644 --- a/tests/templates/kuttl/kerberos/30-install-hbase.yaml.j2 +++ b/tests/templates/kuttl/kerberos/30-install-hbase.yaml.j2 @@ -39,6 +39,9 @@ commands: config: logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + resources: + memory: + limit: 1536Mi roleGroups: default: replicas: 2 diff --git a/tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 b/tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 index ac1bed8c..53e9a98e 100644 --- a/tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 @@ -45,6 +45,9 @@ spec: config: logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + resources: + memory: + limit: 1Gi roleGroups: default: configOverrides: diff --git a/tests/templates/kuttl/smoke/31-assert.yaml b/tests/templates/kuttl/smoke/31-assert.yaml new file mode 100644 index 00000000..25f259d3 --- /dev/null +++ b/tests/templates/kuttl/smoke/31-assert.yaml @@ -0,0 +1,9 @@ +--- +# This test checks if the containerdebug-state.json file is present and valid +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +commands: + - script: kubectl exec -n $NAMESPACE --container hbase test-hbase-master-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valif JSON"' + - script: kubectl exec -n $NAMESPACE --container hbase test-hbase-regionserver-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valif JSON"' + - script: kubectl exec -n $NAMESPACE --container hbase test-hbase-restserver-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valif JSON"'