Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: run containerdebug in the background #667

Merged
merged 1 commit into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.

- The lifetime of auto generated TLS certificates is now configurable with the role and roleGroup
config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#660]).
- Run a `containerdebug` process in the background of each "druid" container to collect debugging information ([#667]).

### Fixed

Expand All @@ -19,6 +20,7 @@ All notable changes to this project will be documented in this file.
[#656]: https://github.com/stackabletech/druid-operator/pull/656
[#657]: https://github.com/stackabletech/druid-operator/pull/657
[#660]: https://github.com/stackabletech/druid-operator/pull/660
[#667]: https://github.com/stackabletech/druid-operator/pull/667

## [24.11.0] - 2024-11-18

Expand Down
7 changes: 4 additions & 3 deletions rust/crd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub const JVM_SECURITY_PROPERTIES_FILE: &str = "security.properties";
pub const STACKABLE_TRUST_STORE: &str = "/stackable/truststore.p12";
pub const STACKABLE_TRUST_STORE_PASSWORD: &str = "changeit";
pub const CERTS_DIR: &str = "/stackable/certificates";
pub const LOG_DIR: &str = "/stackable/log";
pub const STACKABLE_LOG_DIR: &str = "/stackable/log";

// store file names
pub const DRUID_LOG_FILE: &str = "druid.log4j2.xml";
Expand Down Expand Up @@ -604,16 +604,17 @@ impl DruidRole {
{COMMON_BASH_TRAP_FUNCTIONS}
{remove_vector_shutdown_file_command}
prepare_signal_handlers
CONTAINERDEBUG_LOG_DIRECTORY={STACKABLE_LOG_DIR}/containerdebug containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop &
/stackable/druid/bin/run-druid {process_name} {RW_CONFIG_DIRECTORY} &
echo \"$!\" >> /tmp/DRUID_PID
wait_for_termination $(cat /tmp/DRUID_PID)
{create_vector_shutdown_file_command}
",
process_name = self.get_process_name(),
remove_vector_shutdown_file_command =
remove_vector_shutdown_file_command(LOG_DIR),
remove_vector_shutdown_file_command(STACKABLE_LOG_DIR),
create_vector_shutdown_file_command =
create_vector_shutdown_file_command(LOG_DIR),
create_vector_shutdown_file_command(STACKABLE_LOG_DIR),
}
}
}
Expand Down
12 changes: 6 additions & 6 deletions rust/operator-binary/src/druid_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ use stackable_druid_crd::{
Container, DeepStorageSpec, DruidCluster, DruidClusterStatus, DruidRole, APP_NAME,
AUTH_AUTHORIZER_OPA_URI, CREDENTIALS_SECRET_PROPERTY, DB_PASSWORD_ENV, DB_USERNAME_ENV,
DRUID_CONFIG_DIRECTORY, DS_BUCKET, EXTENSIONS_LOADLIST, HDFS_CONFIG_DIRECTORY, JVM_CONFIG,
JVM_SECURITY_PROPERTIES_FILE, LOG_CONFIG_DIRECTORY, LOG_DIR, MAX_DRUID_LOG_FILES_SIZE,
RUNTIME_PROPS, RW_CONFIG_DIRECTORY, S3_ACCESS_KEY, S3_ENDPOINT_URL, S3_PATH_STYLE_ACCESS,
S3_SECRET_KEY, ZOOKEEPER_CONNECTION_STRING,
JVM_SECURITY_PROPERTIES_FILE, LOG_CONFIG_DIRECTORY, MAX_DRUID_LOG_FILES_SIZE, RUNTIME_PROPS,
RW_CONFIG_DIRECTORY, S3_ACCESS_KEY, S3_ENDPOINT_URL, S3_PATH_STYLE_ACCESS, S3_SECRET_KEY,
STACKABLE_LOG_DIR, ZOOKEEPER_CONNECTION_STRING,
};
use stackable_operator::{
builder::{
Expand Down Expand Up @@ -964,7 +964,7 @@ fn build_rolegroup_statefulset(
// This command needs to be added at the beginning of the shell commands,
// otherwise the output of the following commands will not be captured!
prepare_container_commands.push(product_logging::framework::capture_shell_output(
LOG_DIR,
STACKABLE_LOG_DIR,
&prepare_container_name,
log_config,
));
Expand Down Expand Up @@ -1292,10 +1292,10 @@ fn add_log_volume_and_volume_mounts(
pb: &mut PodBuilder,
) -> Result<()> {
cb_druid
.add_volume_mount(LOG_VOLUME_NAME, LOG_DIR)
.add_volume_mount(LOG_VOLUME_NAME, STACKABLE_LOG_DIR)
.context(AddVolumeMountSnafu)?;
cb_prepare
.add_volume_mount(LOG_VOLUME_NAME, LOG_DIR)
.add_volume_mount(LOG_VOLUME_NAME, STACKABLE_LOG_DIR)
.context(AddVolumeMountSnafu)?;
pb.add_volume(
VolumeBuilder::new(LOG_VOLUME_NAME)
Expand Down
8 changes: 6 additions & 2 deletions rust/operator-binary/src/product_logging.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use snafu::{OptionExt, ResultExt, Snafu};
use stackable_druid_crd::{
Container, DruidCluster, DRUID_LOG_FILE, LOG4J2_CONFIG, LOG_DIR, MAX_DRUID_LOG_FILES_SIZE,
Container, DruidCluster, DRUID_LOG_FILE, LOG4J2_CONFIG, MAX_DRUID_LOG_FILES_SIZE,
STACKABLE_LOG_DIR,
};
use stackable_operator::{
builder::configmap::ConfigMapBuilder,
Expand Down Expand Up @@ -90,7 +91,10 @@ pub fn extend_role_group_config_map(
cm_builder.add_data(
LOG4J2_CONFIG,
product_logging::framework::create_log4j2_config(
&format!("{LOG_DIR}/{container}", container = Container::Druid),
&format!(
"{STACKABLE_LOG_DIR}/{container}",
container = Container::Druid
),
DRUID_LOG_FILE,
MAX_DRUID_LOG_FILES_SIZE
.scale_to(BinaryMultiple::Mebi)
Expand Down
11 changes: 11 additions & 0 deletions tests/templates/kuttl/smoke/50-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,14 @@ status:
expectedPods: 1
currentHealthy: 1
disruptionsAllowed: 1
---
# This test checks that the containerdebug-state.json file is present and is
# valid JSON in the "druid" container of every Druid role (broker, coordinator,
# historical, middlemanager and router). Each command pipes the file through
# `jq --exit-status`, so a missing or unparsable file fails the assertion.
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
timeout: 600
commands:
- script: kubectl exec -n $NAMESPACE --container druid druid-coordinator-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
- script: kubectl exec -n $NAMESPACE --container druid druid-router-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
- script: kubectl exec -n $NAMESPACE --container druid druid-middlemanager-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
- script: kubectl exec -n $NAMESPACE --container druid druid-broker-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
- script: kubectl exec -n $NAMESPACE --container druid druid-historical-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status
Loading