From dcd94b0f2d9e3c1e442f83c8573bf2d60c704031 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:40:19 +0100 Subject: [PATCH] feat: Support setting TLS certificate lifetimes (#598) * feat: `requestedSecretLifetime` role group property added * Update rust/crd/src/lib.rs Co-authored-by: Sebastian Bernauer * Update CHANGELOG.md Co-authored-by: Sebastian Bernauer * implement review feedback * Update CHANGELOG.md Co-authored-by: Sebastian Bernauer * point to op-rs main * apply review patch * apply review patch 2 * chore: bump op-rs * cargo update -p rustls --------- Co-authored-by: Sebastian Bernauer --- CHANGELOG.md | 6 ++++ Cargo.lock | 25 ++++---------- Cargo.toml | 6 ++-- deploy/helm/hbase-operator/crds/crds.yaml | 24 ++++++++++++++ rust/crd/src/lib.rs | 34 +++++++++++++++----- rust/operator-binary/src/hbase_controller.rs | 29 +++++++++++------ rust/operator-binary/src/kerberos.rs | 3 ++ 7 files changed, 89 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e689be80..4d634da3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## [Unreleased] +### Added + +- The lifetime of auto generated TLS certificates is now configurable with the role and roleGroup + config property `requestedSecretLifetime`. This helps reducing frequent Pod restarts ([#598]). + ### Fixed - BREAKING: Use distinct ServiceAccounts for the Stacklets, so that multiple Stacklets can be @@ -9,6 +14,7 @@ restart ([#594]). [#594]: https://github.com/stackabletech/hbase-operator/pull/594 +[#598]: https://github.com/stackabletech/hbase-operator/pull/598 ## [24.11.0] - 2024-11-18 diff --git a/Cargo.lock b/Cargo.lock index ba92b419..96ec8175 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -478,17 +478,6 @@ dependencies = [ "powerfmt", ] -[[package]] -name = "derivative" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "digest" version = "0.10.7" @@ -1830,9 +1819,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.15" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "log", "once_cell", @@ -2210,15 +2199,15 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.82.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +version = "0.83.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "chrono", "clap", "const_format", "delegate", - "derivative", "dockerfile-parser", + "educe", "either", "futures 0.3.31", "indexmap", @@ -2249,7 +2238,7 @@ dependencies = [ [[package]] name = "stackable-operator-derive" version = "0.3.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "darling", "proc-macro2", @@ -2260,7 +2249,7 @@ dependencies = [ [[package]] name = "stackable-shared" version = "0.0.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "kube", "semver", diff --git a/Cargo.toml b/Cargo.toml index b963f40b..f5ef3e8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,11 +21,11 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" snafu = "0.8" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.82.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.83.0" } product-config = { git = "https://github.com/stackabletech/product-config.git", tag = "0.7.0" } strum = { version = "0.26", features = ["derive"] } tokio = { version = "1.40", features = ["full"] } tracing = "0.1" -#[patch."https://github.com/stackabletech/operator-rs.git"] -#stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" } +# [patch."https://github.com/stackabletech/operator-rs.git"] +# stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" } diff --git a/deploy/helm/hbase-operator/crds/crds.yaml b/deploy/helm/hbase-operator/crds/crds.yaml index 9269adaa..59ed17e9 100644 --- a/deploy/helm/hbase-operator/crds/crds.yaml +++ b/deploy/helm/hbase-operator/crds/crds.yaml @@ -297,6 +297,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -520,6 +524,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -724,6 +732,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -947,6 +959,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -1151,6 +1167,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -1374,6 +1394,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 590d550d..321b1d33 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -81,11 +81,6 @@ pub const METRICS_PORT: u16 = 9100; pub const JVM_HEAP_FACTOR: f32 = 0.8; -const DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(20); -const DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = - Duration::from_minutes_unchecked(60); -const DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(5); - #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("the role [{role}] is invalid and does not exist in HBase"))] @@ -262,6 +257,17 @@ pub enum HbaseRole { } impl HbaseRole { + const DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(20); + const DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = + Duration::from_minutes_unchecked(60); + const DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = + Duration::from_minutes_unchecked(5); + + // Auto TLS certificate lifetime + const DEFAULT_MASTER_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + const DEFAULT_REGION_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + const DEFAULT_REST_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + pub fn default_config( &self, cluster_name: &str, @@ -304,9 +310,15 @@ impl HbaseRole { }; let graceful_shutdown_timeout = match &self { - HbaseRole::Master => DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT, - HbaseRole::RegionServer => DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT, - HbaseRole::RestServer => DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT, + HbaseRole::Master => Self::DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT, + HbaseRole::RegionServer => Self::DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT, + HbaseRole::RestServer => Self::DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT, + }; + + let requested_secret_lifetime = match &self { + HbaseRole::Master => Self::DEFAULT_MASTER_SECRET_LIFETIME, + HbaseRole::RegionServer => Self::DEFAULT_REGION_SECRET_LIFETIME, + HbaseRole::RestServer => Self::DEFAULT_REST_SECRET_LIFETIME, }; HbaseConfigFragment { @@ -316,6 +328,7 @@ impl HbaseRole { logging: product_logging::spec::default_logging(), affinity: get_affinity(cluster_name, self, hdfs_discovery_cm_name), graceful_shutdown_timeout: Some(graceful_shutdown_timeout), + requested_secret_lifetime: Some(requested_secret_lifetime), } } @@ -410,6 +423,11 @@ pub struct HbaseConfig { /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. #[fragment_attrs(serde(default))] pub graceful_shutdown_timeout: Option, + + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. + /// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option, } impl Configuration for HbaseConfigFragment { diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 24c2700b..99077dcc 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -113,6 +113,9 @@ pub struct Ctx { #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { + #[snafu(display("missing secret lifetime"))] + MissingSecretLifetime, + #[snafu(display("object defines no version"))] ObjectHasNoVersion, @@ -777,7 +780,7 @@ fn build_rolegroup_statefulset( hbase_role: &HbaseRole, rolegroup_ref: &RoleGroupRef, rolegroup_config: &HashMap>, - config: &HbaseConfig, + merged_config: &HbaseConfig, resolved_product_image: &ResolvedProductImage, service_account: &ServiceAccount, ) -> Result { @@ -899,7 +902,7 @@ fn build_rolegroup_statefulset( .add_volume_mount("log", STACKABLE_LOG_DIR) .context(AddVolumeMountSnafu)? .add_container_ports(ports) - .resources(config.resources.clone().into()) + .resources(merged_config.resources.clone().into()) .startup_probe(startup_probe) .liveness_probe(liveness_probe) .readiness_probe(readiness_probe); @@ -919,7 +922,7 @@ fn build_rolegroup_statefulset( pod_builder .metadata(pb_metadata) .image_pull_secrets_from_product_image(resolved_product_image) - .affinity(&config.affinity) + .affinity(&merged_config.affinity) .add_volume(stackable_operator::k8s_openapi::api::core::v1::Volume { name: "hbase-config".to_string(), config_map: Some(ConfigMapVolumeSource { @@ -959,7 +962,7 @@ fn build_rolegroup_statefulset( Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { custom: ConfigMapLogConfig { config_map }, })), - }) = config.logging.containers.get(&Container::Hbase) + }) = merged_config.logging.containers.get(&Container::Hbase) { pod_builder .add_volume(Volume { @@ -984,21 +987,29 @@ fn build_rolegroup_statefulset( .context(AddVolumeSnafu)?; } - add_graceful_shutdown_config(config, &mut pod_builder).context(GracefulShutdownSnafu)?; + add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?; if hbase.has_kerberos_enabled() { - add_kerberos_pod_config(hbase, hbase_role, &mut hbase_container, &mut pod_builder) - .context(AddKerberosConfigSnafu)?; + add_kerberos_pod_config( + hbase, + hbase_role, + &mut hbase_container, + &mut pod_builder, + merged_config + .requested_secret_lifetime + .context(MissingSecretLifetimeSnafu)?, + ) + .context(AddKerberosConfigSnafu)?; } pod_builder.add_container(hbase_container.build()); // Vector sidecar shall be the last container in the list - if config.logging.enable_vector_agent { + if merged_config.logging.enable_vector_agent { pod_builder.add_container( product_logging::framework::vector_container( resolved_product_image, "hbase-config", "log", - config.logging.containers.get(&Container::Vector), + merged_config.logging.containers.get(&Container::Vector), ResourceRequirementsBuilder::new() .with_cpu_request("250m") .with_cpu_limit("500m") diff --git a/rust/operator-binary/src/kerberos.rs b/rust/operator-binary/src/kerberos.rs index 9bea6b8a..f6a293dc 100644 --- a/rust/operator-binary/src/kerberos.rs +++ b/rust/operator-binary/src/kerberos.rs @@ -15,6 +15,7 @@ use stackable_operator::{ }, }, kube::{runtime::reflector::ObjectRef, ResourceExt}, + time::Duration, utils::cluster_info::KubernetesClusterInfo, }; @@ -232,6 +233,7 @@ pub fn add_kerberos_pod_config( role: &HbaseRole, cb: &mut ContainerBuilder, pb: &mut PodBuilder, + requested_secret_lifetime: Duration, ) -> Result<(), Error> { if let Some(kerberos_secret_class) = hbase.kerberos_secret_class() { // Mount keytab @@ -270,6 +272,7 @@ pub fn add_kerberos_pod_config( .with_node_scope() .with_format(SecretFormat::TlsPkcs12) .with_tls_pkcs12_password(TLS_STORE_PASSWORD) + .with_auto_tls_cert_lifetime(requested_secret_lifetime) .build() .context(AddTlsSecretVolumeSnafu)?, )