Skip to content

Commit

Permalink
feat: Support setting TLS certificate lifetimes (#598)
Browse files Browse the repository at this point in the history
* feat: `requestedSecretLifetime` role group property added

* Update rust/crd/src/lib.rs

Co-authored-by: Sebastian Bernauer <[email protected]>

* Update CHANGELOG.md

Co-authored-by: Sebastian Bernauer <[email protected]>

* implement review feedback

* Update CHANGELOG.md

Co-authored-by: Sebastian Bernauer <[email protected]>

* point to op-rs main

* apply review patch

* apply review patch 2

* chore: bump op-rs

* cargo update -p rustls

---------

Co-authored-by: Sebastian Bernauer <[email protected]>
  • Loading branch information
razvan and sbernauer authored Dec 4, 2024
1 parent af18193 commit dcd94b0
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 38 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@

## [Unreleased]

### Added

- The lifetime of auto-generated TLS certificates is now configurable with the role and roleGroup
config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#598]).

### Fixed

- BREAKING: Use distinct ServiceAccounts for the Stacklets, so that multiple Stacklets can be
deployed in one namespace. Existing Stacklets will use the newly created ServiceAccounts after
restart ([#594]).

[#594]: https://github.com/stackabletech/hbase-operator/pull/594
[#598]: https://github.com/stackabletech/hbase-operator/pull/598

## [24.11.0] - 2024-11-18

Expand Down
25 changes: 7 additions & 18 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_yaml = "0.9"
snafu = "0.8"
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.82.0" }
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.83.0" }
product-config = { git = "https://github.com/stackabletech/product-config.git", tag = "0.7.0" }
strum = { version = "0.26", features = ["derive"] }
tokio = { version = "1.40", features = ["full"] }
tracing = "0.1"

#[patch."https://github.com/stackabletech/operator-rs.git"]
#stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" }
# [patch."https://github.com/stackabletech/operator-rs.git"]
# stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" }
24 changes: 24 additions & 0 deletions deploy/helm/hbase-operator/crds/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -520,6 +524,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -724,6 +732,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -947,6 +959,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -1151,6 +1167,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -1374,6 +1394,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down
34 changes: 26 additions & 8 deletions rust/crd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,6 @@ pub const METRICS_PORT: u16 = 9100;

pub const JVM_HEAP_FACTOR: f32 = 0.8;

const DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(20);
const DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration =
Duration::from_minutes_unchecked(60);
const DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(5);

#[derive(Snafu, Debug)]
pub enum Error {
#[snafu(display("the role [{role}] is invalid and does not exist in HBase"))]
Expand Down Expand Up @@ -262,6 +257,17 @@ pub enum HbaseRole {
}

impl HbaseRole {
const DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(20);
const DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration =
Duration::from_minutes_unchecked(60);
const DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration =
Duration::from_minutes_unchecked(5);

// Auto TLS certificate lifetime
const DEFAULT_MASTER_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);
const DEFAULT_REGION_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);
const DEFAULT_REST_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);

pub fn default_config(
&self,
cluster_name: &str,
Expand Down Expand Up @@ -304,9 +310,15 @@ impl HbaseRole {
};

let graceful_shutdown_timeout = match &self {
HbaseRole::Master => DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT,
HbaseRole::RegionServer => DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT,
HbaseRole::RestServer => DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT,
HbaseRole::Master => Self::DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT,
HbaseRole::RegionServer => Self::DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT,
HbaseRole::RestServer => Self::DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT,
};

let requested_secret_lifetime = match &self {
HbaseRole::Master => Self::DEFAULT_MASTER_SECRET_LIFETIME,
HbaseRole::RegionServer => Self::DEFAULT_REGION_SECRET_LIFETIME,
HbaseRole::RestServer => Self::DEFAULT_REST_SECRET_LIFETIME,
};

HbaseConfigFragment {
Expand All @@ -316,6 +328,7 @@ impl HbaseRole {
logging: product_logging::spec::default_logging(),
affinity: get_affinity(cluster_name, self, hdfs_discovery_cm_name),
graceful_shutdown_timeout: Some(graceful_shutdown_timeout),
requested_secret_lifetime: Some(requested_secret_lifetime),
}
}

Expand Down Expand Up @@ -410,6 +423,11 @@ pub struct HbaseConfig {
/// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details.
#[fragment_attrs(serde(default))]
pub graceful_shutdown_timeout: Option<Duration>,

/// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`.
/// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
#[fragment_attrs(serde(default))]
pub requested_secret_lifetime: Option<Duration>,
}

impl Configuration for HbaseConfigFragment {
Expand Down
29 changes: 20 additions & 9 deletions rust/operator-binary/src/hbase_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ pub struct Ctx {
#[strum_discriminants(derive(IntoStaticStr))]
#[allow(clippy::enum_variant_names)]
pub enum Error {
#[snafu(display("missing secret lifetime"))]
MissingSecretLifetime,

#[snafu(display("object defines no version"))]
ObjectHasNoVersion,

Expand Down Expand Up @@ -777,7 +780,7 @@ fn build_rolegroup_statefulset(
hbase_role: &HbaseRole,
rolegroup_ref: &RoleGroupRef<HbaseCluster>,
rolegroup_config: &HashMap<PropertyNameKind, BTreeMap<String, String>>,
config: &HbaseConfig,
merged_config: &HbaseConfig,
resolved_product_image: &ResolvedProductImage,
service_account: &ServiceAccount,
) -> Result<StatefulSet> {
Expand Down Expand Up @@ -899,7 +902,7 @@ fn build_rolegroup_statefulset(
.add_volume_mount("log", STACKABLE_LOG_DIR)
.context(AddVolumeMountSnafu)?
.add_container_ports(ports)
.resources(config.resources.clone().into())
.resources(merged_config.resources.clone().into())
.startup_probe(startup_probe)
.liveness_probe(liveness_probe)
.readiness_probe(readiness_probe);
Expand All @@ -919,7 +922,7 @@ fn build_rolegroup_statefulset(
pod_builder
.metadata(pb_metadata)
.image_pull_secrets_from_product_image(resolved_product_image)
.affinity(&config.affinity)
.affinity(&merged_config.affinity)
.add_volume(stackable_operator::k8s_openapi::api::core::v1::Volume {
name: "hbase-config".to_string(),
config_map: Some(ConfigMapVolumeSource {
Expand Down Expand Up @@ -959,7 +962,7 @@ fn build_rolegroup_statefulset(
Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig {
custom: ConfigMapLogConfig { config_map },
})),
}) = config.logging.containers.get(&Container::Hbase)
}) = merged_config.logging.containers.get(&Container::Hbase)
{
pod_builder
.add_volume(Volume {
Expand All @@ -984,21 +987,29 @@ fn build_rolegroup_statefulset(
.context(AddVolumeSnafu)?;
}

add_graceful_shutdown_config(config, &mut pod_builder).context(GracefulShutdownSnafu)?;
add_graceful_shutdown_config(merged_config, &mut pod_builder).context(GracefulShutdownSnafu)?;
if hbase.has_kerberos_enabled() {
add_kerberos_pod_config(hbase, hbase_role, &mut hbase_container, &mut pod_builder)
.context(AddKerberosConfigSnafu)?;
add_kerberos_pod_config(
hbase,
hbase_role,
&mut hbase_container,
&mut pod_builder,
merged_config
.requested_secret_lifetime
.context(MissingSecretLifetimeSnafu)?,
)
.context(AddKerberosConfigSnafu)?;
}
pod_builder.add_container(hbase_container.build());

// Vector sidecar shall be the last container in the list
if config.logging.enable_vector_agent {
if merged_config.logging.enable_vector_agent {
pod_builder.add_container(
product_logging::framework::vector_container(
resolved_product_image,
"hbase-config",
"log",
config.logging.containers.get(&Container::Vector),
merged_config.logging.containers.get(&Container::Vector),
ResourceRequirementsBuilder::new()
.with_cpu_request("250m")
.with_cpu_limit("500m")
Expand Down
3 changes: 3 additions & 0 deletions rust/operator-binary/src/kerberos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use stackable_operator::{
},
},
kube::{runtime::reflector::ObjectRef, ResourceExt},
time::Duration,
utils::cluster_info::KubernetesClusterInfo,
};

Expand Down Expand Up @@ -232,6 +233,7 @@ pub fn add_kerberos_pod_config(
role: &HbaseRole,
cb: &mut ContainerBuilder,
pb: &mut PodBuilder,
requested_secret_lifetime: Duration,
) -> Result<(), Error> {
if let Some(kerberos_secret_class) = hbase.kerberos_secret_class() {
// Mount keytab
Expand Down Expand Up @@ -270,6 +272,7 @@ pub fn add_kerberos_pod_config(
.with_node_scope()
.with_format(SecretFormat::TlsPkcs12)
.with_tls_pkcs12_password(TLS_STORE_PASSWORD)
.with_auto_tls_cert_lifetime(requested_secret_lifetime)
.build()
.context(AddTlsSecretVolumeSnafu)?,
)
Expand Down

0 comments on commit dcd94b0

Please sign in to comment.