From 3ff2623e54ad6080719ccd6485aa0e31d3fc7915 Mon Sep 17 00:00:00 2001 From: torredil Date: Thu, 10 Oct 2024 04:24:46 +0000 Subject: [PATCH] Initial KEP check-in Signed-off-by: torredil --- keps/prod-readiness/sig-storage/4876.yaml | 3 + .../README.md | 845 ++++++++++++++++++ .../4876-mutable-csinode-allocatable/kep.yaml | 50 ++ 3 files changed, 898 insertions(+) create mode 100644 keps/prod-readiness/sig-storage/4876.yaml create mode 100644 keps/sig-storage/4876-mutable-csinode-allocatable/README.md create mode 100644 keps/sig-storage/4876-mutable-csinode-allocatable/kep.yaml diff --git a/keps/prod-readiness/sig-storage/4876.yaml b/keps/prod-readiness/sig-storage/4876.yaml new file mode 100644 index 00000000000..c11f694e080 --- /dev/null +++ b/keps/prod-readiness/sig-storage/4876.yaml @@ -0,0 +1,3 @@ +kep-number: 4876 +alpha: + approver: "@deads2k" diff --git a/keps/sig-storage/4876-mutable-csinode-allocatable/README.md b/keps/sig-storage/4876-mutable-csinode-allocatable/README.md new file mode 100644 index 00000000000..aaec3cf0ebd --- /dev/null +++ b/keps/sig-storage/4876-mutable-csinode-allocatable/README.md @@ -0,0 +1,845 @@ +# KEP-4876: Mutable CSINode Allocatable Property + + +- [Release Signoff Checklist](#release-signoff-checklist) +- [Summary](#summary) +- [Motivation](#motivation) + - [Goals](#goals) + - [Non-Goals](#non-goals) +- [Proposal](#proposal) + - [User Stories (Optional)](#user-stories-optional) + - [Story 1](#story-1) + - [Story 2](#story-2) + - [Story 3](#story-3) + - [Notes/Constraints/Caveats (Optional)](#notesconstraintscaveats-optional) + - [Risks and Mitigations](#risks-and-mitigations) +- [Design Details](#design-details) + - [Feature Gate](#feature-gate) + - [API Changes](#api-changes) + - [CSINode](#csinode) + - [CSIDriver](#csidriver) + - [Validation Changes](#validation-changes) + - [Volume Plugin Manager](#volume-plugin-manager) + - [NodeInfoManager Interface Extension](#nodeinfomanager-interface-extension) + - [CSINode Update 
Behavior](#csinode-update-behavior) + - [Pod Construction Changes](#pod-construction-changes) + - [Test Plan](#test-plan) + - [Prerequisite testing updates](#prerequisite-testing-updates) + - [Unit tests](#unit-tests) + - [Integration tests](#integration-tests) + - [e2e tests](#e2e-tests) + - [Graduation Criteria](#graduation-criteria) + - [Alpha](#alpha) + - [Beta](#beta) + - [GA](#ga) + - [Upgrade / Downgrade Strategy](#upgrade--downgrade-strategy) + - [Version Skew Strategy](#version-skew-strategy) +- [Production Readiness Review Questionnaire](#production-readiness-review-questionnaire) + - [Feature Enablement and Rollback](#feature-enablement-and-rollback) + - [Rollout, Upgrade and Rollback Planning](#rollout-upgrade-and-rollback-planning) + - [Monitoring Requirements](#monitoring-requirements) + - [Dependencies](#dependencies) + - [Scalability](#scalability) + - [Troubleshooting](#troubleshooting) +- [Implementation History](#implementation-history) +- [Drawbacks](#drawbacks) +- [Alternatives](#alternatives) +- [Infrastructure Needed (Optional)](#infrastructure-needed-optional) + + +## Release Signoff Checklist + +Items marked with (R) are required *prior to targeting to a milestone / release*. 
+ +- [ ] (R) Enhancement issue in release milestone, which links to KEP dir in [kubernetes/enhancements] (not the initial KEP PR) +- [ ] (R) KEP approvers have approved the KEP status as `implementable` +- [ ] (R) Design details are appropriately documented +- [ ] (R) Test plan is in place, giving consideration to SIG Architecture and SIG Testing input (including test refactors) + - [ ] e2e Tests for all Beta API Operations (endpoints) + - [ ] (R) Ensure GA e2e tests meet requirements for [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md) + - [ ] (R) Minimum Two Week Window for GA e2e tests to prove flake free +- [ ] (R) Graduation criteria is in place + - [ ] (R) [all GA Endpoints](https://github.com/kubernetes/community/pull/1806) must be hit by [Conformance Tests](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/conformance-tests.md) +- [ ] (R) Production readiness review completed +- [ ] (R) Production readiness review approved +- [ ] "Implementation History" section is up-to-date for milestone +- [ ] User-facing documentation has been created in [kubernetes/website], for publication to [kubernetes.io] +- [ ] Supporting documentation—e.g., additional design documents, links to mailing list discussions/SIG meetings, relevant PRs/issues, release notes + + +[kubernetes.io]: https://kubernetes.io/ +[kubernetes/enhancements]: https://git.k8s.io/enhancements +[kubernetes/kubernetes]: https://git.k8s.io/kubernetes +[kubernetes/website]: https://git.k8s.io/website + +## Summary + +This KEP proposes changes to make the `CSINode.Spec.Drivers[*].Allocatable.Count` field mutable and introduces a mechanism to update it dynamically based on user configuration at the CSI driver level. These updates can be triggered either by periodic intervals or by failure detection (such as volume attachment failures due to insufficient capacity). 
This improvement enhances the reliability of stateful pod scheduling by addressing mismatches between reported and actual attachment capacity on nodes. + +## Motivation + +Currently, a mismatch between the reported and actual attachment capacity on nodes can result in permanent scheduling errors and stuck workloads. This occurs when volume slots are taken after a CSI driver starts up, which results in `kube-scheduler` assigning stateful pods to nodes lacking the necessary capacity to support them. This mismatch can happen due to various scenarios, such as: + +1. Operations out of band with respect to CSI drivers and Kubernetes: + - Manual attachment of volumes by administrators or external controllers. + +2. Multi-driver scenarios: + - When multiple CSI drivers are used on a node and one driver's operations affect the available capacity for others. + +3. Other devices consuming available slots: + - Network interfaces taking up slots. + - GPU or specialized hardware attachments that weren't present during CSI driver initialization. + +These scenarios can lead to the CSI driver reporting an initial capacity that becomes inaccurate over time, causing the scheduler to make decisions based on outdated information. This results in pods being scheduled to nodes without sufficient capacity, ultimately getting stuck in a `ContainerCreating` state. + +By making the `CSINode.Spec.Drivers[*].Allocatable.Count` field mutable and introducing a mechanism to update it dynamically, we can ensure that the scheduler always has information which more accurately represents the actual state of the world, significantly improving the reliability of stateful pod scheduling. + +### Goals + +- Make `CSINode.Spec.Drivers[*].Allocatable.Count` mutable. +- Enable CSI drivers to define the interval at which the `Allocatable.Count` value on each node is updated through the `CSIDriver` object. 
+- Automatically update `CSINode.Spec.Drivers[*].Allocatable.Count` upon detecting a failure in volume attachment due to insufficient capacity. + +### Non-Goals + +- Modifying the core scheduling logic of Kubernetes. +- Implementing cloud provider-specific solutions within Kubernetes core. +- Re-scheduling pods stuck in a `ContainerCreating` state. + +## Proposal + +### User Stories (Optional) + +#### Story 1 + +As a cluster administrator, I want the reported attachment capacity on nodes to accurately reflect the actual capacity, so that stateful pods are reliably scheduled and do not become stuck in a `ContainerCreating` state due to insufficient capacity. + +#### Story 2 + +As a cluster operator, I use volumes during node setup for components like kubelet, containerd, and additional drivers. These boot volumes, which are not managed by CSI, may be detached after setup, and I need a way to reclaim these slots for other uses. The current static capacity reporting doesn't allow for this flexibility. + +#### Story 3 + +As a cluster operator, I need the Kubernetes scheduler to accurately count the number of available device slots for both storage volumes and network interfaces. On certain machine types, network interfaces and volumes share device slots, and network interfaces may be dynamically attached after the CSI driver is registered. This results in an inaccurate `Allocatable.Count` for volumes, causing stateful pods to be scheduled on nodes with insufficient capacity, ultimately getting stuck in a `ContainerCreating` state. + +### Notes/Constraints/Caveats (Optional) + + + +### Risks and Mitigations + +The following risks are identified: + +- Frequent updates/retrieval of the `CSINode` object could increase API server load. + +- Frequent calls to a CSI driver's `NodeGetInfo` RPC endpoint may become expensive, particularly if the operation involves retrieving information from a remote server or performing resource-intensive tasks. 
Specifically, this is a concern at scale, where the cumulative cost of multiple nodes repeatedly querying for updates is more impactful. + +- There's a race condition where the scheduler might assign a stateful pod to a node with insufficient capacity if the `CSINode.Spec.Drivers[*].Allocatable.Count` value hasn't been updated in time. + +The risks are mitigated as follows: + +- The use of the Kubernetes informer pattern in the scheduler. The +scheduler uses a `CSINode` informer and lister to efficiently access and watch `CSINode` objects. + +- Allow users to opt in to this feature at a per-CSI driver granularity by configuring the `CSIDriver` object. Specifically, administrators will be able to fine-tune the interval update value via the `NodeAllocatableUpdatePeriodSeconds` attribute in the `CSIDriver` object as per their specific requirement. + +- A reactive update mechanism is implemented to immediately update the `CSINode.Spec.Drivers[*].Allocatable.Count` value if a pod fails to enter a running state due to volume attachment failures as a result of insufficient capacity. This ensures that even if a race occurs, Kubernetes quickly corrects itself and prevents further scheduling errors. + +## Design Details + +### Feature Gate + +A new feature gate - `MutableCSINodeAllocatableCount` - will be introduced to control the functionality implemented by this KEP. When the feature gate is disabled, the `CSINode` object will remain immutable, maintaining the current behavior. + +### API Changes + +#### CSINode + +The `CSINode.Spec.Drivers[*].Allocatable.Count` field will be made mutable. No changes to the object structs are needed, only the validation logic needs to be revised. For reference, these are the API fields this KEP proposes to make mutable. + +```golang +// CSINodeDriver holds information about the specification of one CSI driver installed +type CSINodeDriver struct { + ... 
+ // allocatable represents the volume resources of a node that are available for scheduling. + // +optional + Allocatable *VolumeNodeResources +} +``` + +```golang +// VolumeNodeResources is a set of resource limits for scheduling of volumes. +type VolumeNodeResources struct { + // Maximum number of unique volumes managed by the CSI driver that can be used on a node. + // A volume that is both attached and mounted on a node is considered to be used once, not twice. + // The same rule applies for a unique volume that is shared among multiple pods on the same node. + // If this field is not specified, then the supported number of volumes on this node is unbounded. + // +optional + Count *int32 +} +``` + +#### CSIDriver + +A new field, `NodeAllocatableUpdatePeriodSeconds`, will be added to the `CSIDriverSpec` struct. This field allows a CSI driver to specify the interval at which the Kubelet should periodically query a driver's `NodeGetInfo` RPC endpoint to update the `CSINode` object. If this field is not set, updates will only occur in response to volume attachment failures as a result of insufficient capacity. + +```golang +// CSIDriverSpec is the specification of a CSIDriver. +type CSIDriverSpec struct { + ... + // NodeAllocatableUpdatePeriodSeconds specifies the interval between periodic updates of + // the CSINode allocatable capacity for this driver. If not set, periodic updates + // are disabled, and updates occur only upon detecting capacity-related failures. + // The minimum allowed value for this field is 10 seconds. 
+ // +optional + NodeAllocatableUpdatePeriodSeconds *int64 +} +``` + +#### Validation Changes + +The [ValidateCSINodeUpdate](https://github.com/kubernetes/kubernetes/blob/master/pkg/apis/storage/validation/validation.go#L304) function in the API validation code path will be modified to allow updates to the `Allocatable.Count` +field when the feature gate is enabled: + +```golang +func ValidateCSINodeUpdate(new, old *storage.CSINode) field.ErrorList { + allErrs := ValidateCSINode(new) + + if utilfeature.DefaultFeatureGate.Enabled(features.MutableCSINodeAllocatableCount) { + for _, oldDriver := range old.Spec.Drivers { + for _, newDriver := range new.Spec.Drivers { + // Allow Allocatable.Count to be modified + // Ensure all other fields are unchanged + } + } + } else { + // Existing validation logic for when feature gate is disabled + } + return allErrs +} +``` + +This updated logic allows the `Allocatable.Count` field to be modified when the feature gate is enabled, while ensuring all other fields remain immutable. When the feature gate is disabled, it falls back to the existing validation logic for backward compatibility. + +#### Volume Plugin Manager + +A new goroutine will be started in VolumePluginMgr’s [Run()](https://github.com/kubernetes/kubernetes/blob/master/pkg/volume/plugins.go#L953) func if the `NodeAllocatableUpdatePeriodSeconds` is set to a nonzero value. 
This goroutine will periodically trigger updates to the `CSINode` object based on the specified interval: + +```golang +func (pm *VolumePluginMgr) Run(stopCh <-chan struct{}) { + if pm.csiNodeUpdateInterval > 0 { + go wait.Until(pm.updateCSINodeInfo, pm.csiNodeUpdateInterval, stopCh) + } +} +``` + +#### NodeInfoManager Interface Extension + +The existing [NodeInfoManager](https://github.com/kubernetes/kubernetes/blob/master/pkg/volume/csi/nodeinfomanager/nodeinfomanager.go#L76) interface will be extended to include a new method for updating the `CSINode` object: + +```golang +// Interface implements an interface for managing labels of a node +type Interface interface { + CreateCSINode() (*storagev1.CSINode, error) + ... + // UpdateCSINode updates the CSINode object + UpdateCSINode() error +} +``` + +#### CSINode Update Behavior + +This table explains how updates to the `CSINode.Spec.Drivers[*].Allocatable.Count` field are handled, depending on the status of the `MutableCSINodeAllocatableCount` feature flag and the `NodeAllocatableUpdatePeriodSeconds` field in the `CSIDriver` object. 
+ +| **Feature Flag Status** | **`NodeAllocatableUpdatePeriodSeconds`** | **Behavior** | +|------------------------------------------|-------------------------------------|------------------------------------------------------------------------------------------------------------------------------------| +| Enabled | Set | Periodic updates occur at the defined interval + when invalid state is detected (volume attachment failures due to `ResourceExhausted`)| +| Enabled | Not set | Updates occur only in response to volume attachment failures (`ResourceExhausted` errors) | +| Disabled | Set | `NodeAllocatableUpdatePeriodSeconds` is ignored; `Allocatable.Count` remains static and immutable | +| Disabled | Not set | No updates occur; `Allocatable.Count` remains static and immutable | + + +#### Pod Construction Changes + +To address race conditions where the scheduler assigns stateful pods to nodes with insufficient capacity, Kubelet's pod construction process during [WaitForAttachAndMount](https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/volumemanager/volume_manager.go#L393) will now handle `ResourceExhausted` errors returned by CSI drivers during the `ControllerPublishVolume` RPC. + +The `ResourceExhausted` error is directly reported on the `VolumeAttachment` object associated with the relevant attachment. + +```golang +if err := kl.volumeManager.WaitForAttachAndMount(pod); err != nil { + if isResourceExhaustedError(err) { + // Update CSINode using a backoff mechanism + // Generate event for affected pod + } else { + // Existing error handling + } +} +``` + +This change ensures that when a pod fails to be constructed due to insufficient volume attachment capacity, that both: + +1. The `CSINode` object is promptly updated to reflect the actual available capacity, improving future scheduling decisions. +2. 
An event is added to the pod, providing visibility to cluster operators and enabling automated actions by components like +the Kubernetes [descheduler](https://github.com/kubernetes-sigs/descheduler) to fix the stateful pods stuck in `ContainerCreating`. + +### Test Plan + + + +[X] I/we understand the owners of the involved components may require updates to +existing tests to make this code solid enough prior to committing the changes necessary +to implement this enhancement. + +##### Prerequisite testing updates + + + +##### Unit tests + + + + + +- `k8s.io/kubernetes/pkg/kubelet`: 2024-09-24 - 51% +- `k8s.io/kubernetes/pkg/apis/storage/validation`: 2024-09-24 - 96% +- `k8s.io/kubernetes/pkg/volume/plugins.go`: 2024-09-24 - 27.9% +- `k8s.io/kubernetes/pkg/volume/csi/nodeinfomanager`: 2024-09-24 - 76.6% + +##### Integration tests + + + + + +- Test that updates to `CSINode.Spec.Drivers[*].Allocatable.Count` are properly reflected when the `MutableCSINodeAllocatableCount` feature gate is enabled. +- Test that periodic updates occur at the specified `NodeAllocatableUpdatePeriodSeconds`. +- Test that `ResourceExhausted` errors during volume attachment trigger an immediate update of the CSINode object. + +##### e2e tests + + + +- Test the impact on pod scheduling when `CSINode.Spec.Drivers[*].Allocatable.Count` is updated, ensuring that pods are not scheduled to nodes with insufficient capacity. +- Test the end-to-end workflow of updating `CSINode.Spec.Drivers[*].Allocatable.Count` using a CSI driver. + +### Graduation Criteria + +#### Alpha + +- Feature implemented behind a feature flag. +- Initial unit tests/integration tests completed and enabled. + +#### Beta + +- Allowing time for feedback (at least 2 releases between beta and GA). +- All unit tests/integration/e2e tests completed and enabled. + - Validate kubelet behavior when API server rejects `CSINode` updates (older API server version). 
+ - Validate CSI driver behavior with and without the `NodeAllocatableUpdatePeriodSeconds` field set. + - Validate scheduler behavior remains consistent regardless of whether `CSINode.Spec.Drivers[*].Allocatable.Count` is being dynamically updated or not. + - Validate pod construction failure handling in kubelet, ensuring it correctly updates the `CSINode` object when the feature is enabled and the API server supports it. + +#### GA + +- All beta criteria have been satisfied. +- Feature is stable. +- No bug reports / feedback / improvements to address. + +### Upgrade / Downgrade Strategy + + + +- Upgrade Strategy + - Upgrade the API server first to support mutable `CSINode.Spec.Drivers[*].Allocatable.Count` and the new `NodeAllocatableUpdatePeriodSeconds` field in `CSIDriver` object. + - Upgrade nodes + - Update CSI drivers to take advantage of the new feature, if desired. + +- Downgrade Strategy + - If downgrading the API server, ensure that nodes are downgraded first to avoid rejected `CSINode` update attempts. + - CSI drivers using the `NodeAllocatableUpdatePeriodSeconds` feature should be reconfigured to not use this field before downgrading the API server. + +### Version Skew Strategy + +This enhancement primarily involves changes to the kubelet and the API server, with no impact on the scheduler. Here's how the system will behave in various version skew scenarios: + +- API Server considerations + - Older API server versions will reject updates to the `CSINode.Spec.Drivers[*].Allocatable.Count` field and won't recognize the `NodeAllocatableUpdatePeriodSeconds` field in the `CSIDriver` object. + +- Kubelet version considerations + - Newer kubelet (with this feature) + Older API server: The kubelet will attempt to update the `CSINode.Spec.Drivers[*].Allocatable.Count` field due to capacity failures, but these updates will be rejected by the API server. 
+ - Older kubelet + Newer API server: Volume attachment failures due to capacity issues will not trigger `CSINode` updates during pod construction. + +- Scheduler considerations + - The scheduler is not directly affected by this change and will continue to use the latest `CSINode.Spec.Drivers[*].Allocatable.Count` value for scheduling decisions, regardless of whether it's being updated or not. + +## Production Readiness Review Questionnaire + + + +### Feature Enablement and Rollback + + + +###### How can this feature be enabled / disabled in a live cluster? + + + +- [X] Feature gate (also fill in values in `kep.yaml`) + - Feature gate name: `MutableCSINodeAllocatableCount` + - Components depending on the feature gate: `kube-apiserver`, `kube-controller-manager`, `kubelet`. + +###### Does enabling the feature change any default behavior? + + + +- The `CSINode.Spec.Drivers[*].Allocatable.Count` field becomes mutable and the kubelet will attempt to update this field when a pod fails to enter a ready state +due to a volume attachment failure due to insufficient capacity. + +###### Can the feature be disabled once it has been enabled (i.e. can we roll back the enablement)? + + + +- Yes, the feature can be disabled by turning off the feature gate. + +###### What happens if we reenable the feature if it was previously rolled back? + +- The `CSINode.Spec.Drivers[*].Allocatable.Count` field will become mutable again. + +###### Are there any tests for feature enablement/disablement? + + + +- Yes, unit tests will be implemented to verify the behavior of the `ValidateCSINodeUpdate` function when the feature gate is enabled and disabled. + +### Rollout, Upgrade and Rollback Planning + + + +###### How can a rollout or rollback fail? Can it impact already running workloads? + + + +###### What specific metrics should inform a rollback? + + + +###### Were upgrade and rollback tested? Was the upgrade->downgrade->upgrade path tested? 
+ + + +###### Is the rollout accompanied by any deprecations and/or removals of features, APIs, fields of API types, flags, etc.? + + + +### Monitoring Requirements + + + +###### How can an operator determine if the feature is in use by workloads? + + + +###### How can someone using this feature know that it is working for their instance? + + + +- [ ] Events + - Event Reason: +- [ ] API .status + - Condition name: + - Other field: +- [ ] Other (treat as last resort) + - Details: + +###### What are the reasonable SLOs (Service Level Objectives) for the enhancement? + + + +###### What are the SLIs (Service Level Indicators) an operator can use to determine the health of the service? + + + +- [ ] Metrics + - Metric name: + - [Optional] Aggregation method: + - Components exposing the metric: +- [ ] Other (treat as last resort) + - Details: + +###### Are there any missing metrics that would be useful to have to improve observability of this feature? + + + +### Dependencies + + + +###### Does this feature depend on any specific services running in the cluster? + + + +### Scalability + + + +###### Will enabling / using this feature result in any new API calls? + + + +Yes, there will be new API calls to update the `CSINode` object: + +``` +API call type: PATCH +Estimated throughput: Depends on the `NodeAllocatableUpdatePeriodSeconds` setting and the frequency of volume attachment failures. +Originating component: Kubelet, KCM +``` + +###### Will enabling / using this feature result in introducing new API types? + + + +- No, this feature does not introduce new API types. + +###### Will enabling / using this feature result in any new calls to the cloud provider? + + + +- No, this feature does not introduce new calls to the cloud provider directly. However, CSI drivers may make additional calls to retrieve updated capacity information. + +###### Will enabling / using this feature result in increasing size or count of the existing API objects? 
+ + + +``` +API Object: CSIDriver +Estimated increase in size: New `NodeAllocatableUpdatePeriodSeconds` field (approximately 32 bytes) +Estimated amount of new objects: No new objects, only modification of existing CSIDriver objects +``` + +###### Will enabling / using this feature result in increasing time taken by any operations covered by existing SLIs/SLOs? + + + +- This feature should not impact existing SLIs/SLOs. The `CSINode` updates are asynchronous and should not directly affect pod startup times or API responsiveness. + +###### Will enabling / using this feature result in non-negligible increase of resource usage (CPU, RAM, disk, IO, ...) in any components? + + + +- The feature may result in a slight increase in CPU and network usage on nodes due to periodic `CSINode` updates and more frequent calls to the CSI driver's `NodeGetInfo` RPC. + +###### Can enabling / using this feature result in resource exhaustion of some node resources (PIDs, sockets, inodes, etc.)? + + + +- This feature should not result in resource exhaustion of node resources. The additional goroutine and API calls are minimal and should not significantly impact the node's resources. + +### Troubleshooting + + + +###### How does this feature react if the API server and/or etcd is unavailable? + +###### What are other known failure modes? + + + +###### What steps should be taken if SLOs are not being met to determine the problem? + +## Implementation History + + + +## Drawbacks + + + +## Alternatives + + + +- Implementing a custom scheduler: This approach was rejected for several reasons. + - It would significantly degrade the customer experience, as users would need to deploy and manage an additional component. + - This issue is not a niche use case; it affects a wide range of CSI drivers and cloud providers. + - The default Kubernetes scheduler heavily relies on the `CSINode` allocatable object to make informed decisions about node capacity. 
Implementing a custom scheduler is arguably a workaround solution + that does not address the root cause and inherent limitation of the immutable `CSINode` object today. + +## Infrastructure Needed (Optional) + + diff --git a/keps/sig-storage/4876-mutable-csinode-allocatable/kep.yaml b/keps/sig-storage/4876-mutable-csinode-allocatable/kep.yaml new file mode 100644 index 00000000000..64860c9f010 --- /dev/null +++ b/keps/sig-storage/4876-mutable-csinode-allocatable/kep.yaml @@ -0,0 +1,50 @@ +title: Mutable CSINode Allocatable Property +kep-number: 4876 +authors: + - "@torredil" +owning-sig: sig-storage +participating-sigs: + - sig-api-machinery + - sig-node + - sig-scheduling + - sig-storage +status: provisional +creation-date: 2024-09-24 + +reviewers: + - "@gnufied" + - "@jsafrane" + - "@msau42" + - "@xing-yang" + +approvers: + - "@xing-yang" + - "@msau42" + +# The target maturity stage in the current dev cycle for this KEP. +stage: alpha + +# The most recent milestone for which work toward delivery of this KEP has been +# done. This can be the current (upcoming) milestone, if it is being actively +# worked on. +latest-milestone: "v1.32" + +# The milestone at which this feature was, or is targeted to be, at each stage. +milestone: + alpha: "v1.32" + beta: "v1.33" + stable: "v1.35" + +# The following PRR answers are required at alpha release +# List the feature gate name and the components for which it must be enabled +feature-gates: + - name: MutableCSINodeAllocatableCount + components: + - kube-apiserver + - kube-controller-manager + - kubelet +disable-supported: true + +# The following PRR answers are required at beta release +#metrics: +# - my_feature_metric