From bfe3225f97c21c998c53fa1b07d1a48a48c31162 Mon Sep 17 00:00:00 2001 From: haijianyang Date: Mon, 29 Jan 2024 06:04:36 -0500 Subject: [PATCH] Add support for expand disk --- PROJECT | 3 + api/v1alpha1/common_types.go | 57 +++ api/v1alpha1/doc.go | 21 + api/v1alpha1/groupversion_info.go | 36 ++ api/v1alpha1/hostconfig_types.go | 109 ++++ api/v1alpha1/hostoperationjob_types.go | 83 +++ api/v1alpha1/method.go | 72 +++ api/v1alpha1/zz_generated.deepcopy.go | 322 ++++++++++++ api/v1beta1/conditions_consts.go | 23 + api/v1beta1/consts.go | 3 + .../bases/kubesmart.smtx.io_hostconfigs.yaml | 153 ++++++ .../kubesmart.smtx.io_hostoperationjobs.yaml | 153 ++++++ config/default/webhookcainjection_patch.yaml | 28 +- config/rbac/role.yaml | 26 + config/webhook/kustomizeconfig.yaml | 14 +- config/webhook/manifests.yaml | 26 + controllers/elfmachine_controller.go | 15 + .../elfmachine_controller_resources.go | 152 ++++++ controllers/elfmachinetemplate_controller.go | 474 ++++++++++++++++++ main.go | 16 +- pkg/cloudinit/cloudinit.go | 32 ++ pkg/cloudinit/expand_root_partition | 5 + pkg/context/machine_template_context.go | 42 ++ pkg/hostagent/service.go | 69 +++ .../tasks/expand_root_partition.yaml | 14 + pkg/hostagent/tasks/tasks.go | 23 + pkg/manager/manager.go | 2 + pkg/service/errors.go | 5 + pkg/service/mock_services/vm_mock.go | 45 ++ pkg/service/util.go | 10 + pkg/service/vm.go | 76 ++- pkg/util/annotations/helpers.go | 29 ++ pkg/util/machine/machine.go | 67 +++ pkg/util/machine/md.go | 40 ++ pkg/util/md/md.go | 95 ++++ pkg/util/md/md_test.go | 160 ++++++ test/helpers/envtest.go | 4 + webhooks/elfmachinetemplate_webhook.go | 90 ++++ webhooks/elfmachinetemplate_webhook_test.go | 123 +++++ webhooks/util.go | 35 ++ 40 files changed, 2709 insertions(+), 43 deletions(-) create mode 100644 api/v1alpha1/common_types.go create mode 100644 api/v1alpha1/doc.go create mode 100644 api/v1alpha1/groupversion_info.go create mode 100644 api/v1alpha1/hostconfig_types.go create mode 100644 
api/v1alpha1/hostoperationjob_types.go create mode 100644 api/v1alpha1/method.go create mode 100644 api/v1alpha1/zz_generated.deepcopy.go create mode 100644 config/crd/bases/kubesmart.smtx.io_hostconfigs.yaml create mode 100644 config/crd/bases/kubesmart.smtx.io_hostoperationjobs.yaml create mode 100644 controllers/elfmachine_controller_resources.go create mode 100644 controllers/elfmachinetemplate_controller.go create mode 100644 pkg/cloudinit/cloudinit.go create mode 100644 pkg/cloudinit/expand_root_partition create mode 100644 pkg/context/machine_template_context.go create mode 100644 pkg/hostagent/service.go create mode 100644 pkg/hostagent/tasks/expand_root_partition.yaml create mode 100644 pkg/hostagent/tasks/tasks.go create mode 100644 pkg/util/md/md.go create mode 100644 pkg/util/md/md_test.go create mode 100644 webhooks/elfmachinetemplate_webhook.go create mode 100644 webhooks/elfmachinetemplate_webhook_test.go create mode 100644 webhooks/util.go diff --git a/PROJECT b/PROJECT index 8263dd52..7b1d96aa 100644 --- a/PROJECT +++ b/PROJECT @@ -39,4 +39,7 @@ resources: kind: ElfMachineTemplate path: github.com/smartxworks/cluster-api-provider-elf/api/v1beta1 version: v1beta1 + webhooks: + validation: true + webhookVersion: v1 version: "3" diff --git a/api/v1alpha1/common_types.go b/api/v1alpha1/common_types.go new file mode 100644 index 00000000..c49ed0b3 --- /dev/null +++ b/api/v1alpha1/common_types.go @@ -0,0 +1,57 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + corev1 "k8s.io/api/core/v1" +) + +type Phase string + +// xx. +const ( + PhaseInitializing = Phase("Initializing") + PhaseProcessing = Phase("Processing") + PhaseSucceeded = Phase("Succeeded") + PhaseFailed = Phase("Failed") +) + +type Ansible struct { + // RemotePlaybook 在远端的 playbook,单个 .tar.gz 压缩包,内容可以是单个 yaml 文件,也可以符合 ansible 要求的目录 + RemotePlaybook *RemotePlaybook `json:"remotePlaybook,omitempty"` + // LocalPlaybook 本地的 playbook,单个 yaml 文件, secret 引用或者 yaml 字符串 + LocalPlaybook *YAMLText `json:"localPlaybook,omitempty"` + // Values 执行 playbook 的参数,yaml 格式,可以是 secret 引用或者 yaml 字符串 + Values *YAMLText `json:"values,omitempty"` +} + +type RemotePlaybook struct { + // URL playbook 在远端的地址,支持 https + URL string `json:"url"` + // Name 要执行的 playbook 文件名,相对于压缩包顶层的位置 + Name string `json:"name"` + // MD5sum 压缩包的 MD5,填写了会进行校验,已经下载过的 playbook 校验通过后跳过重复下载 + MD5sum string `json:"md5sum,omitempty"` +} + +type YAMLText struct { + // SecretRef specifies the secret which stores yaml text. + SecretRef *corev1.SecretReference `json:"secretRef,omitempty"` + // Content is the inline yaml text. + //+kubebuilder:validation:Format=yaml + Content string `json:"content,omitempty"` +} diff --git a/api/v1alpha1/doc.go b/api/v1alpha1/doc.go new file mode 100644 index 00000000..282add15 --- /dev/null +++ b/api/v1alpha1/doc.go @@ -0,0 +1,21 @@ +/* +Copyright 2023 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +k8s:deepcopy-gen=package,register +// +k8s:defaulter-gen=TypeMeta +// +groupName=kubesmart.smtx.io + +package v1alpha1 diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go new file mode 100644 index 00000000..a25d0fcd --- /dev/null +++ b/api/v1alpha1/groupversion_info.go @@ -0,0 +1,36 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1alpha1 contains API Schema definitions for the kubesmart v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=kubesmart.smtx.io +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects. + GroupVersion = schema.GroupVersion{Group: "kubesmart.smtx.io", Version: "v1alpha1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme. + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/api/v1alpha1/hostconfig_types.go b/api/v1alpha1/hostconfig_types.go new file mode 100644 index 00000000..c537c419 --- /dev/null +++ b/api/v1alpha1/hostconfig_types.go @@ -0,0 +1,109 @@ +/* +Copyright 2023. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + // HostConfigFinalizer is set on PrepareForCreate callback. + HostConfigFinalizer = "hostconfig.kubesmart.smtx.io" + + // HostConfigReRunAnnotation 表示重新执行. + HostConfigReRunAnnotation = "hostconfig.kubesmart.smtx.io/re-run" + + // HostConfigConfigHashAnnotation 记录 spec.config 哈希值的 annotation,当实际计算的哈希值与记录的不同时,代表配置有变更,需要重新执行. + HostConfigConfigHashAnnotation = "hostconfig.kubesmart.smtx.io/config-hash" + + // HostConfigNodeNameLabel 表示属于哪个节点,如果长度长度超过 63,会用哈希值代替. 
+ HostConfigNodeNameLabel = "hostconfig.kubesmart.smtx.io/node-name" +) + +type HostConfigSpec struct { + NodeName string `json:"nodeName"` + Config Config `json:"config"` +} + +type Config struct { + // Ansible 通过 ansible playbook 完成配置 + Ansible *Ansible `json:"ansible,omitempty"` + // Timeout 执行一次配置的超时时间 + Timeout metav1.Duration `json:"timeout,omitempty"` +} + +type HostConfigStatus struct { + // Phase 当前状态 + Phase Phase `json:"phase"` + FailureReason string `json:"failureReason,omitempty"` + FailureMessage string `json:"failureMessage,omitempty"` + // LastExecutionTime 最后执行的时间戳 + LastExecutionTime *metav1.Time `json:"lastExecutionTime,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:resource:path=hostconfigs,scope=Namespaced,categories=kubesmart,shortName=hc +// +kubebuilder:storageversion +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="the current phase of HostConfig" +// +kubebuilder:printcolumn:name="LastExecutionTime",type="string",JSONPath=".status.lastExecutionTime",description="the last execution time" +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time duration since creation of HostConfig" + +// HostConfig is the Schema for the HostConfig API. +type HostConfig struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec HostConfigSpec `json:"spec,omitempty"` + Status HostConfigStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// HostConfigList contains a list of HostConfig. +type HostConfigList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []HostConfig `json:"items"` +} + +func init() { + SchemeBuilder.Register(&HostConfig{}, &HostConfigList{}) +} + +// IsConfigStable returns true if the hash of current config is equal to the hash in annotation. 
+func (in *HostConfig) IsConfigStable() bool { + annotationHash := GetAnnotation(in, HostConfigConfigHashAnnotation) + currentHash := CalculateHash(in.Spec.Config) + return annotationHash == currentHash +} + +func GetAnnotation(obj metav1.Object, key string) string { + annotations := obj.GetAnnotations() + value, _ := annotations[key] //nolint:gosimple + return value +} + +func CalculateHash(data interface{}) string { + b, _ := json.Marshal(data) //nolint:errchkjson + hashDate := sha256.Sum256(b) + return hex.EncodeToString(hashDate[:]) +} diff --git a/api/v1alpha1/hostoperationjob_types.go b/api/v1alpha1/hostoperationjob_types.go new file mode 100644 index 00000000..7548fd5a --- /dev/null +++ b/api/v1alpha1/hostoperationjob_types.go @@ -0,0 +1,83 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + // HostOperationJobFinalizer is set on PrepareForCreate callback. + HostOperationJobFinalizer = "hostoperationjob.kubesmart.smtx.io" + + // HostOperationJobReRunAnnotation 表示重新执行. 
+ HostOperationJobReRunAnnotation = "hostoperationjob.kubesmart.smtx.io/re-run" + + // HostOperationJobNodeNameLabel 表示属于哪个节点,如果长度长度超过 63,会用哈希值代替。 + HostOperationJobNodeNameLabel = "hostoperationjob.kubesmart.smtx.io/node-name" +) + +type HostOperationJobSpec struct { + NodeName string `json:"nodeName"` + Operation Operation `json:"operation"` +} + +type Operation struct { + // Ansible 通过 ansible playbook 完成操作 + Ansible *Ansible `json:"ansible,omitempty"` + // Timeout 执行一次操作的超时时间 + Timeout metav1.Duration `json:"timeout,omitempty"` +} + +type HostOperationJobStatus struct { + // Phase 当前阶段 + Phase Phase `json:"phase"` + FailureReason string `json:"failureReason,omitempty"` + FailureMessage string `json:"failureMessage,omitempty"` + // LastExecutionTime 最后执行的时间戳 + LastExecutionTime *metav1.Time `json:"lastExecutionTime,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:resource:path=hostoperationjobs,scope=Namespaced,categories=kubesmart,shortName=hoj +// +kubebuilder:storageversion +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="the current phase of HostOperationJob" +// +kubebuilder:printcolumn:name="LastExecutionTime",type="string",JSONPath=".status.lastExecutionTime",description="the last execution time" +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time duration since creation of HostOperationJob" + +// HostOperationJob is the Schema for the HostOperationJob API. +type HostOperationJob struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec HostOperationJobSpec `json:"spec,omitempty"` + Status HostOperationJobStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// HostOperationJobList contains a list of HostOperationJob. 
+type HostOperationJobList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []HostOperationJob `json:"items"` +} + +func init() { + SchemeBuilder.Register(&HostOperationJob{}, &HostOperationJobList{}) +} diff --git a/api/v1alpha1/method.go b/api/v1alpha1/method.go new file mode 100644 index 00000000..0f1e2c11 --- /dev/null +++ b/api/v1alpha1/method.go @@ -0,0 +1,72 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apitypes "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + valuesYamlKey = "values.yaml" +) + +func (in *YAMLText) IsEmpty() bool { + if in == nil { + return true + } + if in.SecretRef != nil && in.SecretRef.Name == "" && in.Content == "" { + return true + } + return in.SecretRef == nil && in.Content == "" +} + +func (in *YAMLText) getAddonSecretNamespacedName(defaultNamespace string) (apitypes.NamespacedName, bool) { + if in.SecretRef != nil && in.SecretRef.Name != "" { + result := apitypes.NamespacedName{ + Namespace: in.SecretRef.Namespace, + Name: in.SecretRef.Name, + } + if result.Namespace == "" { + result.Namespace = defaultNamespace + } + return result, true + } + return apitypes.NamespacedName{}, false +} + +// GetValuesYaml 优先使用secret,如果不存在尝试使用yaml inline. 
+func (in *YAMLText) GetValuesYaml(ctx context.Context, c client.Client, defaultNamespace string) (string, error) { + if in.IsEmpty() { + return "", nil + } + secretKey, ok := in.getAddonSecretNamespacedName(defaultNamespace) + if ok { + var err error + secret := &corev1.Secret{} + if err = c.Get(ctx, secretKey, secret); err != nil && !apierrors.IsNotFound(err) { + return "", err + } + return string(secret.Data[valuesYamlKey]), nil + } else { + return in.Content, nil + } +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 00000000..19c0fa59 --- /dev/null +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,322 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/api/core/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *Ansible) DeepCopyInto(out *Ansible) { + *out = *in + if in.RemotePlaybook != nil { + in, out := &in.RemotePlaybook, &out.RemotePlaybook + *out = new(RemotePlaybook) + **out = **in + } + if in.LocalPlaybook != nil { + in, out := &in.LocalPlaybook, &out.LocalPlaybook + *out = new(YAMLText) + (*in).DeepCopyInto(*out) + } + if in.Values != nil { + in, out := &in.Values, &out.Values + *out = new(YAMLText) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Ansible. +func (in *Ansible) DeepCopy() *Ansible { + if in == nil { + return nil + } + out := new(Ansible) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Config) DeepCopyInto(out *Config) { + *out = *in + if in.Ansible != nil { + in, out := &in.Ansible, &out.Ansible + *out = new(Ansible) + (*in).DeepCopyInto(*out) + } + out.Timeout = in.Timeout +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config. +func (in *Config) DeepCopy() *Config { + if in == nil { + return nil + } + out := new(Config) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostConfig) DeepCopyInto(out *HostConfig) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostConfig. +func (in *HostConfig) DeepCopy() *HostConfig { + if in == nil { + return nil + } + out := new(HostConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *HostConfig) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostConfigList) DeepCopyInto(out *HostConfigList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]HostConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostConfigList. +func (in *HostConfigList) DeepCopy() *HostConfigList { + if in == nil { + return nil + } + out := new(HostConfigList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *HostConfigList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostConfigSpec) DeepCopyInto(out *HostConfigSpec) { + *out = *in + in.Config.DeepCopyInto(&out.Config) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostConfigSpec. +func (in *HostConfigSpec) DeepCopy() *HostConfigSpec { + if in == nil { + return nil + } + out := new(HostConfigSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostConfigStatus) DeepCopyInto(out *HostConfigStatus) { + *out = *in + if in.LastExecutionTime != nil { + in, out := &in.LastExecutionTime, &out.LastExecutionTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostConfigStatus. 
+func (in *HostConfigStatus) DeepCopy() *HostConfigStatus { + if in == nil { + return nil + } + out := new(HostConfigStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostOperationJob) DeepCopyInto(out *HostOperationJob) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostOperationJob. +func (in *HostOperationJob) DeepCopy() *HostOperationJob { + if in == nil { + return nil + } + out := new(HostOperationJob) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *HostOperationJob) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostOperationJobList) DeepCopyInto(out *HostOperationJobList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]HostOperationJob, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostOperationJobList. +func (in *HostOperationJobList) DeepCopy() *HostOperationJobList { + if in == nil { + return nil + } + out := new(HostOperationJobList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *HostOperationJobList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostOperationJobSpec) DeepCopyInto(out *HostOperationJobSpec) { + *out = *in + in.Operation.DeepCopyInto(&out.Operation) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostOperationJobSpec. +func (in *HostOperationJobSpec) DeepCopy() *HostOperationJobSpec { + if in == nil { + return nil + } + out := new(HostOperationJobSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostOperationJobStatus) DeepCopyInto(out *HostOperationJobStatus) { + *out = *in + if in.LastExecutionTime != nil { + in, out := &in.LastExecutionTime, &out.LastExecutionTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostOperationJobStatus. +func (in *HostOperationJobStatus) DeepCopy() *HostOperationJobStatus { + if in == nil { + return nil + } + out := new(HostOperationJobStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Operation) DeepCopyInto(out *Operation) { + *out = *in + if in.Ansible != nil { + in, out := &in.Ansible, &out.Ansible + *out = new(Ansible) + (*in).DeepCopyInto(*out) + } + out.Timeout = in.Timeout +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Operation. +func (in *Operation) DeepCopy() *Operation { + if in == nil { + return nil + } + out := new(Operation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. +func (in *RemotePlaybook) DeepCopyInto(out *RemotePlaybook) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemotePlaybook. +func (in *RemotePlaybook) DeepCopy() *RemotePlaybook { + if in == nil { + return nil + } + out := new(RemotePlaybook) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *YAMLText) DeepCopyInto(out *YAMLText) { + *out = *in + if in.SecretRef != nil { + in, out := &in.SecretRef, &out.SecretRef + *out = new(v1.SecretReference) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new YAMLText. +func (in *YAMLText) DeepCopy() *YAMLText { + if in == nil { + return nil + } + out := new(YAMLText) + in.DeepCopyInto(out) + return out +} diff --git a/api/v1beta1/conditions_consts.go b/api/v1beta1/conditions_consts.go index ef75b629..1d04c897 100644 --- a/api/v1beta1/conditions_consts.go +++ b/api/v1beta1/conditions_consts.go @@ -117,6 +117,29 @@ const ( // WaitingForAvailableHostWithEnoughGPUsReason (Severity=Info) documents an ElfMachine // waiting for an available host with enough GPUs to create VM. WaitingForAvailableHostWithEnoughGPUsReason = "WaitingForAvailableHostWithEnoughGPUs" + + // ResourcesHotUpdatedCondition documents the status of the hot updating resources of a VM. + ResourcesHotUpdatedCondition = "ResourceHotUpdated" + + // WaitingForResourcesHotUpdateReason (Severity=Info) documents an ElfMachine waiting for updating resources. + WaitingForResourcesHotUpdateReason = "WaitingForResourcesHotUpdate" + + // ExpandingVMDiskReason documents (Severity=Info) ElfMachine currently executing the expand disk operation. 
+ ExpandingVMDiskReason = "ExpandingVMDisk" + + // ExpandingVMDiskFailedReason (Severity=Warning) documents an ElfMachine controller detecting + // an error while expanding disk; those kind of errors are usually transient and failed updating + // are automatically re-tried by the controller. + ExpandingVMDiskFailedReason = "ExpandingVMDiskFailed" + + // ExpandingRootPartitionReason documents (Severity=Info) ElfMachine currently executing the + // adding new disk capacity to root directory operation. + ExpandingRootPartitionReason = "ExpandingRootPartition" + + // ExpandingRootPartitionFailedReason (Severity=Warning) documents an ElfMachine controller + // detecting an error while adding new disk capacity to root directory; those kind of errors are + // usually transient and failed updating are automatically re-tried by the controller. + ExpandingRootPartitionFailedReason = "ExpandingRootPartitionFailed" ) // Conditions and Reasons related to make connections to a Tower. Can currently be used by ElfCluster and ElfMachine diff --git a/api/v1beta1/consts.go b/api/v1beta1/consts.go index 9848dfcc..769b1d02 100644 --- a/api/v1beta1/consts.go +++ b/api/v1beta1/consts.go @@ -40,6 +40,9 @@ const ( // 1. ${Tower username}@${Tower auth_config_id}, e.g. caas.smartx@7e98ecbb-779e-43f6-8330-1bc1d29fffc7. // 2. ${Tower username}, e.g. root. If auth_config_id is not set, it means it is a LOCAL user. CreatedByAnnotation = "cape.infrastructure.cluster.x-k8s.io/created-by" + + // HostAgentJobNameAnnotation is the annotation identifying the name of HostOperationJob. + HostAgentJobNameAnnotation = "cape.infrastructure.cluster.x-k8s.io/host-agent-job-name" ) // Labels. 
diff --git a/config/crd/bases/kubesmart.smtx.io_hostconfigs.yaml b/config/crd/bases/kubesmart.smtx.io_hostconfigs.yaml new file mode 100644 index 00000000..cceab7c5 --- /dev/null +++ b/config/crd/bases/kubesmart.smtx.io_hostconfigs.yaml @@ -0,0 +1,153 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.12.0 + name: hostconfigs.kubesmart.smtx.io +spec: + group: kubesmart.smtx.io + names: + categories: + - kubesmart + kind: HostConfig + listKind: HostConfigList + plural: hostconfigs + shortNames: + - hc + singular: hostconfig + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: the current phase of HostConfig + jsonPath: .status.phase + name: Phase + type: string + - description: the last execution time + jsonPath: .status.lastExecutionTime + name: LastExecutionTime + type: string + - description: Time duration since creation of HostConfig + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: HostConfig is the Schema for the HostConfig API. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + properties: + config: + properties: + ansible: + description: Ansible 通过 ansible playbook 完成配置 + properties: + localPlaybook: + description: LocalPlaybook 本地的 playbook,单个 yaml 文件, secret + 引用或者 yaml 字符串 + properties: + content: + description: Content is the inline yaml text. + format: yaml + type: string + secretRef: + description: SecretRef specifies the secret which stores + yaml text. + properties: + name: + description: name is unique within a namespace to + reference a secret resource. + type: string + namespace: + description: namespace defines the space within which + the secret name must be unique. + type: string + type: object + x-kubernetes-map-type: atomic + type: object + remotePlaybook: + description: RemotePlaybook 在远端的 playbook,单个 .tar.gz 压缩包,内容可以是单个 + yaml 文件,也可以符合 ansible 要求的目录 + properties: + md5sum: + description: MD5sum 压缩包的 MD5,填写了会进行校验,已经下载过的 playbook + 校验通过后跳过重复下载 + type: string + name: + description: Name 要执行的 playbook 文件名,相对于压缩包顶层的位置 + type: string + url: + description: URL playbook 在远端的地址,支持 https + type: string + required: + - name + - url + type: object + values: + description: Values 执行 playbook 的参数,yaml 格式,可以是 secret 引用或者 + yaml 字符串 + properties: + content: + description: Content is the inline yaml text. + format: yaml + type: string + secretRef: + description: SecretRef specifies the secret which stores + yaml text. + properties: + name: + description: name is unique within a namespace to + reference a secret resource. + type: string + namespace: + description: namespace defines the space within which + the secret name must be unique. 
+ type: string + type: object + x-kubernetes-map-type: atomic + type: object + type: object + timeout: + description: Timeout 执行一次配置的超时时间 + type: string + type: object + nodeName: + type: string + required: + - config + - nodeName + type: object + status: + properties: + failureMessage: + type: string + failureReason: + type: string + lastExecutionTime: + description: LastExecutionTime 最后执行的时间戳 + format: date-time + type: string + phase: + description: Phase 当前状态 + type: string + required: + - phase + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/kubesmart.smtx.io_hostoperationjobs.yaml b/config/crd/bases/kubesmart.smtx.io_hostoperationjobs.yaml new file mode 100644 index 00000000..b177299d --- /dev/null +++ b/config/crd/bases/kubesmart.smtx.io_hostoperationjobs.yaml @@ -0,0 +1,153 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.12.0 + name: hostoperationjobs.kubesmart.smtx.io +spec: + group: kubesmart.smtx.io + names: + categories: + - kubesmart + kind: HostOperationJob + listKind: HostOperationJobList + plural: hostoperationjobs + shortNames: + - hoj + singular: hostoperationjob + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: the current phase of HostOperationJob + jsonPath: .status.phase + name: Phase + type: string + - description: the last execution time + jsonPath: .status.lastExecutionTime + name: LastExecutionTime + type: string + - description: Time duration since creation of HostOperationJob + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: HostOperationJob is the Schema for the HostOperationJob API. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. 
Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + properties: + nodeName: + type: string + operation: + properties: + ansible: + description: Ansible 通过 ansible playbook 完成操作 + properties: + localPlaybook: + description: LocalPlaybook 本地的 playbook,单个 yaml 文件, secret + 引用或者 yaml 字符串 + properties: + content: + description: Content is the inline yaml text. + format: yaml + type: string + secretRef: + description: SecretRef specifies the secret which stores + yaml text. + properties: + name: + description: name is unique within a namespace to + reference a secret resource. + type: string + namespace: + description: namespace defines the space within which + the secret name must be unique. + type: string + type: object + x-kubernetes-map-type: atomic + type: object + remotePlaybook: + description: RemotePlaybook 在远端的 playbook,单个 .tar.gz 压缩包,内容可以是单个 + yaml 文件,也可以符合 ansible 要求的目录 + properties: + md5sum: + description: MD5sum 压缩包的 MD5,填写了会进行校验,已经下载过的 playbook + 校验通过后跳过重复下载 + type: string + name: + description: Name 要执行的 playbook 文件名,相对于压缩包顶层的位置 + type: string + url: + description: URL playbook 在远端的地址,支持 https + type: string + required: + - name + - url + type: object + values: + description: Values 执行 playbook 的参数,yaml 格式,可以是 secret 引用或者 + yaml 字符串 + properties: + content: + description: Content is the inline yaml text. 
+ format: yaml + type: string + secretRef: + description: SecretRef specifies the secret which stores + yaml text. + properties: + name: + description: name is unique within a namespace to + reference a secret resource. + type: string + namespace: + description: namespace defines the space within which + the secret name must be unique. + type: string + type: object + x-kubernetes-map-type: atomic + type: object + type: object + timeout: + description: Timeout 执行一次操作的超时时间 + type: string + type: object + required: + - nodeName + - operation + type: object + status: + properties: + failureMessage: + type: string + failureReason: + type: string + lastExecutionTime: + description: LastExecutionTime 最后执行的时间戳 + format: date-time + type: string + phase: + description: Phase 当前阶段 + type: string + required: + - phase + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml index e334e162..f5651701 100644 --- a/config/default/webhookcainjection_patch.yaml +++ b/config/default/webhookcainjection_patch.yaml @@ -13,17 +13,17 @@ metadata: name: mutating-webhook-configuration annotations: cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) -#! --- -#! apiVersion: admissionregistration.k8s.io/v1 -#! kind: ValidatingWebhookConfiguration -#! metadata: -#! labels: -#! app.kubernetes.io/name: validatingwebhookconfiguration -#! app.kubernetes.io/instance: validating-webhook-configuration -#! app.kubernetes.io/component: webhook -#! app.kubernetes.io/created-by: cluster-api-provider-elf -#! app.kubernetes.io/part-of: cluster-api-provider-elf -#! app.kubernetes.io/managed-by: kustomize -#! name: validating-webhook-configuration -#! annotations: -#! 
cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + labels: + app.kubernetes.io/name: validatingwebhookconfiguration + app.kubernetes.io/instance: validating-webhook-configuration + app.kubernetes.io/component: webhook + app.kubernetes.io/created-by: cluster-api-provider-elf + app.kubernetes.io/part-of: cluster-api-provider-elf + app.kubernetes.io/managed-by: kustomize + name: validating-webhook-configuration + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 30280427..f6a7881d 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -129,3 +129,29 @@ rules: - get - patch - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - elfmachinetemplates + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - elfmachinetemplates/finalizers + verbs: + - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - elfmachinetemplates/status + verbs: + - get + - patch + - update diff --git a/config/webhook/kustomizeconfig.yaml b/config/webhook/kustomizeconfig.yaml index 310c4817..25e21e3c 100644 --- a/config/webhook/kustomizeconfig.yaml +++ b/config/webhook/kustomizeconfig.yaml @@ -7,19 +7,19 @@ nameReference: - kind: MutatingWebhookConfiguration group: admissionregistration.k8s.io path: webhooks/clientConfig/service/name - #! - kind: ValidatingWebhookConfiguration - #! group: admissionregistration.k8s.io - #! path: webhooks/clientConfig/service/name + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name namespace: - kind: MutatingWebhookConfiguration group: admissionregistration.k8s.io path: webhooks/clientConfig/service/namespace create: true -#! 
- kind: ValidatingWebhookConfiguration -#! group: admissionregistration.k8s.io -#! path: webhooks/clientConfig/service/namespace -#! create: true +- kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true varReference: - path: metadata/annotations diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index bbab1a91..4e46ef1a 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -23,3 +23,29 @@ webhooks: resources: - elfmachines sideEffects: None +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: validating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /validate-infrastructure-cluster-x-k8s-io-v1beta1-elfmachinetemplate + failurePolicy: Fail + name: validation.elfmachinetemplate.infrastructure.x-k8s.io + rules: + - apiGroups: + - infrastructure.cluster.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - elfmachinetemplates + sideEffects: None diff --git a/controllers/elfmachine_controller.go b/controllers/elfmachine_controller.go index 29bfc762..bf127f4c 100644 --- a/controllers/elfmachine_controller.go +++ b/controllers/elfmachine_controller.go @@ -48,6 +48,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" + "github.com/smartxworks/cluster-api-provider-elf/pkg/cloudinit" "github.com/smartxworks/cluster-api-provider-elf/pkg/config" "github.com/smartxworks/cluster-api-provider-elf/pkg/context" capeerrors "github.com/smartxworks/cluster-api-provider-elf/pkg/errors" @@ -211,6 +212,7 @@ func (r *ElfMachineReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (_ conditions.SetSummary(machineContext.ElfMachine, conditions.WithConditions( infrav1.VMProvisionedCondition, 
+ infrav1.ResourcesHotUpdatedCondition, infrav1.TowerAvailableCondition, ), ) @@ -514,6 +516,7 @@ func (r *ElfMachineReconciler) reconcileVM(ctx *context.MachineContext) (*models if bootstrapData == "" { return nil, false, errors.New("bootstrapData is empty") } + bootstrapData = cloudinit.JoinExpandRootPartitionCommandsToCloudinit(bootstrapData) if ok, message, err := isELFScheduleVMErrorRecorded(ctx); err != nil { return nil, false, err @@ -646,6 +649,10 @@ func (r *ElfMachineReconciler) reconcileVM(ctx *context.MachineContext) (*models return vm, false, err } + if ok, err := r.reconcileVMResources(ctx, vm); err != nil || !ok { + return vm, false, err + } + return vm, true, nil } @@ -733,6 +740,14 @@ func (r *ElfMachineReconciler) reconcileVMStatus(ctx *context.MachineContext, vm return false, r.updateVM(ctx, vm) } + // Before the virtual machine is started for the first time, if the + // current disk capacity of the virtual machine is smaller than expected, + // expand the disk capacity first and then start it. cloud-init will + // add the new disk capacity to root. + if ok, err := r.reconcieVMVolume(ctx, vm, infrav1.VMProvisionedCondition); err != nil || !ok { + return ok, err + } + return false, r.powerOnVM(ctx, vm) case models.VMStatusSUSPENDED: // In some abnormal conditions, the VM will be in a suspended state, diff --git a/controllers/elfmachine_controller_resources.go b/controllers/elfmachine_controller_resources.go new file mode 100644 index 00000000..c36efe1a --- /dev/null +++ b/controllers/elfmachine_controller_resources.go @@ -0,0 +1,152 @@ +/* +Copyright 2024. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "fmt" + + "github.com/pkg/errors" + "github.com/smartxworks/cloudtower-go-sdk/v2/models" + apierrors "k8s.io/apimachinery/pkg/api/errors" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + capiremote "sigs.k8s.io/cluster-api/controllers/remote" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/controller-runtime/pkg/client" + + agentv1 "github.com/smartxworks/cluster-api-provider-elf/api/v1alpha1" + infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" + "github.com/smartxworks/cluster-api-provider-elf/pkg/context" + "github.com/smartxworks/cluster-api-provider-elf/pkg/hostagent" + "github.com/smartxworks/cluster-api-provider-elf/pkg/service" + annotationsutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/annotations" + machineutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/machine" +) + +func (r *ElfMachineReconciler) reconcileVMResources(ctx *context.MachineContext, vm *models.VM) (bool, error) { + if !machineutil.IsUpdatingElfMachineResources(ctx.ElfMachine) { + return true, nil + } + + if ok, err := r.reconcieVMVolume(ctx, vm, infrav1.ResourcesHotUpdatedCondition); err != nil || !ok { + return ok, err + } + + // Agent needs to wait for the node exists before it can run and execute commands. 
+ if ctx.Machine.Status.Phase != string(clusterv1.MachinePhaseRunning) { + ctx.Logger.Info("Waiting for node exists for host agent running", "phase", ctx.Machine.Status.Phase) + + return false, nil + } + + kubeClient, err := capiremote.NewClusterClient(ctx, "", ctx.Client, client.ObjectKey{Namespace: ctx.Cluster.Namespace, Name: ctx.Cluster.Name}) + if err != nil { + return false, err + } + + var agentJob *agentv1.HostOperationJob + agentJobName := annotationsutil.HostAgentJobName(ctx.ElfMachine) + if agentJobName != "" { + agentJob, err = hostagent.GetHostJob(ctx, kubeClient, ctx.ElfMachine.Namespace, agentJobName) + if err != nil && !apierrors.IsNotFound(err) { + return false, err + } + } + if agentJob == nil { + agentJob, err = hostagent.AddNewDiskCapacityToRoot(ctx, kubeClient, ctx.ElfMachine) + if err != nil { + conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityInfo, err.Error()) + + return false, err + } + + annotationsutil.AddAnnotations(ctx.ElfMachine, map[string]string{infrav1.HostAgentJobNameAnnotation: agentJob.Name}) + + conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionReason, clusterv1.ConditionSeverityInfo, "") + + ctx.Logger.Info("Waiting for disk to be added new disk capacity to root", "hostAgentJob", agentJob.Name) + + return false, nil + } + + switch agentJob.Status.Phase { + case agentv1.PhaseSucceeded: + annotationsutil.RemoveAnnotation(ctx.ElfMachine, infrav1.HostAgentJobNameAnnotation) + conditions.MarkTrue(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition) + ctx.Logger.Info("Add new disk capacity to root succeeded", "hostAgentJob", agentJob.Name) + case agentv1.PhaseFailed: + annotationsutil.RemoveAnnotation(ctx.ElfMachine, infrav1.HostAgentJobNameAnnotation) + conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, 
clusterv1.ConditionSeverityWarning, agentJob.Status.FailureMessage)
+		ctx.Logger.Info("Add new disk capacity to root failed, will try again", "hostAgentJob", agentJob.Name)
+
+		return false, nil
+	default:
+		ctx.Logger.Info("Waiting for adding new disk capacity to root job done", "jobStatus", agentJob.Status.Phase)
+
+		return false, nil
+	}
+
+	return true, nil
+}
+
+// reconcieVMVolume ensures that the vm disk size is as expected.
+func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm *models.VM, conditionType clusterv1.ConditionType) (bool, error) {
+	vmDiskIDs := make([]string, len(vm.VMDisks))
+	for i := 0; i < len(vm.VMDisks); i++ {
+		vmDiskIDs[i] = *vm.VMDisks[i].ID
+	}
+
+	vmDisks, err := ctx.VMService.GetVMDisks(vmDiskIDs)
+	if err != nil {
+		return false, errors.Wrapf(err, "failed to get disks for vm %s/%s", *vm.ID, *vm.Name)
+	}
+
+	vmVolume, err := ctx.VMService.GetVMVolume(*vmDisks[0].VMVolume.ID)
+	if err != nil {
+		return false, err
+	}
+
+	diskSize := service.TowerDisk(ctx.ElfMachine.Spec.DiskGiB)
+	if *diskSize > *vmVolume.Size {
+		if service.IsTowerResourcePerformingAnOperation(vmVolume.EntityAsyncStatus) {
+			ctx.Logger.Info("Waiting for vm volume task done", "volume", fmt.Sprintf("%s/%s", *vmVolume.ID, *vmVolume.Name))
+
+			return false, nil
+		}
+
+		return false, r.resizeVMVolume(ctx, vmVolume, *diskSize, conditionType)
+	} else if *diskSize < *vmVolume.Size {
+		conditions.MarkTrue(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition)
+		ctx.Logger.Info(fmt.Sprintf("Current disk capacity is larger than expected, skipping expand vm volume %s/%s", *vmVolume.ID, *vmVolume.Name), "currentSize", *vmVolume.Size, "expectedSize", *diskSize)
+	}
+
+	return true, nil
+}
+
+// resizeVMVolume sets the volume to the specified size.
+func (r *ElfMachineReconciler) resizeVMVolume(ctx *context.MachineContext, vmVolume *models.VMVolume, diskSize int64, conditionType clusterv1.ConditionType) error {
+	withTaskVMVolume, err := ctx.VMService.ResizeVMVolume(*vmVolume.ID, diskSize)
+	if err != nil {
+		conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
+
+		return errors.Wrapf(err, "failed to trigger expand size from %d to %d for vm volume %s/%s", *vmVolume.Size, diskSize, *vmVolume.ID, *vmVolume.Name)
+	}
+
+	conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskReason, clusterv1.ConditionSeverityInfo, "")
+
+	ctx.ElfMachine.SetTask(*withTaskVMVolume.TaskID)
+
+	ctx.Logger.Info(fmt.Sprintf("Waiting for the vm volume %s/%s to be expanded", *vmVolume.ID, *vmVolume.Name), "taskRef", ctx.ElfMachine.Status.TaskRef, "oldSize", *vmVolume.Size, "newSize", diskSize)
+
+	return nil
+}
diff --git a/controllers/elfmachinetemplate_controller.go b/controllers/elfmachinetemplate_controller.go
new file mode 100644
index 00000000..e52139ed
--- /dev/null
+++ b/controllers/elfmachinetemplate_controller.go
@@ -0,0 +1,474 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package controllers + +import ( + goctx "context" + "fmt" + "reflect" + "strings" + + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apitypes "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" + capiutil "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/collections" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/patch" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + ctrlmgr "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" + "github.com/smartxworks/cluster-api-provider-elf/pkg/config" + "github.com/smartxworks/cluster-api-provider-elf/pkg/context" + "github.com/smartxworks/cluster-api-provider-elf/pkg/service" + kcputil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/kcp" + machineutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/machine" + mdutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/md" +) + +// ElfMachineTemplateReconciler reconciles a ElfMachineTemplate object. 
+type ElfMachineTemplateReconciler struct { + *context.ControllerContext + NewVMService service.NewVMServiceFunc +} + +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=elfmachinetemplates,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=elfmachinetemplates/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=elfmachinetemplates/finalizers,verbs=update + +// AddMachineTemplateControllerToManager adds the ElfMachineTemplate controller to the provided +// manager. +func AddMachineTemplateControllerToManager(ctx *context.ControllerManagerContext, mgr ctrlmgr.Manager, options controller.Options) error { + var ( + controlledType = &infrav1.ElfMachineTemplate{} + controlledTypeName = reflect.TypeOf(controlledType).Elem().Name() + + controllerNameShort = fmt.Sprintf("%s-controller", strings.ToLower(controlledTypeName)) + ) + + // Build the controller context. + controllerContext := &context.ControllerContext{ + ControllerManagerContext: ctx, + Name: controllerNameShort, + Logger: ctx.Logger.WithName(controllerNameShort), + } + + reconciler := &ElfMachineTemplateReconciler{ + ControllerContext: controllerContext, + NewVMService: service.NewVMService, + } + + return ctrl.NewControllerManagedBy(mgr). + // Watch the controlled, infrastructure resource. + For(controlledType). + WithOptions(options). + // WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), ctx.WatchFilterValue)). + Complete(reconciler) +} + +func (r *ElfMachineTemplateReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { + // Get the ElfMachineTemplate resource for this request. 
+ var elfMachineTemplate infrav1.ElfMachineTemplate + if err := r.Client.Get(r, req.NamespacedName, &elfMachineTemplate); err != nil { + if apierrors.IsNotFound(err) { + r.Logger.Info("ElfMachineTemplate not found, won't reconcile", "key", req.NamespacedName) + + return reconcile.Result{}, nil + } + + return reconcile.Result{}, err + } + + // Fetch the CAPI Cluster. + cluster, err := capiutil.GetOwnerCluster(r, r.Client, elfMachineTemplate.ObjectMeta) + if err != nil { + return reconcile.Result{}, err + } + if cluster == nil { + r.Logger.Info("Waiting for Cluster Controller to set OwnerRef on ElfMachineTemplate", + "namespace", elfMachineTemplate.Namespace, "elfCluster", elfMachineTemplate.Name) + + return reconcile.Result{}, nil + } + + if annotations.IsPaused(cluster, &elfMachineTemplate) { + r.Logger.V(4).Info("ElfMachineTemplate linked to a cluster that is paused", + "namespace", elfMachineTemplate.Namespace, "elfMachineTemplate", elfMachineTemplate.Name) + + return reconcile.Result{}, nil + } + + // Fetch the ElfCluster + var elfCluster infrav1.ElfCluster + if err := r.Client.Get(r, client.ObjectKey{ + Namespace: cluster.Namespace, + Name: cluster.Spec.InfrastructureRef.Name, + }, &elfCluster); err != nil { + if apierrors.IsNotFound(err) { + r.Logger.Info("ElfMachine Waiting for ElfCluster", + "namespace", elfMachineTemplate.Namespace, "elfMachineTemplate", elfMachineTemplate.Name) + return reconcile.Result{}, nil + } + + return reconcile.Result{}, err + } + + logger := r.Logger.WithValues("namespace", elfMachineTemplate.Namespace, + "elfCluster", elfCluster.Name, "elfMachineTemplate", elfMachineTemplate.Name) + + // Create the machine context for this request. 
+ machineTemplateContext := &context.MachineTemplateContext{ + ControllerContext: r.ControllerContext, + Cluster: cluster, + ElfCluster: &elfCluster, + ElfMachineTemplate: &elfMachineTemplate, + Logger: logger, + } + + if elfMachineTemplate.ObjectMeta.DeletionTimestamp.IsZero() || !elfCluster.HasForceDeleteCluster() { + vmService, err := r.NewVMService(r.Context, elfCluster.GetTower(), logger) + if err != nil { + return reconcile.Result{}, err + } + + machineTemplateContext.VMService = vmService + } + + // Handle deleted machines + if !elfMachineTemplate.ObjectMeta.DeletionTimestamp.IsZero() { + return ctrl.Result{}, nil + } + + // Handle non-deleted machines + return r.reconcileNormal(machineTemplateContext) +} + +func (r *ElfMachineTemplateReconciler) reconcileNormal(ctx *context.MachineTemplateContext) (reconcile.Result, error) { + return r.reconcileMachineResources(ctx) +} + +// reconcileMachineResources ensures that the resources(disk capacity) of the +// virtual machines are the same as expected by ElfMachine. +// TODO: CPU and memory will be supported in the future. +func (r *ElfMachineTemplateReconciler) reconcileMachineResources(ctx *context.MachineTemplateContext) (reconcile.Result, error) { + if ok, err := r.reconcileCPResources(ctx); err != nil { + return reconcile.Result{}, err + } else if !ok { + return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil + } + + if ok, err := r.reconcileWorkerResources(ctx); err != nil { + return reconcile.Result{}, err + } else if !ok { + return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil + } + + return reconcile.Result{}, nil +} + +// reconcileCPResources ensures that the resources(disk capacity) of the +// control plane virtual machines are the same as expected by ElfMachine. 
+func (r *ElfMachineTemplateReconciler) reconcileCPResources(ctx *context.MachineTemplateContext) (bool, error) { + var kcp controlplanev1.KubeadmControlPlane + if err := ctx.Client.Get(ctx, apitypes.NamespacedName{ + Namespace: ctx.Cluster.Spec.ControlPlaneRef.Namespace, + Name: ctx.Cluster.Spec.ControlPlaneRef.Name, + }, &kcp); err != nil { + return false, err + } + + if kcp.Spec.MachineTemplate.InfrastructureRef.Namespace != ctx.ElfMachineTemplate.Namespace || + kcp.Spec.MachineTemplate.InfrastructureRef.Name != ctx.ElfMachineTemplate.Name { + return true, nil + } + + elfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) + if err != nil { + return false, err + } + + updatingResourcesElfMachines, needUpdatedResourcesElfMachines, err := selectResourcesNotUpToDateElfMachines(ctx, ctx.ElfMachineTemplate, elfMachines) + if err != nil { + return false, err + } else if len(updatingResourcesElfMachines) == 0 && len(needUpdatedResourcesElfMachines) == 0 { + return true, nil + } + + // Only one CP ElfMachine is allowed to update resources at the same time. + if len(updatingResourcesElfMachines) > 0 { + ctx.Logger.V(1).Info("Waiting for control plane ElfMachines to be updated resources", "updatingCount", len(updatingResourcesElfMachines), "needUpdatedCount", len(needUpdatedResourcesElfMachines)) + + return false, nil + } + + if ok, err := r.preflightChecksForCP(ctx, &kcp); err != nil || !ok { + return ok, err + } + + toBeUpdatedElfMachine := needUpdatedResourcesElfMachines[0] + if err := markElfMachineToBeUpdatedResources(ctx, ctx.ElfMachineTemplate, toBeUpdatedElfMachine); err != nil { + return false, err + } + + return false, err +} + +// preflightChecksForCP checks if the control plane is stable before proceeding with a updating resources operation, +// where stable means that: +// - KCP not in rolling update. +// - There are no machine deletion in progress. 
+// - All the health conditions on KCP are true. +// - All the health conditions on the control plane machines are true. +// If the control plane is not passing preflight checks, it requeue. +func (r *ElfMachineTemplateReconciler) preflightChecksForCP(ctx *context.MachineTemplateContext, kcp *controlplanev1.KubeadmControlPlane) (bool, error) { + // During the rolling update process, it is impossible to determine which + // machines are new and which are old machines. Complete the rolling update + // first and then update the resources to avoid updating resources for old + // machines that are about to be deleted. + if kcputil.IsKCPInRollingUpdate(kcp) { + ctx.Logger.Info("KCP rolling update in progress, skip updating resources") + + return false, nil + } + + cpMachines, err := machineutil.GetControlPlaneMachinesForCluster(ctx, ctx.Client, ctx.Cluster) + if err != nil { + return false, err + } + + machines := collections.FromMachines(cpMachines...) + deletingMachines := machines.Filter(collections.HasDeletionTimestamp) + if len(deletingMachines) > 0 { + ctx.Logger.Info("Waiting for machines to be deleted", "machines", deletingMachines.Names()) + + return false, nil + } + + allMachineHealthConditions := []clusterv1.ConditionType{ + controlplanev1.MachineAPIServerPodHealthyCondition, + controlplanev1.MachineControllerManagerPodHealthyCondition, + controlplanev1.MachineSchedulerPodHealthyCondition, + controlplanev1.MachineEtcdPodHealthyCondition, + controlplanev1.MachineEtcdMemberHealthyCondition, + } + machineErrors := []error{} + for _, machine := range machines { + if machine.Status.NodeRef == nil { + // The conditions will only ever be set on a Machine if we're able to correlate a Machine to a Node. + // Correlating Machines to Nodes requires the nodeRef to be set. + // Instead of confusing users with errors about that the conditions are not set, let's point them + // towards the unset nodeRef (which is the root cause of the conditions not being there). 
+ machineErrors = append(machineErrors, errors.Errorf("Machine %s does not have a corresponding Node yet (Machine.status.nodeRef not set)", machine.Name)) + } else { + for _, condition := range allMachineHealthConditions { + if err := preflightCheckCondition("Machine", machine, condition); err != nil { + machineErrors = append(machineErrors, err) + } + } + } + } + + if len(machineErrors) > 0 { + aggregatedError := kerrors.NewAggregate(machineErrors) + ctx.Logger.Info("Waiting for control plane to pass preflight checks", "failures", aggregatedError.Error()) + + return false, nil + } + + return true, nil +} + +func preflightCheckCondition(kind string, obj conditions.Getter, condition clusterv1.ConditionType) error { + c := conditions.Get(obj, condition) + if c == nil { + return errors.Errorf("%s %s does not have %s condition", kind, obj.GetName(), condition) + } + if c.Status == corev1.ConditionFalse { + return errors.Errorf("%s %s reports %s condition is false (%s, %s)", kind, obj.GetName(), condition, c.Severity, c.Message) + } + if c.Status == corev1.ConditionUnknown { + return errors.Errorf("%s %s reports %s condition is unknown (%s)", kind, obj.GetName(), condition, c.Message) + } + return nil +} + +// reconcileWorkerResources ensures that the resources(disk capacity) of the +// worker virtual machines are the same as expected by ElfMachine. 
+func (r *ElfMachineTemplateReconciler) reconcileWorkerResources(ctx *context.MachineTemplateContext) (bool, error) { + mds, err := machineutil.GetMDsForCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) + if err != nil { + return false, err + } + + allElfMachinesUpToDate := true + for i := 0; i < len(mds); i++ { + if ctx.ElfMachineTemplate.Name != mds[i].Spec.Template.Spec.InfrastructureRef.Name { + continue + } + + if ok, err := r.reconcileWorkerResourcesForMD(ctx, mds[i]); err != nil { + return false, err + } else if !ok { + allElfMachinesUpToDate = false + } + } + + return allElfMachinesUpToDate, nil +} + +// reconcileWorkerResourcesForMD ensures that the resources(disk capacity) of the +// worker virtual machines managed by the md are the same as expected by ElfMachine. +func (r *ElfMachineTemplateReconciler) reconcileWorkerResourcesForMD(ctx *context.MachineTemplateContext, md *clusterv1.MachineDeployment) (bool, error) { + elfMachines, err := machineutil.GetElfMachinesForMD(ctx, ctx.Client, ctx.Cluster, md) + if err != nil { + return false, err + } + + updatingResourcesElfMachines, needUpdatedResourcesElfMachines, err := selectResourcesNotUpToDateElfMachines(ctx, ctx.ElfMachineTemplate, elfMachines) + if err != nil { + return false, err + } else if len(updatingResourcesElfMachines) == 0 && len(needUpdatedResourcesElfMachines) == 0 { + return true, nil + } + + if ok := r.preflightChecksForWorker(ctx, md, updatingResourcesElfMachines); !ok { + return ok, nil + } + + maxSurge := getMaxSurge(md) + toBeUpdatedElfMachines := machineutil.SelectFirstNElfMachines(needUpdatedResourcesElfMachines, maxSurge-len(updatingResourcesElfMachines)) + for i := 0; i < len(toBeUpdatedElfMachines); i++ { + if err := markElfMachineToBeUpdatedResources(ctx, ctx.ElfMachineTemplate, toBeUpdatedElfMachines[i]); err != nil { + return false, err + } + } + + notUpToDateElfMachineCount := len(updatingResourcesElfMachines) + len(needUpdatedResourcesElfMachines) + if 
notUpToDateElfMachineCount > 0 { + ctx.Logger.V(2).Info("Waiting for worker ElfMachines to be updated resources", "md", md.Name, "count", notUpToDateElfMachineCount) + + return false, nil + } + + return true, nil +} + +func getMaxSurge(md *clusterv1.MachineDeployment) int { + maxSurge := mdutil.MaxSurge(*md) + if maxSurge <= 0 { + return 1 + } + + return int(maxSurge) +} + +// preflightChecksForWorker checks if the worker is stable before proceeding with a updating resources operation, +// where stable means that: +// - MD not in rolling update. +// - The number of machines updating resources is not greater than maxSurge. +// - The number of unavailable machines is no greater than maxUnavailable. +// If the worker is not passing preflight checks, it requeue. +func (r *ElfMachineTemplateReconciler) preflightChecksForWorker(ctx *context.MachineTemplateContext, md *clusterv1.MachineDeployment, updatingResourcesElfMachines []*infrav1.ElfMachine) bool { + if mdutil.IsMDInRollingUpdate(md) { + ctx.Logger.Info("MD rolling update in progress, skip updating resources", "md", md.Name) + + return false + } + + // Use maxSurge of rolling update to control the maximum number of concurrent + // update resources to avoid updating too many machines at the same time. + // If an exception occurs during the resource update process, all machines will + // not be affected. 
+ if maxSurge := getMaxSurge(md); len(updatingResourcesElfMachines) >= getMaxSurge(md) { + ctx.Logger.V(1).Info("Waiting for worker ElfMachines to be updated resources", "md", md.Name, "maxSurge", maxSurge, "updatingCount", len(updatingResourcesElfMachines)) + + return false + } + + maxUnavailable := mdutil.MaxUnavailable(*md) + if md.Status.UnavailableReplicas > maxUnavailable { + ctx.Logger.Info(fmt.Sprintf("MD unavailable replicas %d is greater than expected %d, skip updating resources", md.Status.UnavailableReplicas, maxUnavailable), "md", md.Name) + + return false + } + + return true +} + +// selectResourcesNotUpToDateElfMachines returns elfMachines whose resources are +// not as expected. +func selectResourcesNotUpToDateElfMachines(ctx *context.MachineTemplateContext, elfMachineTemplate *infrav1.ElfMachineTemplate, elfMachines []*infrav1.ElfMachine) ([]*infrav1.ElfMachine, []*infrav1.ElfMachine, error) { + var updatingResourcesElfMachines []*infrav1.ElfMachine + var needUpdatedResourcesElfMachines []*infrav1.ElfMachine + for i := 0; i < len(elfMachines); i++ { + elfMachine := elfMachines[i] + if machineutil.IsUpdatingElfMachineResources(elfMachine) { + updatingResourcesElfMachines = append(updatingResourcesElfMachines, elfMachine) + continue + } + + machine, err := capiutil.GetOwnerMachine(ctx, ctx.Client, elfMachine.ObjectMeta) + if err != nil { + return nil, nil, err + } + + // No need to update the resources of deleted and failed machines. 
+ if machine == nil || + !machine.DeletionTimestamp.IsZero() || + clusterv1.MachinePhase(machine.Status.Phase) == clusterv1.MachinePhaseFailed || + conditions.IsFalse(machine, clusterv1.MachineOwnerRemediatedCondition) { + continue + } + + if machineutil.NeedUpdateElfMachineResources(elfMachineTemplate, elfMachine) { + needUpdatedResourcesElfMachines = append(needUpdatedResourcesElfMachines, elfMachine) + } + } + + return updatingResourcesElfMachines, needUpdatedResourcesElfMachines, nil +} + +// markElfMachineToBeUpdatedResources synchronizes the expected resource values +// from the ElfMachineTemplate and marks the ElfMachine to be updated resources. +func markElfMachineToBeUpdatedResources(ctx *context.MachineTemplateContext, elfMachineTemplate *infrav1.ElfMachineTemplate, elfMachine *infrav1.ElfMachine) error { + patchHelper, err := patch.NewHelper(elfMachine, ctx.Client) + if err != nil { + return err + } + + // Ensure resources are up to date. + diskGiB := elfMachineTemplate.Spec.Template.Spec.DiskGiB + elfMachine.Spec.DiskGiB = elfMachineTemplate.Spec.Template.Spec.DiskGiB + conditions.MarkFalse(elfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.WaitingForResourcesHotUpdateReason, clusterv1.ConditionSeverityInfo, "") + + ctx.Logger.Info(fmt.Sprintf("Resources of ElfMachine is not up to date, marking for updating resources(disk: %d -> %d)", diskGiB, elfMachine.Spec.DiskGiB), "elfMachine", elfMachine.Name) + + if err := patchHelper.Patch(ctx, elfMachine); err != nil { + return errors.Wrapf(err, "failed to patch ElfMachine %s to mark for updating resources", elfMachine.Name) + } + + return nil +} diff --git a/main.go b/main.go index 7c2972e5..bf0ce015 100644 --- a/main.go +++ b/main.go @@ -62,8 +62,9 @@ var ( webhookOpts webhook.Options watchNamespace string - elfClusterConcurrency int - elfMachineConcurrency int + elfClusterConcurrency int + elfMachineConcurrency int + elfMachineTemplateConcurrency int tlsOptions = flags.TLSOptions{} @@ -90,6 +91,9 @@ 
func InitFlags(fs *pflag.FlagSet) { fs.IntVar(&elfMachineConcurrency, "max-elfmachine-concurrent-reconciles", 10, "Number of ELF machines to process simultaneously") + fs.IntVar(&elfMachineTemplateConcurrency, "max-elfmachinetemplate-concurrent-reconciles", 10, + "Number of ELF machine templates to process simultaneously") + fs.StringVar(&managerOpts.PodName, "pod-name", defaultPodName, "The name of the pod running the controller manager.") @@ -192,6 +196,10 @@ func main() { // Create a function that adds all of the controllers and webhooks to the manager. addToManager := func(ctx *context.ControllerManagerContext, mgr ctrlmgr.Manager) error { if os.Getenv("ENABLE_WEBHOOKS") != "false" { + if err := (&webhooks.ElfMachineTemplateValidator{}).SetupWebhookWithManager(mgr); err != nil { + return err + } + if err := (&webhooks.ElfMachineMutation{ Client: mgr.GetClient(), Logger: mgr.GetLogger().WithName("ElfMachineMutation"), @@ -208,6 +216,10 @@ func main() { return err } + if err := controllers.AddMachineTemplateControllerToManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: elfMachineTemplateConcurrency}); err != nil { + return err + } + return nil } diff --git a/pkg/cloudinit/cloudinit.go b/pkg/cloudinit/cloudinit.go new file mode 100644 index 00000000..dce49f9d --- /dev/null +++ b/pkg/cloudinit/cloudinit.go @@ -0,0 +1,32 @@ +/* +Copyright 2024. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cloudinit + +import ( + _ "embed" + "fmt" + "strings" +) + +//go:embed expand_root_partition +var expandRootPartition string + +func JoinExpandRootPartitionCommandsToCloudinit(cloudinit string) string { + runcmdIndex := strings.LastIndex(cloudinit, "runcmd:") + if runcmdIndex == -1 { + return fmt.Sprintf("%s%s", cloudinit, expandRootPartition) + } + + return strings.Replace(cloudinit, "runcmd:", expandRootPartition, 1) +} diff --git a/pkg/cloudinit/expand_root_partition b/pkg/cloudinit/expand_root_partition new file mode 100644 index 00000000..e2151f7d --- /dev/null +++ b/pkg/cloudinit/expand_root_partition @@ -0,0 +1,5 @@ +runcmd: + - "growpart /dev/vda 2" + - "pvresize /dev/vda2" + - "lvextend -l+100%FREE -n /dev/mapper/rl-root" + - "resize2fs /dev/mapper/rl-root" \ No newline at end of file diff --git a/pkg/context/machine_template_context.go b/pkg/context/machine_template_context.go new file mode 100644 index 00000000..a238067f --- /dev/null +++ b/pkg/context/machine_template_context.go @@ -0,0 +1,42 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package context + +import ( + "fmt" + + "github.com/go-logr/logr" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + + infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" + "github.com/smartxworks/cluster-api-provider-elf/pkg/service" +) + +// MachineTemplateContext is a Go context used with an ElfMachineTemplate. 
+type MachineTemplateContext struct { + *ControllerContext + Cluster *clusterv1.Cluster + ElfCluster *infrav1.ElfCluster + ElfMachineTemplate *infrav1.ElfMachineTemplate + Logger logr.Logger + VMService service.VMService +} + +// String returns ElfMachineTemplateGroupVersionKindElfMachineTemplateNamespace/ElfMachineTemplateName. +func (c *MachineTemplateContext) String() string { + return fmt.Sprintf("%s %s/%s", c.ElfMachineTemplate.GroupVersionKind(), c.ElfMachineTemplate.Namespace, c.ElfMachineTemplate.Name) +} diff --git a/pkg/hostagent/service.go b/pkg/hostagent/service.go new file mode 100644 index 00000000..ed329333 --- /dev/null +++ b/pkg/hostagent/service.go @@ -0,0 +1,69 @@ +/* +Copyright 2024. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package hostagent + +import ( + goctx "context" + "fmt" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + apitypes "k8s.io/apimachinery/pkg/types" + capiutil "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/controller-runtime/pkg/client" + + agentv1 "github.com/smartxworks/cluster-api-provider-elf/api/v1alpha1" + infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" + "github.com/smartxworks/cluster-api-provider-elf/pkg/hostagent/tasks" +) + +const defaultTimeout = 1 * time.Minute + +func GetHostJob(ctx goctx.Context, c client.Client, namespace, name string) (*agentv1.HostOperationJob, error) { + var restartKubeletJob agentv1.HostOperationJob + if err := c.Get(ctx, apitypes.NamespacedName{ + Name: name, + Namespace: namespace, + }, &restartKubeletJob); err != nil { + return nil, err + } + + return &restartKubeletJob, nil +} + +func AddNewDiskCapacityToRoot(ctx goctx.Context, c client.Client, elfMachine *infrav1.ElfMachine) (*agentv1.HostOperationJob, error) { + agentJob := &agentv1.HostOperationJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("cape-expand-root-rartition-%s-%s", elfMachine.Name, capiutil.RandomString(6)), + Namespace: elfMachine.Namespace, + }, + Spec: agentv1.HostOperationJobSpec{ + NodeName: elfMachine.Name, + Operation: agentv1.Operation{ + Ansible: &agentv1.Ansible{ + LocalPlaybook: &agentv1.YAMLText{ + Content: tasks.ExpandRootPartitionTask, + }, + }, + Timeout: metav1.Duration{Duration: defaultTimeout}, + }, + }, + } + + if err := c.Create(ctx, agentJob); err != nil { + return nil, err + } + + return agentJob, nil +} diff --git a/pkg/hostagent/tasks/expand_root_partition.yaml b/pkg/hostagent/tasks/expand_root_partition.yaml new file mode 100644 index 00000000..8bc001be --- /dev/null +++ b/pkg/hostagent/tasks/expand_root_partition.yaml @@ -0,0 +1,14 @@ +--- +- name: Expand root partition + hosts: all + become: true + gather_facts: false + tasks: + - name: Grow vda2 + shell: growpart /dev/vda 2 + - name: 
Resize vda2 + shell: pvresize /dev/vda2 + - name: Extend root + shell: lvextend -l+100%FREE -n /dev/mapper/rl-root + - name: Resize root + shell: resize2fs /dev/mapper/rl-root diff --git a/pkg/hostagent/tasks/tasks.go b/pkg/hostagent/tasks/tasks.go new file mode 100644 index 00000000..8a38d3df --- /dev/null +++ b/pkg/hostagent/tasks/tasks.go @@ -0,0 +1,23 @@ +/* +Copyright 2024. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tasks + +import ( + _ "embed" +) + +// ExpandRootPartitionTask is the task to add new disk capacity to root. +// +//go:embed expand_root_partition.yaml +var ExpandRootPartitionTask string diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 260810ba..ef75c50f 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -26,6 +26,7 @@ import ( controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" ctrl "sigs.k8s.io/controller-runtime" + agentv1 "github.com/smartxworks/cluster-api-provider-elf/api/v1alpha1" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" "github.com/smartxworks/cluster-api-provider-elf/pkg/context" ) @@ -48,6 +49,7 @@ func New(opts Options) (Manager, error) { _ = infrav1.AddToScheme(opts.Scheme) _ = bootstrapv1.AddToScheme(opts.Scheme) _ = controlplanev1.AddToScheme(opts.Scheme) + _ = agentv1.AddToScheme(opts.Scheme) // +kubebuilder:scaffold:scheme // Build the controller manager. 
diff --git a/pkg/service/errors.go b/pkg/service/errors.go index d4435878..bbc0368f 100644 --- a/pkg/service/errors.go +++ b/pkg/service/errors.go @@ -27,6 +27,7 @@ const ( HostNotFound = "HOST_NOT_FOUND" VMTemplateNotFound = "VM_TEMPLATE_NOT_FOUND" VMNotFound = "VM_NOT_FOUND" + VMVolumeNotFound = "VM_VOLUME_NOT_FOUND" VMGPUInfoNotFound = "VM_GPU_INFO_NOT_FOUND" VMDuplicate = "VM_DUPLICATE" TaskNotFound = "TASK_NOT_FOUND" @@ -61,6 +62,10 @@ func IsShutDownTimeout(message string) bool { return strings.Contains(message, "JOB_VM_SHUTDOWN_TIMEOUT") } +func IsVMVolumeNotFound(err error) bool { + return strings.Contains(err.Error(), VMVolumeNotFound) +} + func IsGPUAssignFailed(message string) bool { return strings.Contains(message, GPUAssignFailed) } diff --git a/pkg/service/mock_services/vm_mock.go b/pkg/service/mock_services/vm_mock.go index fd096d8d..55f56a60 100644 --- a/pkg/service/mock_services/vm_mock.go +++ b/pkg/service/mock_services/vm_mock.go @@ -323,6 +323,21 @@ func (mr *MockVMServiceMockRecorder) GetTask(id interface{}) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTask", reflect.TypeOf((*MockVMService)(nil).GetTask), id) } +// GetVMDisks mocks base method. +func (m *MockVMService) GetVMDisks(vmDiskIDs []string) ([]*models.VMDisk, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetVMDisks", vmDiskIDs) + ret0, _ := ret[0].([]*models.VMDisk) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetVMDisks indicates an expected call of GetVMDisks. +func (mr *MockVMServiceMockRecorder) GetVMDisks(vmDiskIDs interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVMDisks", reflect.TypeOf((*MockVMService)(nil).GetVMDisks), vmDiskIDs) +} + // GetVMGPUAllocationInfo mocks base method. 
func (m *MockVMService) GetVMGPUAllocationInfo(id string) (*models.VMGpuInfo, error) { m.ctrl.T.Helper() @@ -383,6 +398,21 @@ func (mr *MockVMServiceMockRecorder) GetVMTemplate(template interface{}) *gomock return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVMTemplate", reflect.TypeOf((*MockVMService)(nil).GetVMTemplate), template) } +// GetVMVolume mocks base method. +func (m *MockVMService) GetVMVolume(vmVolumeID string) (*models.VMVolume, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetVMVolume", vmVolumeID) + ret0, _ := ret[0].(*models.VMVolume) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetVMVolume indicates an expected call of GetVMVolume. +func (mr *MockVMServiceMockRecorder) GetVMVolume(vmVolumeID interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVMVolume", reflect.TypeOf((*MockVMService)(nil).GetVMVolume), vmVolumeID) +} + // GetVlan mocks base method. func (m *MockVMService) GetVlan(id string) (*models.Vlan, error) { m.ctrl.T.Helper() @@ -458,6 +488,21 @@ func (mr *MockVMServiceMockRecorder) RemoveGPUDevices(id, gpus interface{}) *gom return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveGPUDevices", reflect.TypeOf((*MockVMService)(nil).RemoveGPUDevices), id, gpus) } +// ResizeVMVolume mocks base method. +func (m *MockVMService) ResizeVMVolume(vmVolumeID string, size int64) (*models.WithTaskVMVolume, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ResizeVMVolume", vmVolumeID, size) + ret0, _ := ret[0].(*models.WithTaskVMVolume) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ResizeVMVolume indicates an expected call of ResizeVMVolume. +func (mr *MockVMServiceMockRecorder) ResizeVMVolume(vmVolumeID, size interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResizeVMVolume", reflect.TypeOf((*MockVMService)(nil).ResizeVMVolume), vmVolumeID, size) +} + // ShutDown mocks base method. 
// TowerInt64 returns a pointer to a copy of v, for Tower API request models
// whose numeric fields are *int64. Mirrors the style of TowerInt32 above.
func TowerInt64(v int64) *int64 {
	val := v
	return &val
}
func HasGPUsCanNotBeUsedForVM(gpuVMInfos GPUVMInfos, elfMachine *infrav1.ElfMachine) bool { diff --git a/pkg/service/vm.go b/pkg/service/vm.go index 0f382672..4a5c12d4 100644 --- a/pkg/service/vm.go +++ b/pkg/service/vm.go @@ -31,8 +31,10 @@ import ( clienttask "github.com/smartxworks/cloudtower-go-sdk/v2/client/task" clientvlan "github.com/smartxworks/cloudtower-go-sdk/v2/client/vlan" clientvm "github.com/smartxworks/cloudtower-go-sdk/v2/client/vm" + clientvmdisk "github.com/smartxworks/cloudtower-go-sdk/v2/client/vm_disk" clientvmnic "github.com/smartxworks/cloudtower-go-sdk/v2/client/vm_nic" clientvmplacementgroup "github.com/smartxworks/cloudtower-go-sdk/v2/client/vm_placement_group" + clientvmvolume "github.com/smartxworks/cloudtower-go-sdk/v2/client/vm_volume" "github.com/smartxworks/cloudtower-go-sdk/v2/models" "k8s.io/apimachinery/pkg/util/wait" @@ -58,6 +60,9 @@ type VMService interface { FindByIDs(ids []string) ([]*models.VM, error) FindVMsByName(name string) ([]*models.VM, error) GetVMNics(vmID string) ([]*models.VMNic, error) + GetVMDisks(vmDiskIDs []string) ([]*models.VMDisk, error) + GetVMVolume(vmVolumeID string) (*models.VMVolume, error) + ResizeVMVolume(vmVolumeID string, size int64) (*models.WithTaskVMVolume, error) GetVMTemplate(template string) (*models.ContentLibraryVMTemplate, error) GetTask(id string) (*models.Task, error) WaitTask(ctx goctx.Context, id string, timeout, interval time.Duration) (*models.Task, error) @@ -117,6 +122,56 @@ func (svr *TowerVMService) UpdateVM(vm *models.VM, elfMachine *infrav1.ElfMachin return updateVMResp.Payload[0], nil } +func (svr *TowerVMService) GetVMDisks(vmDiskIDs []string) ([]*models.VMDisk, error) { + getVMDisksParams := clientvmdisk.NewGetVMDisksParams() + getVMDisksParams.RequestBody = &models.GetVMDisksRequestBody{ + Where: &models.VMDiskWhereInput{IDIn: vmDiskIDs}, + OrderBy: models.NewVMDiskOrderByInput(models.VMDiskOrderByInputBootASC), + } + + getVMDisksResp, err := 
svr.Session.VMDisk.GetVMDisks(getVMDisksParams) + if err != nil { + return nil, err + } + + return getVMDisksResp.Payload, nil +} + +func (svr *TowerVMService) GetVMVolume(volumeID string) (*models.VMVolume, error) { + getVMVolumesParams := clientvmvolume.NewGetVMVolumesParams() + getVMVolumesParams.RequestBody = &models.GetVMVolumesRequestBody{ + Where: &models.VMVolumeWhereInput{ID: TowerString(volumeID)}, + } + + getVMVolumesResp, err := svr.Session.VMVolume.GetVMVolumes(getVMVolumesParams) + if err != nil { + return nil, err + } + + if len(getVMVolumesResp.Payload) == 0 { + return nil, errors.New(VMVolumeNotFound) + } + + return getVMVolumesResp.Payload[0], nil +} + +// ResizeVMVolume resizes the virtual machine volume to the specified size. +// Can only increase the volume size, not reduce it. +func (svr *TowerVMService) ResizeVMVolume(vmVolumeID string, size int64) (*models.WithTaskVMVolume, error) { + updateVMVolumeParams := clientvmvolume.NewUpdateVMVolumeParams() + updateVMVolumeParams.RequestBody = &models.UpdateVMVolumeParams{ + Data: &models.UpdateVMVolumeParamsData{Size: TowerInt64(size)}, + Where: &models.VMVolumeWhereInput{ID: TowerString(vmVolumeID)}, + } + + updateVMVolumeResp, err := svr.Session.VMVolume.UpdateVMVolume(updateVMVolumeParams) + if err != nil { + return nil, err + } + + return updateVMVolumeResp.Payload[0], nil +} + // Clone kicks off a clone operation on Elf to create a new virtual machine using VM template. 
func (svr *TowerVMService) Clone( elfCluster *infrav1.ElfCluster, elfMachine *infrav1.ElfMachine, bootstrapData, @@ -149,21 +204,6 @@ func (svr *TowerVMService) Clone( ha = TowerBool(false) } - var mountDisks []*models.MountNewCreateDisksParams - if elfMachine.Spec.DiskGiB > 0 { - storagePolicy := models.VMVolumeElfStoragePolicyTypeREPLICA2THINPROVISION - bus := models.BusVIRTIO - mountDisks = append(mountDisks, &models.MountNewCreateDisksParams{ - Boot: TowerInt32(0), - Bus: &bus, - VMVolume: &models.MountNewCreateDisksParamsVMVolume{ - ElfStoragePolicy: &storagePolicy, - Name: TowerString(config.VMDiskName), - Size: TowerDisk(elfMachine.Spec.DiskGiB), - }, - }) - } - nics := make([]*models.VMNicParams, 0, len(elfMachine.Spec.Network.Devices)) networks := make([]*models.CloudInitNetWork, 0, len(elfMachine.Spec.Network.Devices)) for i := 0; i < len(elfMachine.Spec.Network.Devices); i++ { @@ -268,11 +308,7 @@ func (svr *TowerVMService) Clone( TemplateID: template.ID, GuestOsType: models.NewVMGuestsOperationSystem(models.VMGuestsOperationSystem(elfMachine.Spec.OSType)), VMNics: nics, - DiskOperate: &models.VMDiskOperate{ - NewDisks: &models.VMDiskParams{ - MountNewCreateDisks: mountDisks, - }, - }, + DiskOperate: &models.VMDiskOperate{}, CloudInit: &models.TemplateCloudInit{ Hostname: TowerString(elfMachine.Name), UserData: TowerString(bootstrapData), diff --git a/pkg/util/annotations/helpers.go b/pkg/util/annotations/helpers.go index 2534140f..04988953 100644 --- a/pkg/util/annotations/helpers.go +++ b/pkg/util/annotations/helpers.go @@ -18,6 +18,7 @@ package annotations import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/annotations" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" @@ -52,7 +53,35 @@ func GetCreatedBy(o metav1.Object) string { return annotations[infrav1.CreatedByAnnotation] } +func HostAgentJobName(o metav1.Object) string { + annotations := 
o.GetAnnotations() + if annotations == nil { + return "" + } + + return annotations[infrav1.HostAgentJobNameAnnotation] +} + +func GetTemplateClonedFromName(o metav1.Object) string { + annotations := o.GetAnnotations() + if annotations == nil { + return "" + } + + return annotations[clusterv1.TemplateClonedFromNameAnnotation] +} + // AddAnnotations sets the desired annotations on the object and returns true if the annotations have changed. func AddAnnotations(o metav1.Object, desired map[string]string) bool { return annotations.AddAnnotations(o, desired) } + +// RemoveAnnotation deletes the desired annotation on the object. +func RemoveAnnotation(o metav1.Object, annotation string) { + annotations := o.GetAnnotations() + if annotations == nil { + return + } + delete(annotations, annotation) + o.SetAnnotations(annotations) +} diff --git a/pkg/util/machine/machine.go b/pkg/util/machine/machine.go index e66ac4e0..e567220b 100644 --- a/pkg/util/machine/machine.go +++ b/pkg/util/machine/machine.go @@ -25,6 +25,7 @@ import ( "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/controller-runtime/pkg/client" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" @@ -88,6 +89,47 @@ func GetControlPlaneElfMachinesInCluster(ctx goctx.Context, ctrlClient client.Cl return machines, nil } +func GetElfMachinesForMD( + ctx goctx.Context, + ctrlClient client.Client, + cluster *clusterv1.Cluster, + md *clusterv1.MachineDeployment) ([]*infrav1.ElfMachine, error) { + elfMachineList := &infrav1.ElfMachineList{} + labels := map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + clusterv1.MachineDeploymentNameLabel: md.Name, + } + if err := ctrlClient.List(ctx, elfMachineList, client.InNamespace(md.Namespace), client.MatchingLabels(labels)); err != nil { + return nil, err + } + + elfMachines := make([]*infrav1.ElfMachine, 
len(elfMachineList.Items)) + for i := range elfMachineList.Items { + elfMachines[i] = &elfMachineList.Items[i] + } + + return elfMachines, nil +} + +func GetControlPlaneMachinesForCluster(ctx goctx.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) ([]*clusterv1.Machine, error) { + ms := &clusterv1.MachineList{} + labels := map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + clusterv1.MachineControlPlaneLabel: "", + } + + if err := ctrlClient.List(ctx, ms, client.InNamespace(cluster.Namespace), client.MatchingLabels(labels)); err != nil { + return nil, err + } + + machines := make([]*clusterv1.Machine, len(ms.Items)) + for i := range ms.Items { + machines[i] = &ms.Items[i] + } + + return machines, nil +} + // IsControlPlaneMachine returns true if the provided resource is // a member of the control plane. func IsControlPlaneMachine(machine metav1.Object) bool { @@ -114,6 +156,31 @@ func GetNodeGroupName(machine *clusterv1.Machine) string { return strings.ReplaceAll(nodeGroupName, fmt.Sprintf("%s-", clusterName), "") } +func IsUpdatingElfMachineResources(elfMachine *infrav1.ElfMachine) bool { + if conditions.Has(elfMachine, infrav1.ResourcesHotUpdatedCondition) && + conditions.IsFalse(elfMachine, infrav1.ResourcesHotUpdatedCondition) { + return true + } + + return false +} + +func NeedUpdateElfMachineResources(elfMachineTemplate *infrav1.ElfMachineTemplate, elfMachine *infrav1.ElfMachine) bool { + return elfMachineTemplate.Spec.Template.Spec.DiskGiB > elfMachine.Spec.DiskGiB +} + +// SelectFirstNElfMachines returns the specified first N elfMachines. 
+func SelectFirstNElfMachines(elfMachines []*infrav1.ElfMachine, count int) []*infrav1.ElfMachine { + if count <= 0 { + return nil + } + if len(elfMachines) == 0 || count >= len(elfMachines) { + return elfMachines + } + + return elfMachines[:count] +} + func ConvertProviderIDToUUID(providerID *string) string { if providerID == nil || *providerID == "" { return "" diff --git a/pkg/util/machine/md.go b/pkg/util/machine/md.go index 8cba6d68..0d00a66a 100644 --- a/pkg/util/machine/md.go +++ b/pkg/util/machine/md.go @@ -19,6 +19,7 @@ package machine import ( goctx "context" + "github.com/pkg/errors" apitypes "k8s.io/apimachinery/pkg/types" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -34,3 +35,42 @@ func GetMDByMachine(ctx goctx.Context, ctrlClient client.Client, machine *cluste return &md, nil } + +func GetMDsForCluster( + ctx goctx.Context, + ctrlClient client.Client, + namespace, clusterName string) ([]*clusterv1.MachineDeployment, error) { + var mdList clusterv1.MachineDeploymentList + labels := map[string]string{clusterv1.ClusterNameLabel: clusterName} + + if err := ctrlClient.List( + ctx, &mdList, + client.InNamespace(namespace), + client.MatchingLabels(labels)); err != nil { + return nil, err + } + + mds := make([]*clusterv1.MachineDeployment, len(mdList.Items)) + for i := range mdList.Items { + mds[i] = &mdList.Items[i] + } + + return mds, nil +} + +// GetMachineSetsForDeployment returns a list of MachineSets associated with a MachineDeployment. +func GetMachineSetsForDeployment(ctx goctx.Context, ctrlClient client.Client, mdNamespace, mdName string) ([]*clusterv1.MachineSet, error) { + // List MachineSets based on the MachineDeployment label. 
+ msList := &clusterv1.MachineSetList{} + if err := ctrlClient.List(ctx, msList, + client.InNamespace(mdNamespace), client.MatchingLabels{clusterv1.MachineDeploymentNameLabel: mdName}); err != nil { + return nil, errors.Wrapf(err, "failed to list MachineSets for MachineDeployment/%s", mdName) + } + + // Copy the MachineSets to an array of MachineSet pointers, to avoid MachineSet copying later. + res := make([]*clusterv1.MachineSet, 0, len(msList.Items)) + for i := range msList.Items { + res = append(res, &msList.Items[i]) + } + return res, nil +} diff --git a/pkg/util/md/md.go b/pkg/util/md/md.go new file mode 100644 index 00000000..6d40bf2e --- /dev/null +++ b/pkg/util/md/md.go @@ -0,0 +1,95 @@ +package md + +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +import ( + intstrutil "k8s.io/apimachinery/pkg/util/intstr" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" +) + +// IsMDInRollingUpdate returns whether MD is in rolling update. +// +// When *md.Spec.Replicas > md.Status.UpdatedReplicas, it must be in a MD rolling update process. +// When *md.Spec.Replicas == md.Status.UpdatedReplicas, it could be in one of the following cases: +// 1. It's not in a MD rolling update process. So md.Spec.Replicas == md.Status.Replicas. +// 2. It's at the end of a MD rolling update process, and the last MD replica (i.e the last MD ElfMachine) is created just now. +// There is still an old MD ElfMachine, so md.Spec.Replicas + 1 == md.Status.Replicas. 
+func IsMDInRollingUpdate(md *clusterv1.MachineDeployment) bool { + if (*md.Spec.Replicas > md.Status.UpdatedReplicas && *md.Spec.Replicas <= md.Status.Replicas) || + (*md.Spec.Replicas == md.Status.UpdatedReplicas && *md.Spec.Replicas < md.Status.Replicas) { + return true + } + + return false +} + +/* +Copy from CAPI: https://github.com/kubernetes-sigs/cluster-api/blob/release-1.5/internal/controllers/machinedeployment/mdutil/util.go +*/ + +// MaxUnavailable returns the maximum unavailable machines a rolling deployment can take. +func MaxUnavailable(deployment clusterv1.MachineDeployment) int32 { + if deployment.Spec.Strategy.Type != clusterv1.RollingUpdateMachineDeploymentStrategyType || *(deployment.Spec.Replicas) == 0 { + return int32(0) + } + // Error caught by validation + _, maxUnavailable, _ := ResolveFenceposts(deployment.Spec.Strategy.RollingUpdate.MaxSurge, deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, *(deployment.Spec.Replicas)) + if maxUnavailable > *deployment.Spec.Replicas { + return *deployment.Spec.Replicas + } + return maxUnavailable +} + +// MaxSurge returns the maximum surge machines a rolling deployment can take. +func MaxSurge(deployment clusterv1.MachineDeployment) int32 { + if deployment.Spec.Strategy.Type != clusterv1.RollingUpdateMachineDeploymentStrategyType { + return int32(0) + } + // Error caught by validation + maxSurge, _, _ := ResolveFenceposts(deployment.Spec.Strategy.RollingUpdate.MaxSurge, deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, *(deployment.Spec.Replicas)) + return maxSurge +} + +// ResolveFenceposts resolves both maxSurge and maxUnavailable. This needs to happen in one +// step. 
For example: +// +// 2 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1), then old(-1), then new(+1) +// 1 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1) +// 2 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1) +// 1 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1) +// 2 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1) +// 1 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1). +func ResolveFenceposts(maxSurge, maxUnavailable *intstrutil.IntOrString, desired int32) (int32, int32, error) { + surge, err := intstrutil.GetScaledValueFromIntOrPercent(maxSurge, int(desired), true) + if err != nil { + return 0, 0, err + } + unavailable, err := intstrutil.GetScaledValueFromIntOrPercent(maxUnavailable, int(desired), false) + if err != nil { + return 0, 0, err + } + + if surge == 0 && unavailable == 0 { + // Validation should never allow the user to explicitly use zero values for both maxSurge + // maxUnavailable. Due to rounding down maxUnavailable though, it may resolve to zero. + // If both fenceposts resolve to zero, then we should set maxUnavailable to 1 on the + // theory that surge might not work due to quota. + unavailable = 1 + } + + return int32(surge), int32(unavailable), nil +} diff --git a/pkg/util/md/md_test.go b/pkg/util/md/md_test.go new file mode 100644 index 00000000..073a2eac --- /dev/null +++ b/pkg/util/md/md_test.go @@ -0,0 +1,160 @@ +package md + +import ( + "testing" + + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/util/intstr" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" +) + +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +func TestMaxUnavailable(t *testing.T) { + deployment := func(replicas int32, maxUnavailable intstr.IntOrString) clusterv1.MachineDeployment { + return clusterv1.MachineDeployment{ + Spec: clusterv1.MachineDeploymentSpec{ + Replicas: func(i int32) *int32 { return &i }(replicas), + Strategy: &clusterv1.MachineDeploymentStrategy{ + RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{ + MaxSurge: func(i int) *intstr.IntOrString { x := intstr.FromInt(i); return &x }(int(1)), + MaxUnavailable: &maxUnavailable, + }, + Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, + }, + }, + } + } + tests := []struct { + name string + deployment clusterv1.MachineDeployment + expected int32 + }{ + { + name: "maxUnavailable less than replicas", + deployment: deployment(10, intstr.FromInt(5)), + expected: int32(5), + }, + { + name: "maxUnavailable equal replicas", + deployment: deployment(10, intstr.FromInt(10)), + expected: int32(10), + }, + { + name: "maxUnavailable greater than replicas", + deployment: deployment(5, intstr.FromInt(10)), + expected: int32(5), + }, + { + name: "maxUnavailable with replicas is 0", + deployment: deployment(0, intstr.FromInt(10)), + expected: int32(0), + }, + { + name: "maxUnavailable less than replicas with percents", + deployment: deployment(10, intstr.FromString("50%")), + expected: int32(5), + }, + { + name: "maxUnavailable equal replicas with percents", + deployment: deployment(10, intstr.FromString("100%")), + expected: int32(10), + }, + { + name: "maxUnavailable greater than replicas with percents", + deployment: deployment(5, 
intstr.FromString("100%")), + expected: int32(5), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + g := NewWithT(t) + + g.Expect(MaxUnavailable(test.deployment)).To(Equal(test.expected)) + }) + } +} + +func TestResolveFenceposts(t *testing.T) { + tests := []struct { + maxSurge string + maxUnavailable string + desired int32 + expectSurge int32 + expectUnavailable int32 + expectError bool + }{ + { + maxSurge: "0%", + maxUnavailable: "0%", + desired: 0, + expectSurge: 0, + expectUnavailable: 1, + expectError: false, + }, + { + maxSurge: "39%", + maxUnavailable: "39%", + desired: 10, + expectSurge: 4, + expectUnavailable: 3, + expectError: false, + }, + { + maxSurge: "oops", + maxUnavailable: "39%", + desired: 10, + expectSurge: 0, + expectUnavailable: 0, + expectError: true, + }, + { + maxSurge: "55%", + maxUnavailable: "urg", + desired: 10, + expectSurge: 0, + expectUnavailable: 0, + expectError: true, + }, + { + maxSurge: "5", + maxUnavailable: "1", + desired: 7, + expectSurge: 0, + expectUnavailable: 0, + expectError: true, + }, + } + + for _, test := range tests { + t.Run("maxSurge="+test.maxSurge, func(t *testing.T) { + g := NewWithT(t) + + maxSurge := intstr.FromString(test.maxSurge) + maxUnavail := intstr.FromString(test.maxUnavailable) + surge, unavail, err := ResolveFenceposts(&maxSurge, &maxUnavail, test.desired) + if test.expectError { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + g.Expect(surge).To(Equal(test.expectSurge)) + g.Expect(unavail).To(Equal(test.expectUnavailable)) + }) + } +} diff --git a/test/helpers/envtest.go b/test/helpers/envtest.go index 10e5be5e..07f050f8 100644 --- a/test/helpers/envtest.go +++ b/test/helpers/envtest.go @@ -131,6 +131,10 @@ func NewTestEnvironment() *TestEnvironment { KubeConfig: env.Config, } managerOpts.AddToManager = func(ctx *context.ControllerManagerContext, mgr ctrlmgr.Manager) error { + if err := 
(&webhooks.ElfMachineTemplateValidator{}).SetupWebhookWithManager(mgr); err != nil { + return err + } + if err := (&webhooks.ElfMachineMutation{ Client: mgr.GetClient(), Logger: mgr.GetLogger().WithName("ElfMachineMutation"), diff --git a/webhooks/elfmachinetemplate_webhook.go b/webhooks/elfmachinetemplate_webhook.go new file mode 100644 index 00000000..8b3700a4 --- /dev/null +++ b/webhooks/elfmachinetemplate_webhook.go @@ -0,0 +1,90 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhooks + +import ( + goctx "context" + "fmt" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/validation/field" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" +) + +// Error messages. +const ( + diskCapacityCanOnlyBeGreaterThanZeroMsg = "the disk capacity can only be greater than 0" + diskCapacityCanOnlyBeExpanded = "the disk capacity can only be expanded" +) + +func (v *ElfMachineTemplateValidator) SetupWebhookWithManager(mgr ctrl.Manager) error { + return ctrl.NewWebhookManagedBy(mgr). + For(&infrav1.ElfMachineTemplate{}). + WithValidator(v). 
+ Complete() +} + +//+kubebuilder:webhook:path=/validate-infrastructure-cluster-x-k8s-io-v1beta1-elfmachinetemplate,mutating=false,failurePolicy=fail,sideEffects=None,groups=infrastructure.cluster.x-k8s.io,resources=elfmachinetemplates,verbs=create;update,versions=v1beta1,name=validation.elfmachinetemplate.infrastructure.x-k8s.io,admissionReviewVersions=v1 + +// ElfMachineTemplateValidator implements a validation webhook for ElfMachineTemplate. +type ElfMachineTemplateValidator struct{} + +var _ webhook.CustomValidator = &ElfMachineTemplateValidator{} + +// ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type. +func (v *ElfMachineTemplateValidator) ValidateCreate(ctx goctx.Context, obj runtime.Object) (admission.Warnings, error) { + elfMachineTemplate, ok := obj.(*infrav1.ElfMachineTemplate) + if !ok { + return nil, apierrors.NewBadRequest(fmt.Sprintf("expected an ElfMachineTemplate but got a %T", obj)) + } + + var allErrs field.ErrorList + if elfMachineTemplate.Spec.Template.Spec.DiskGiB <= 0 { + allErrs = append(allErrs, field.Invalid(field.NewPath("spec", "template", "spec", "diskGiB"), elfMachineTemplate.Spec.Template.Spec.DiskGiB, diskCapacityCanOnlyBeGreaterThanZeroMsg)) + } + + return nil, aggregateObjErrors(elfMachineTemplate.GroupVersionKind().GroupKind(), elfMachineTemplate.Name, allErrs) +} + +// ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type. 
+func (v *ElfMachineTemplateValidator) ValidateUpdate(ctx goctx.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { + oldElfMachineTemplate, ok := oldObj.(*infrav1.ElfMachineTemplate) + if !ok { + return nil, apierrors.NewBadRequest(fmt.Sprintf("expected an ElfMachineTemplate but got a %T", oldObj)) + } + elfMachineTemplate, ok := newObj.(*infrav1.ElfMachineTemplate) + if !ok { + return nil, apierrors.NewBadRequest(fmt.Sprintf("expected an ElfMachineTemplate but got a %T", newObj)) + } + + var allErrs field.ErrorList + if elfMachineTemplate.Spec.Template.Spec.DiskGiB < oldElfMachineTemplate.Spec.Template.Spec.DiskGiB { + allErrs = append(allErrs, field.Invalid(field.NewPath("spec", "template", "spec", "diskGiB"), elfMachineTemplate.Spec.Template.Spec.DiskGiB, diskCapacityCanOnlyBeExpanded)) + } + + return nil, aggregateObjErrors(elfMachineTemplate.GroupVersionKind().GroupKind(), elfMachineTemplate.Name, allErrs) +} + +// ValidateDelete implements webhook.CustomValidator so a webhook will be registered for the type. +func (v *ElfMachineTemplateValidator) ValidateDelete(ctx goctx.Context, obj runtime.Object) (admission.Warnings, error) { + return nil, nil +} diff --git a/webhooks/elfmachinetemplate_webhook_test.go b/webhooks/elfmachinetemplate_webhook_test.go new file mode 100644 index 00000000..33c7ac74 --- /dev/null +++ b/webhooks/elfmachinetemplate_webhook_test.go @@ -0,0 +1,123 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhooks + +import ( + goctx "context" + "testing" + + . "github.com/onsi/gomega" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/validation/field" + + infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" +) + +func TestElfMachineTemplateValidatorValidateCreate(t *testing.T) { + g := NewWithT(t) + + var tests []testCase + tests = append(tests, testCase{ + Name: "disk capacity cannot be equal to 0", + EMT: &infrav1.ElfMachineTemplate{Spec: infrav1.ElfMachineTemplateSpec{ + Template: infrav1.ElfMachineTemplateResource{ + Spec: infrav1.ElfMachineSpec{ + DiskGiB: 0, + }, + }, + }}, + Errs: field.ErrorList{ + field.Invalid(field.NewPath("spec", "template", "spec", "diskGiB"), 0, diskCapacityCanOnlyBeGreaterThanZeroMsg), + }, + }) + + validator := &ElfMachineTemplateValidator{} + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + warnings, err := validator.ValidateCreate(goctx.Background(), tc.EMT) + g.Expect(warnings).To(BeEmpty()) + expectTestCase(g, tc, err) + }) + } +} + +func TestElfMachineTemplateValidatorValidateUpdate(t *testing.T) { + g := NewWithT(t) + + var tests []testCase + tests = append(tests, testCase{ + Name: "Cannot reduce disk capacity", + OldEMT: &infrav1.ElfMachineTemplate{Spec: infrav1.ElfMachineTemplateSpec{ + Template: infrav1.ElfMachineTemplateResource{ + Spec: infrav1.ElfMachineSpec{ + DiskGiB: 2, + }, + }, + }}, + EMT: &infrav1.ElfMachineTemplate{Spec: infrav1.ElfMachineTemplateSpec{ + Template: infrav1.ElfMachineTemplateResource{ + Spec: infrav1.ElfMachineSpec{ + DiskGiB: 1, + }, + }, + }}, + Errs: field.ErrorList{ + field.Invalid(field.NewPath("spec", "template", "spec", "diskGiB"), 1, diskCapacityCanOnlyBeExpanded), + }, + }) + + validator := &ElfMachineTemplateValidator{} + + for _, tc := range tests { + 
t.Run(tc.Name, func(t *testing.T) { + warnings, err := validator.ValidateUpdate(goctx.Background(), tc.OldEMT, tc.EMT) + g.Expect(warnings).To(BeEmpty()) + expectTestCase(g, tc, err) + }) + } +} + +func expectTestCase(g Gomega, tc testCase, err error) { + if tc.Errs != nil { + g.Expect(err).To(HaveOccurred()) + statusErr, ok := err.(*apierrors.StatusError) + g.Expect(ok).To(BeTrue()) + g.Expect(statusErr.ErrStatus.Details.Group).To(Equal(tc.EMT.GroupVersionKind().Group)) + g.Expect(statusErr.ErrStatus.Details.Kind).To(Equal(tc.EMT.GroupVersionKind().Kind)) + g.Expect(statusErr.ErrStatus.Details.Name).To(Equal(tc.EMT.Name)) + causes := make([]metav1.StatusCause, 0, len(tc.Errs)) + for i := 0; i < len(tc.Errs); i++ { + causes = append(causes, metav1.StatusCause{ + Type: metav1.CauseType(tc.Errs[i].Type), + Message: tc.Errs[i].ErrorBody(), + Field: tc.Errs[i].Field, + }) + } + g.Expect(statusErr.ErrStatus.Details.Causes).To(Equal(causes)) + } else { + g.Expect(err).NotTo(HaveOccurred()) + } +} + +type testCase struct { + Name string + EMT *infrav1.ElfMachineTemplate + OldEMT *infrav1.ElfMachineTemplate + Errs field.ErrorList +} diff --git a/webhooks/util.go b/webhooks/util.go new file mode 100644 index 00000000..cc485089 --- /dev/null +++ b/webhooks/util.go @@ -0,0 +1,35 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package webhooks + +import ( + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/validation/field" +) + +func aggregateObjErrors(gk schema.GroupKind, name string, allErrs field.ErrorList) error { + if len(allErrs) == 0 { + return nil + } + + return apierrors.NewInvalid( + gk, + name, + allErrs, + ) +}