From 202fc218a62ad70aa4efcddef5de26c3c76de023 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Fri, 12 Jul 2024 20:26:09 +0800 Subject: [PATCH] Add Inference API Signed-off-by: kerthcet --- PROJECT | 32 +- README.md | 8 +- api/inference/v1alpha1/groupversion_info.go | 36 ++ api/inference/v1alpha1/playground_types.go | 71 ++++ api/inference/v1alpha1/service_types.go | 71 ++++ api/inference/v1alpha1/types.go | 50 +++ .../v1alpha1/zz_generated.deepcopy.go | 304 +++++++++++++++++ api/v1alpha1/groupversion_info.go | 8 +- api/v1alpha1/modelprovider_types.go | 129 ++++++++ api/v1alpha1/serve_types.go | 86 ----- api/v1alpha1/zz_generated.deepcopy.go | 153 ++++++--- cmd/main.go | 31 +- .../bases/inference.llmaz.io_backends.yaml | 125 +++++++ .../bases/inference.llmaz.io_playgrounds.yaml | 307 ++++++++++++++++++ ....yaml => inference.llmaz.io_services.yaml} | 159 +++++++-- config/crd/bases/llmaz.io_modelproviders.yaml | 293 +++++++++++++++++ config/crd/kustomization.yaml | 12 +- config/default/kustomization.yaml | 4 +- config/manager/manager.yaml | 8 +- config/prometheus/monitor.yaml | 4 +- config/rbac/_modelprovider_editor_role.yaml | 31 ++ config/rbac/_modelprovider_viewer_role.yaml | 27 ++ .../rbac/auth_proxy_client_clusterrole.yaml | 4 +- config/rbac/auth_proxy_role.yaml | 4 +- config/rbac/auth_proxy_role_binding.yaml | 4 +- config/rbac/auth_proxy_service.yaml | 4 +- .../inference_playground_editor_role.yaml | 31 ++ .../inference_playground_viewer_role.yaml | 27 ++ ...aml => inference_service_editor_role.yaml} | 18 +- ...aml => inference_service_viewer_role.yaml} | 18 +- config/rbac/leader_election_role.yaml | 4 +- config/rbac/leader_election_role_binding.yaml | 4 +- config/rbac/role.yaml | 64 +++- config/rbac/role_binding.yaml | 4 +- config/rbac/service_account.yaml | 4 +- config/samples/_v1alpha1_modelprovider.yaml | 12 + .../inference_v1alpha1_playground.yaml | 12 + .../samples/inference_v1alpha1_service.yaml | 12 + config/samples/kustomization.yaml | 4 +- 
config/samples/llmaz_v1alpha1_serve.yaml | 12 - go.mod | 45 +-- go.sum | 91 +++--- hack/boilerplate.go.txt | 2 +- .../inference/playground_controller.go | 62 ++++ .../inference/service_controller.go | 62 ++++ internal/controller/inference/suite_test.go | 90 +++++ ...troller.go => modelprovider_controller.go} | 22 +- internal/controller/suite_test.go | 6 +- 48 files changed, 2245 insertions(+), 326 deletions(-) create mode 100644 api/inference/v1alpha1/groupversion_info.go create mode 100644 api/inference/v1alpha1/playground_types.go create mode 100644 api/inference/v1alpha1/service_types.go create mode 100644 api/inference/v1alpha1/types.go create mode 100644 api/inference/v1alpha1/zz_generated.deepcopy.go create mode 100644 api/v1alpha1/modelprovider_types.go delete mode 100644 api/v1alpha1/serve_types.go create mode 100644 config/crd/bases/inference.llmaz.io_backends.yaml create mode 100644 config/crd/bases/inference.llmaz.io_playgrounds.yaml rename config/crd/bases/{llmaz.inftyai.io_serves.yaml => inference.llmaz.io_services.yaml} (99%) create mode 100644 config/crd/bases/llmaz.io_modelproviders.yaml create mode 100644 config/rbac/_modelprovider_editor_role.yaml create mode 100644 config/rbac/_modelprovider_viewer_role.yaml create mode 100644 config/rbac/inference_playground_editor_role.yaml create mode 100644 config/rbac/inference_playground_viewer_role.yaml rename config/rbac/{serve_editor_role.yaml => inference_service_editor_role.yaml} (55%) rename config/rbac/{serve_viewer_role.yaml => inference_service_viewer_role.yaml} (52%) create mode 100644 config/samples/_v1alpha1_modelprovider.yaml create mode 100644 config/samples/inference_v1alpha1_playground.yaml create mode 100644 config/samples/inference_v1alpha1_service.yaml delete mode 100644 config/samples/llmaz_v1alpha1_serve.yaml create mode 100644 internal/controller/inference/playground_controller.go create mode 100644 internal/controller/inference/service_controller.go create mode 100644 
internal/controller/inference/suite_test.go rename internal/controller/{serve_controller.go => modelprovider_controller.go} (70%) diff --git a/PROJECT b/PROJECT index 234561a..e565de7 100644 --- a/PROJECT +++ b/PROJECT @@ -2,19 +2,37 @@ # This file is used to track the info used to scaffold your project # and allow the plugins properly work. # More info: https://book.kubebuilder.io/reference/project-config.html -domain: inftyai.io +domain: llmaz.io layout: - go.kubebuilder.io/v4 -projectName: llmaz-operator -repo: inftyai.io/llmaz +multigroup: true +projectName: llmaz +repo: inftyai.com/llmaz resources: - api: crdVersion: v1 namespaced: true controller: true - domain: inftyai.io - group: llmaz - kind: Serve - path: inftyai.io/llmaz/api/v1alpha1 + domain: llmaz.io + group: inference + kind: Service + path: inftyai.com/llmaz/api/inference/v1alpha1 + version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: llmaz.io + kind: ModelProvider + path: inftyai.com/llmaz/api/v1alpha1 + version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: llmaz.io + group: inference + kind: Playground + path: inftyai.com/llmaz/api/inference/v1alpha1 version: v1alpha1 version: "3" diff --git a/README.md b/README.md index 2e0d297..43aa1c8 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ # llmaz -Serving LLMs on Kubernetes in a breeze. +☸️ Effortlessly operating LLMs on Kubernetes, e.g. Serving. + +## Roadmap + +- Serverless support +- CLI tool +- Gateway support diff --git a/api/inference/v1alpha1/groupversion_info.go b/api/inference/v1alpha1/groupversion_info.go new file mode 100644 index 0000000..6de3201 --- /dev/null +++ b/api/inference/v1alpha1/groupversion_info.go @@ -0,0 +1,36 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1alpha1 contains API Schema definitions for the inference v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=inference.llmaz.io +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go new file mode 100644 index 0000000..7aa3c26 --- /dev/null +++ b/api/inference/v1alpha1/playground_types.go @@ -0,0 +1,71 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import ( + api "inftyai.com/llmaz/api/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// PlaygroundSpec defines the desired state of Playground +type PlaygroundSpec struct { + // Replicas represents the replica number of inference workloads. + // +kubebuilder:default=1 + // +optional + Replicas *int32 `json:"replicas,omitempty"` + // ModelsClaim represents the references to multiple models. + ModelsClaim api.ModelsClaim `json:"modelsClaim"` + // BackendConfig represents the inference backend configuration + // under the hood, e.g. vLLM, which is the default backend. + // +optional + BackendConfig *BackendConfig `json:"backendConfig,omitempty"` + // ElasticConfig defines the configuration for elastic usage, + // e.g. the max/min replicas. Default to 0 ~ Inf+. + // +optional + ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` +} + +// PlaygroundStatus defines the observed state of Playground +type PlaygroundStatus struct { + // Conditions represents the Inference condition. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status + +// Playground is the Schema for the playgrounds API +type Playground struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec PlaygroundSpec `json:"spec,omitempty"` + Status PlaygroundStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// PlaygroundList contains a list of Playground +type PlaygroundList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []Playground `json:"items"` +} + +func init() { + SchemeBuilder.Register(&Playground{}, &PlaygroundList{}) +} diff --git a/api/inference/v1alpha1/service_types.go b/api/inference/v1alpha1/service_types.go new file mode 100644 index 0000000..507fbb1 --- /dev/null +++ b/api/inference/v1alpha1/service_types.go @@ -0,0 +1,71 @@ +/* +Copyright 2024. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + lws "sigs.k8s.io/lws/api/leaderworkerset/v1" + + api "inftyai.com/llmaz/api/v1alpha1" +) + +// ServiceSpec defines the desired state of Service. +// Service controller will maintain multi-flavor of workloads with +// different accelerators for cost or performance considerations. +type ServiceSpec struct { + // ModelsClaim represents the references to multiple models. + // Note: properties (e.g. nodeSelectors, resources) of the model flavors + // will be applied to the workload if not already set. + ModelsClaim api.ModelsClaim `json:"modelProviderClaim"` + // WorkloadTemplate defines the underlying workload layout and configuration. + WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"` + // ElasticConfig defines the configuration for elastic usage, + // e.g. the max/min replicas. Default to 0 ~ Inf+. + // +optional + ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` +} + +// ServiceStatus defines the observed state of Service +type ServiceStatus struct { + // Conditions represents the Inference condition. 
+ Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status + +// Service is the Schema for the services API +type Service struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ServiceSpec `json:"spec,omitempty"` + Status ServiceStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// ServiceList contains a list of Service +type ServiceList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []Service `json:"items"` +} + +func init() { + SchemeBuilder.Register(&Service{}, &ServiceList{}) +} diff --git a/api/inference/v1alpha1/types.go b/api/inference/v1alpha1/types.go new file mode 100644 index 0000000..a2725e7 --- /dev/null +++ b/api/inference/v1alpha1/types.go @@ -0,0 +1,50 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import corev1 "k8s.io/api/core/v1" + +type BackendName string + +type BackendConfig struct { + // Name represents the inference backend under the hood, e.g. vLLM. + // +kubebuilder:validation:Enum={vllm} + // +kubebuilder:default=vllm + // +optional + Name *BackendName `json:"name"` + // Version represents the backend version if you want a different one + // from the default version. + // +optional + Version *string `json:"version,omitempty"` + // Args represents the arguments passed to the backend. 
+ // +optional + Args []string `json:"args,omitempty"` + // Envs represents the environments set to the container. + // +optional + Envs []corev1.EnvVar `json:"envs,omitempty"` +} + +type ElasticConfig struct { + // MinReplicas indicates the minimum number of inference workloads based on the traffic. + // Default to nil means we can scale down the instances to 0. + // +optional + MinReplicas *int32 `json:"minReplicas,omitempty"` + // MaxReplicas indicates the maximum number of inference workloads based on the traffic. + // Default to nil means there's no limit for the instance number. + // +optional + MaxReplicas *int32 `json:"maxReplicas,omitempty"` +} diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 0000000..a5adfd5 --- /dev/null +++ b/api/inference/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,304 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *BackendConfig) DeepCopyInto(out *BackendConfig) { + *out = *in + if in.Name != nil { + in, out := &in.Name, &out.Name + *out = new(BackendName) + **out = **in + } + if in.Version != nil { + in, out := &in.Version, &out.Version + *out = new(string) + **out = **in + } + if in.Args != nil { + in, out := &in.Args, &out.Args + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Envs != nil { + in, out := &in.Envs, &out.Envs + *out = make([]corev1.EnvVar, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackendConfig. +func (in *BackendConfig) DeepCopy() *BackendConfig { + if in == nil { + return nil + } + out := new(BackendConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig) { + *out = *in + if in.MinReplicas != nil { + in, out := &in.MinReplicas, &out.MinReplicas + *out = new(int32) + **out = **in + } + if in.MaxReplicas != nil { + in, out := &in.MaxReplicas, &out.MaxReplicas + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticConfig. +func (in *ElasticConfig) DeepCopy() *ElasticConfig { + if in == nil { + return nil + } + out := new(ElasticConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Playground) DeepCopyInto(out *Playground) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Playground. 
+func (in *Playground) DeepCopy() *Playground { + if in == nil { + return nil + } + out := new(Playground) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Playground) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PlaygroundList) DeepCopyInto(out *PlaygroundList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Playground, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundList. +func (in *PlaygroundList) DeepCopy() *PlaygroundList { + if in == nil { + return nil + } + out := new(PlaygroundList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PlaygroundList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *PlaygroundSpec) DeepCopyInto(out *PlaygroundSpec) { + *out = *in + if in.Replicas != nil { + in, out := &in.Replicas, &out.Replicas + *out = new(int32) + **out = **in + } + in.ModelsClaim.DeepCopyInto(&out.ModelsClaim) + if in.BackendConfig != nil { + in, out := &in.BackendConfig, &out.BackendConfig + *out = new(BackendConfig) + (*in).DeepCopyInto(*out) + } + if in.ElasticConfig != nil { + in, out := &in.ElasticConfig, &out.ElasticConfig + *out = new(ElasticConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundSpec. +func (in *PlaygroundSpec) DeepCopy() *PlaygroundSpec { + if in == nil { + return nil + } + out := new(PlaygroundSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PlaygroundStatus) DeepCopyInto(out *PlaygroundStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundStatus. +func (in *PlaygroundStatus) DeepCopy() *PlaygroundStatus { + if in == nil { + return nil + } + out := new(PlaygroundStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Service) DeepCopyInto(out *Service) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Service. 
+func (in *Service) DeepCopy() *Service { + if in == nil { + return nil + } + out := new(Service) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Service) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceList) DeepCopyInto(out *ServiceList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Service, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceList. +func (in *ServiceList) DeepCopy() *ServiceList { + if in == nil { + return nil + } + out := new(ServiceList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ServiceList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceSpec) DeepCopyInto(out *ServiceSpec) { + *out = *in + in.ModelsClaim.DeepCopyInto(&out.ModelsClaim) + in.WorkloadTemplate.DeepCopyInto(&out.WorkloadTemplate) + if in.ElasticConfig != nil { + in, out := &in.ElasticConfig, &out.ElasticConfig + *out = new(ElasticConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceSpec. 
+func (in *ServiceSpec) DeepCopy() *ServiceSpec { + if in == nil { + return nil + } + out := new(ServiceSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ServiceStatus) DeepCopyInto(out *ServiceStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceStatus. +func (in *ServiceStatus) DeepCopy() *ServiceStatus { + if in == nil { + return nil + } + out := new(ServiceStatus) + in.DeepCopyInto(out) + return out +} diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go index b9bf182..683c06e 100644 --- a/api/v1alpha1/groupversion_info.go +++ b/api/v1alpha1/groupversion_info.go @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,9 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -// Package v1alpha1 contains API Schema definitions for the llmaz v1alpha1 API group +// Package v1alpha1 contains API Schema definitions for the v1alpha1 API group // +kubebuilder:object:generate=true -// +groupName=llmaz.inftyai.io +// +groupName=llmaz.io package v1alpha1 import ( @@ -26,7 +26,7 @@ import ( var ( // GroupVersion is group version used to register these objects - GroupVersion = schema.GroupVersion{Group: "llmaz.inftyai.io", Version: "v1alpha1"} + GroupVersion = schema.GroupVersion{Group: "llmaz.io", Version: "v1alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} diff --git a/api/v1alpha1/modelprovider_types.go b/api/v1alpha1/modelprovider_types.go new file mode 100644 index 0000000..305f06d --- /dev/null +++ b/api/v1alpha1/modelprovider_types.go @@ -0,0 +1,129 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// DataSource represents where to load the model. +// Only one data source will be used. +type DataSource struct { + // URL represents the URL link that contains the data sources. + // +optional + URL *string `json:"url,omitempty"` + // The mounted volume that contains the data. + // +optional + Volume *v1.VolumeSource `json:"volumeSource,omitempty"` + // Image represents the image address that contains the source data. 
+ // +optional + Image *string `json:"image,omitempty"` + // ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image. + // +optional + ImagePullSecrets []string `json:"imagePullSecrets,omitempty"` +} + +type FlavorName string + +// Flavor defines the accelerator requirements for a model and the necessary parameters in autoscaling. +// Generally, it will be used in two places: +// - Pod scheduling with node selectors specified. +// - Cluster autoscaling with essential parameters provided. +type Flavor struct { + // Name represents the flavor name, which will be used in model claim. + Name FlavorName `json:"name"` + // Requests defines the required resources to serve the model, like nvidia.com/gpu: 8. + // Note: cpu and memory usage of the model backend (e.g. vllm) can also be declared here, + // or a default value will be used based on the community recommendations. + Requests v1.ResourceList `json:"requests"` + // NodeSelector defines the labels to filter specified nodes, like + // cloud-provider.com/accelerator: nvidia-a100. + // NodeSelector will be auto injected to the Pods as scheduling primitives. + // +optional + NodeSelector []v1.NodeSelector `json:"nodeSelector,omitempty"` + // Params stores other useful parameters and will be consumed by the autoscaling components + // like cluster-autoscaler, Karpenter. + // E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with + // instance-type: p4d.24xlarge for AWS. + // +optional + Params map[string]string `json:"params,omitempty"` +} + +type ModelProviderName string + +// ModelsClaim represents the references to multiple models, +// as well as the configured flavors. +type ModelsClaim struct { + // ModelProviderNames represents a list of modelProviders, there may be + // multiple modelProviders here to support state-of-the-art technologies + // like speculative decoding. 
+ // +kubebuilder:validation:MinItems=1 + ModelProviderNames []ModelProviderName `json:"modelProviderNames"` + // InferenceFlavors represents a list of flavors with fungibility supported. + // If not set and multiple modelProviders are claimed, apply with the 0-index + // model provider by default. + // If set, the flavor names will refer to the 0-index model provider. + // This is just for simplicity; if needed, this part will be refactored in the future. + // +optional + InferenceFlavors []FlavorName `json:"inferenceFlavors"` +} + +// ModelProviderSpec defines the desired state of ModelProvider +type ModelProviderSpec struct { + // FamilyName represents the model type, like llama2, which will be auto injected + // to the labels with the key of `llmaz.io/model-family-name`. + FamilyName ModelProviderName `json:"familyName"` + // DataSource represents where the model is stored; there are several ways, like + // loading from huggingface, host path, s3 and so on. + DataSource DataSource `json:"dataSource"` + // InferenceFlavors represents the accelerator requirements to serve the model. + // Flavors are fungible following the priority of slice order. + // +optional + InferenceFlavors []Flavor `json:"inferenceFlavors,omitempty"` +} + +// ModelProviderStatus defines the observed state of ModelProvider +type ModelProviderStatus struct { + // Conditions represents the Inference condition. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status + +// ModelProvider is the Schema for the modelProvider API. 
+type ModelProvider struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ModelProviderSpec `json:"spec,omitempty"` + Status ModelProviderStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// ModelProviderList contains a list of ModelProvider +type ModelProviderList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []ModelProvider `json:"items"` +} + +func init() { + SchemeBuilder.Register(&ModelProvider{}, &ModelProviderList{}) +} diff --git a/api/v1alpha1/serve_types.go b/api/v1alpha1/serve_types.go deleted file mode 100644 index 95c763a..0000000 --- a/api/v1alpha1/serve_types.go +++ /dev/null @@ -1,86 +0,0 @@ -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package v1alpha1 - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - lws "sigs.k8s.io/lws/api/leaderworkerset/v1" -) - -// ServeSpec defines the desired state of Serve -type ServeSpec struct { - // ModelNameOrPath represents the model name or the local path. - ModelNameOrPath string `json:"modelNameOrPath,omitempty"` - // Backend indicates the inference backend under the hood, e.g. vLLM. - // Default to use huggingface library. - // - // +optional - // +kubebuilder:validation:Enum={vllm,huggingface} - // +kubebuilder:default=huggingface - Backend *string `json:"backend,omitempty"` - // WorkloadTemplate defines the underlying workload layout and configuration, - // e.g. 
the leader/worker templates and replicas. - WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"` - // ElasticConfig defines the configuration for elastic usage, - // e.g. the max/min replicas. - // Default to 0 ~ Inf+. - // +optional - ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"` -} - -type ElasticConfig struct { - // MinReplicas indicates the minimum number of Serve instances based on the traffic. - // Default to nil means we can scale down the instances to 0. - // +optional - MinReplicas *int32 `json:"minReplicas,omitempty"` - // MaxReplicas indicates the maximum number of Serve instances based on the traffic. - // Default to nil means there's no limit for the instance number. - // +optional - MaxReplicas *int32 `json:"maxReplicas,omitempty"` -} - -// ServeStatus defines the observed state of Serve -type ServeStatus struct { - // Conditions represents the Serve condition. - Conditions []metav1.Condition `json:"conditions,omitempty"` -} - -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status - -// Serve is the Schema for the serves API -type Serve struct { - metav1.TypeMeta `json:",inline"` - metav1.ObjectMeta `json:"metadata,omitempty"` - - Spec ServeSpec `json:"spec,omitempty"` - Status ServeStatus `json:"status,omitempty"` -} - -//+kubebuilder:object:root=true - -// ServeList contains a list of Serve -type ServeList struct { - metav1.TypeMeta `json:",inline"` - metav1.ListMeta `json:"metadata,omitempty"` - Items []Serve `json:"items"` -} - -func init() { - SchemeBuilder.Register(&Serve{}, &ServeList{}) -} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 95e8213..8c8057d 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1,7 +1,7 @@ //go:build !ignore_autogenerated /* -Copyright 2023. +Copyright 2024. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,37 +21,84 @@ limitations under the License. package v1alpha1 import ( - "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig) { +func (in *DataSource) DeepCopyInto(out *DataSource) { *out = *in - if in.MinReplicas != nil { - in, out := &in.MinReplicas, &out.MinReplicas - *out = new(int32) + if in.URL != nil { + in, out := &in.URL, &out.URL + *out = new(string) **out = **in } - if in.MaxReplicas != nil { - in, out := &in.MaxReplicas, &out.MaxReplicas - *out = new(int32) + if in.Volume != nil { + in, out := &in.Volume, &out.Volume + *out = new(v1.VolumeSource) + (*in).DeepCopyInto(*out) + } + if in.Image != nil { + in, out := &in.Image, &out.Image + *out = new(string) **out = **in } + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataSource. +func (in *DataSource) DeepCopy() *DataSource { + if in == nil { + return nil + } + out := new(DataSource) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *Flavor) DeepCopyInto(out *Flavor) { + *out = *in + if in.Requests != nil { + in, out := &in.Requests, &out.Requests + *out = make(v1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make([]v1.NodeSelector, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Params != nil { + in, out := &in.Params, &out.Params + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticConfig. -func (in *ElasticConfig) DeepCopy() *ElasticConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Flavor. +func (in *Flavor) DeepCopy() *Flavor { if in == nil { return nil } - out := new(ElasticConfig) + out := new(Flavor) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Serve) DeepCopyInto(out *Serve) { +func (in *ModelProvider) DeepCopyInto(out *ModelProvider) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) @@ -59,18 +106,18 @@ func (in *Serve) DeepCopyInto(out *Serve) { in.Status.DeepCopyInto(&out.Status) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Serve. -func (in *Serve) DeepCopy() *Serve { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProvider. +func (in *ModelProvider) DeepCopy() *ModelProvider { if in == nil { return nil } - out := new(Serve) + out := new(ModelProvider) in.DeepCopyInto(out) return out } // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *Serve) DeepCopyObject() runtime.Object { +func (in *ModelProvider) DeepCopyObject() runtime.Object { if c := in.DeepCopy(); c != nil { return c } @@ -78,31 +125,31 @@ func (in *Serve) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ServeList) DeepCopyInto(out *ServeList) { +func (in *ModelProviderList) DeepCopyInto(out *ModelProviderList) { *out = *in out.TypeMeta = in.TypeMeta in.ListMeta.DeepCopyInto(&out.ListMeta) if in.Items != nil { in, out := &in.Items, &out.Items - *out = make([]Serve, len(*in)) + *out = make([]ModelProvider, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServeList. -func (in *ServeList) DeepCopy() *ServeList { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProviderList. +func (in *ModelProviderList) DeepCopy() *ModelProviderList { if in == nil { return nil } - out := new(ServeList) + out := new(ModelProviderList) in.DeepCopyInto(out) return out } // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *ServeList) DeepCopyObject() runtime.Object { +func (in *ModelProviderList) DeepCopyObject() runtime.Object { if c := in.DeepCopy(); c != nil { return c } @@ -110,49 +157,71 @@ func (in *ServeList) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *ServeSpec) DeepCopyInto(out *ServeSpec) { +func (in *ModelProviderSpec) DeepCopyInto(out *ModelProviderSpec) { *out = *in - if in.Backend != nil { - in, out := &in.Backend, &out.Backend - *out = new(string) - **out = **in - } - in.WorkloadTemplate.DeepCopyInto(&out.WorkloadTemplate) - if in.ElasticConfig != nil { - in, out := &in.ElasticConfig, &out.ElasticConfig - *out = new(ElasticConfig) - (*in).DeepCopyInto(*out) + in.DataSource.DeepCopyInto(&out.DataSource) + if in.InferenceFlavors != nil { + in, out := &in.InferenceFlavors, &out.InferenceFlavors + *out = make([]Flavor, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServeSpec. -func (in *ServeSpec) DeepCopy() *ServeSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProviderSpec. +func (in *ModelProviderSpec) DeepCopy() *ModelProviderSpec { if in == nil { return nil } - out := new(ServeSpec) + out := new(ModelProviderSpec) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ServeStatus) DeepCopyInto(out *ServeStatus) { +func (in *ModelProviderStatus) DeepCopyInto(out *ModelProviderStatus) { *out = *in if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions - *out = make([]v1.Condition, len(*in)) + *out = make([]metav1.Condition, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServeStatus. -func (in *ServeStatus) DeepCopy() *ServeStatus { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProviderStatus. 
+func (in *ModelProviderStatus) DeepCopy() *ModelProviderStatus { + if in == nil { + return nil + } + out := new(ModelProviderStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ModelsClaim) DeepCopyInto(out *ModelsClaim) { + *out = *in + if in.ModelProviderNames != nil { + in, out := &in.ModelProviderNames, &out.ModelProviderNames + *out = make([]ModelProviderName, len(*in)) + copy(*out, *in) + } + if in.InferenceFlavors != nil { + in, out := &in.InferenceFlavors, &out.InferenceFlavors + *out = make([]FlavorName, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelsClaim. +func (in *ModelsClaim) DeepCopy() *ModelsClaim { if in == nil { return nil } - out := new(ServeStatus) + out := new(ModelsClaim) in.DeepCopyInto(out) return out } diff --git a/cmd/main.go b/cmd/main.go index a8172ed..30b01d2 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -32,8 +32,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - llmazv1alpha1 "inftyai.io/llmaz/api/v1alpha1" - "inftyai.io/llmaz/internal/controller" + inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1" + llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1" + "inftyai.com/llmaz/internal/controller" + inferencecontroller "inftyai.com/llmaz/internal/controller/inference" //+kubebuilder:scaffold:imports ) @@ -45,7 +47,8 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(llmazv1alpha1.AddToScheme(scheme)) + utilruntime.Must(inferencev1alpha1.AddToScheme(scheme)) + utilruntime.Must(llmaziov1alpha1.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme } @@ -71,7 +74,7 @@ func main() { Metrics: metricsserver.Options{BindAddress: metricsAddr}, HealthProbeBindAddress: probeAddr, LeaderElection: enableLeaderElection, - LeaderElectionID: "05d9997c.inftyai.io", + LeaderElectionID: "fbb36db9.llmaz.io", // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily // when the Manager ends. This requires the binary to immediately end when the // Manager is stopped, otherwise, this setting is unsafe. 
Setting this significantly @@ -89,11 +92,25 @@ func main() { os.Exit(1) } - if err = (&controller.ServeReconciler{ + if err = (&inferencecontroller.ServiceReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "Serve") + setupLog.Error(err, "unable to create controller", "controller", "Service") + os.Exit(1) + } + if err = (&controller.ModelProviderReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ModelProvider") + os.Exit(1) + } + if err = (&inferencecontroller.PlaygroundReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Playground") os.Exit(1) } //+kubebuilder:scaffold:builder diff --git a/config/crd/bases/inference.llmaz.io_backends.yaml b/config/crd/bases/inference.llmaz.io_backends.yaml new file mode 100644 index 0000000..0299ff1 --- /dev/null +++ b/config/crd/bases/inference.llmaz.io_backends.yaml @@ -0,0 +1,125 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: backends.inference.llmaz.io +spec: + group: inference.llmaz.io + names: + kind: Backend + listKind: BackendList + plural: backends + singular: backend + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Backend is the Schema for the backends API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: BackendSpec defines the desired state of Backend + properties: + foo: + description: Foo is an example field of Backend. Edit backend_types.go + to remove/update + type: string + type: object + status: + description: BackendStatus defines the observed state of Backend + properties: + conditions: + description: Conditions represents the Inference condition. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. 
+ format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml new file mode 100644 index 0000000..3e0a696 --- /dev/null +++ b/config/crd/bases/inference.llmaz.io_playgrounds.yaml @@ -0,0 +1,307 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: playgrounds.inference.llmaz.io +spec: + group: inference.llmaz.io + names: + kind: Playground + listKind: PlaygroundList + plural: playgrounds + singular: playground + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Playground is the Schema for the playgrounds API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: PlaygroundSpec defines the desired state of Playground + properties: + backendConfig: + description: BackendConfig indicates the inference backend under the + hood, e.g. vLLM. + properties: + args: + description: Args represents the arguments passed to the backend. + items: + type: string + type: array + envs: + description: Envs represents the environments set to the container. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must be a + C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? 
+ type: string + optional: + description: Specify whether the ConfigMap or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of the + exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid? 
+ type: string + optional: + description: Specify whether the Secret or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + name: + default: vllm + description: Name represents the inference backend under the hood, + e.g. vLLM. + enum: + - vllm + type: string + version: + description: |- + Version represents the backend version if you want a different one + from the default version. + type: string + type: object + elasticConfig: + description: |- + ElasticConfig defines the configuration for elastic usage, + e.g. the max/min replicas. + Default to 0 ~ Inf+. + properties: + maxReplicas: + description: |- + MaxReplicas indicates the maximum number of Inference instances based on the traffic. + Default to nil means there's no limit for the instance number. + format: int32 + type: integer + minReplicas: + description: |- + MinReplicas indicates the minimum number of Inference instances based on the traffic. + Default to nil means we can scale down the instances to 0. + format: int32 + type: integer + type: object + modelsClaim: + description: ModelsClaim refers to the model claims. + properties: + inferenceFlavors: + description: |- + InferenceFlavors represents a list of flavors with fungibility supported. + If not set and multiple modelProviders claimed, apply with the 0-index model provider. + If set, the flavor names also refer to the 0-index model provider. + This is just for simplicity, if needed, will refactor this part in the future. + items: + type: string + type: array + modelProviderNames: + description: |- + ModelProviderNames represents the names of the modelProviders; there may be + multiple models here to support state-of-the-art technologies like + speculative decoding.
+ items: + type: string + minItems: 1 + type: array + required: + - modelProviderNames + type: object + replicas: + default: 1 + description: Replicas represents the replica number of model playground. + format: int32 + type: integer + required: + - modelsClaim + type: object + status: + description: PlaygroundStatus defines the observed state of Playground + properties: + conditions: + description: Conditions represents the Inference condition. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/llmaz.inftyai.io_serves.yaml b/config/crd/bases/inference.llmaz.io_services.yaml similarity index 99% rename from config/crd/bases/llmaz.inftyai.io_serves.yaml rename to config/crd/bases/inference.llmaz.io_services.yaml index ec90456..4a99628 100644 --- a/config/crd/bases/llmaz.inftyai.io_serves.yaml +++ b/config/crd/bases/inference.llmaz.io_services.yaml @@ -4,20 +4,20 @@ kind: CustomResourceDefinition metadata: annotations: controller-gen.kubebuilder.io/version: v0.14.0 - name: serves.llmaz.inftyai.io + name: services.inference.llmaz.io spec: - group: llmaz.inftyai.io + group: inference.llmaz.io names: - 
kind: Serve - listKind: ServeList - plural: serves - singular: serve + kind: Service + listKind: ServiceList + plural: services + singular: service scope: Namespaced versions: - name: v1alpha1 schema: openAPIV3Schema: - description: Serve is the Schema for the serves API + description: Service is the Schema for the services API properties: apiVersion: description: |- @@ -37,17 +37,11 @@ spec: metadata: type: object spec: - description: ServeSpec defines the desired state of Serve + description: |- + ServiceSpec defines the desired state of Service. + Service will maintain multi-version of lws with different flavors + to support accelerator fungibility. properties: - backend: - default: huggingface - description: |- - Backend indicates the inference backend under the hood, e.g. vLLM. - Default to use huggingface library. - enum: - - vllm - - huggingface - type: string elasticConfig: description: |- ElasticConfig defines the configuration for elastic usage, @@ -56,25 +50,49 @@ spec: properties: maxReplicas: description: |- - MaxReplicas indicates the maximum number of Serve instances based on the traffic. + MaxReplicas indicates the maximum number of Inference instances based on the traffic. Default to nil means there's no limit for the instance number. format: int32 type: integer minReplicas: description: |- - MinReplicas indicates the minimum number of Serve instances based on the traffic. + MinReplicas indicates the minimum number of Inference instances based on the traffic. Default to nil means we can scale down the instances to 0. format: int32 type: integer type: object - modelNameOrPath: - description: ModelNameOrPath represents the model name or the local - path. - type: string + modelProviderClaim: + description: |- + ModelsClaim refers to the model claims. + Note: properties (nodeSelectors, resources, e.g.) of the model flavors + will be applied to the workload if not exists. 
+ properties: + inferenceFlavors: + description: |- + InferenceFlavors represents a list of flavors with fungibility supported. + If not set and multiple modelProviders claimed, apply with the 0-index model provider. + If set, the flavor names also refer to the 0-index model provider. + This is just for simplicity, if needed, will refactor this part in the future. + items: + type: string + type: array + modelProviderNames: + description: |- + ModelProviderNames represents the names of the modelProviders; there may be + multiple models here to support state-of-the-art technologies like + speculative decoding. + items: + type: string + minItems: 1 + type: array + required: + - modelProviderNames + type: object workloadTemplate: description: |- WorkloadTemplate defines the underlying workload layout and configuration, - e.g. the leader/worker templates and replicas. + e.g. the templates and replicas. + Note: nodeSelectors and resources might be replaced by the modelProviderClaim. properties: leaderWorkerTemplate: description: LeaderWorkerTemplate defines the template for leader/worker @@ -7746,16 +7764,38 @@ spec: type: object type: object restartPolicy: + default: Default description: RestartPolicy defines the restart policy when pod failures happen. + enum: + - Default + - RecreateGroupOnPodRestart type: string size: + default: 1 description: |- Number of pods to create. It is the total number of pods in each group. The minimum is 1 which represent the leader. When set to 1, the leader pod is created for each group as well as a 0-replica StatefulSet for the workers. + Default to 1. format: int32 type: integer + subGroupPolicy: + description: |- + SubGroupPolicy describes the policy that will be applied when creating subgroups + in each replica. + properties: + subGroupSize: + description: |- + The number of pods per subgroup. This value is immutable, + and must not be greater than LeaderWorkerSet.Spec.Size.
+ Size must be divisible by subGroupSize in which case the + subgroups will be of equal size. Or size - 1 is divisible + by subGroupSize, in which case the leader is considered as + the extra pod, and will be part of the first subgroup. + format: int32 + type: integer + type: object workerTemplate: description: WorkerTemplate defines the pod template for worker pods. @@ -15422,10 +15462,10 @@ spec: type: object type: object required: - - size - workerTemplate type: object replicas: + default: 1 description: |- Number of leader-workers groups. A scale subresource is available to enable HPA. The selector for HPA will be that of the leader pod, and so practically HPA will be looking up the @@ -15433,19 +15473,84 @@ spec: the rest of the group and expose them as a summary custom metric representing the whole group. On scale down, the leader pod as well as the workers statefulset will be deleted. + Default to 1. format: int32 type: integer + rolloutStrategy: + description: |- + RolloutStrategy defines the strategy that will be applied to update replicas + when a revision is made to the leaderWorkerTemplate. + properties: + rollingUpdateConfiguration: + description: RollingUpdateConfiguration defines the parameters + to be used when type is RollingUpdateStrategyType. + properties: + maxSurge: + anyOf: + - type: integer + - type: string + default: 0 + description: |- + The maximum number of replicas that can be scheduled above the original number of + replicas. + Value can be an absolute number (ex: 5) or a percentage of total replicas at + the start of the update (ex: 10%). + Absolute number is calculated from percentage by rounding up. + By default, a value of 0 is used. + Example: when this is set to 30%, the new replicas can be scaled up by 30% + immediately when the rolling update starts. 
Once old replicas have been deleted, + new replicas can be scaled up further, ensuring that total number of replicas running + at any time during the update is at most 130% of original replicas. + When rolling update completes, replicas will fall back to the original replicas. + x-kubernetes-int-or-string: true + maxUnavailable: + anyOf: + - type: integer + - type: string + default: 1 + description: |- + The maximum number of replicas that can be unavailable during the update. + Value can be an absolute number (ex: 5) or a percentage of total replicas at the start of update (ex: 10%). + Absolute number is calculated from percentage by rounding down. + This can not be 0 if MaxSurge is 0. + By default, a fixed value of 1 is used. + Example: when this is set to 30%, the old replicas can be scaled down by 30% + immediately when the rolling update starts. Once new replicas are ready, old replicas + can be scaled down further, followed by scaling up the new replicas, ensuring + that at least 70% of original number of replicas are available at all times + during the update. + x-kubernetes-int-or-string: true + type: object + type: + default: RollingUpdate + description: Type defines the rollout strategy, it can only + be “RollingUpdate” for now. + enum: + - RollingUpdate + type: string + required: + - type + type: object + startupPolicy: + default: LeaderCreated + description: StartupPolicy determines the startup policy for the + worker statefulset. + enum: + - LeaderCreated + - LeaderReady + type: string required: - leaderWorkerTemplate type: object required: + - modelProviderClaim - workloadTemplate type: object status: - description: ServeStatus defines the observed state of Serve + description: ServiceStatus defines the observed state of Service properties: conditions: - description: Conditions represents the Serve condition. + description: Conditions represents the Inference condition. 
items: description: "Condition contains details for one aspect of the current state of this API Resource.\n---\nThis struct is intended for diff --git a/config/crd/bases/llmaz.io_modelproviders.yaml b/config/crd/bases/llmaz.io_modelproviders.yaml new file mode 100644 index 0000000..c7f057e --- /dev/null +++ b/config/crd/bases/llmaz.io_modelproviders.yaml @@ -0,0 +1,293 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: modelproviders.llmaz.io +spec: + group: llmaz.io + names: + kind: ModelProvider + listKind: ModelProviderList + plural: modelproviders + singular: modelprovider + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: ModelProvider is the Schema for the modelProvider API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ModelProviderSpec defines the desired state of ModelProvider + properties: + dataSource: + description: DataSource represents where to fetch the models, huggingface, + hostpath, s3, etc.. + properties: + image: + description: |- + The name of the image that contains the source data. The assumption is that the source data locates in the + `data` directory in the image. 
+ type: string + imagePullSecrets: + description: ImagePullSecrets is a list of secret names in the + same namespace used for pulling the data image. + items: + type: string + type: array + name: + description: |- + The name of the dataset. The same name will be used as a container name. + It must be a valid DNS subdomain value. + type: string + urls: + description: URLs specifies the links to the public data sources. + E.g., files in a public github repository. + items: + type: string + type: array + volumeSource: + description: The mounted volume that contains the data. + x-kubernetes-preserve-unknown-fields: true + type: object + familyName: + description: |- + FamilyName represents the model type, like llama2, which will be auto injected + to the labels with the key of `llmaz.io/model-family-name`. + type: string + inferenceFlavors: + description: |- + InferenceFlavors represents the hardware requirements to serve the model. + Flavors are fungible following the priority of slice order. + items: + description: |- + Flavor represents the hardware requirements for one model. + Generally, it will be used in two places: + - Pod scheduling with node selectors specified. + - Cluster autoscaling with essential parameters provided. + Flavor is useful because the hardware requirements for models + are usually clear, like llama2-70B needs 8x Nvidia A100. + properties: + name: + description: Name represents the flavor name. + type: string + nodeSelector: + description: |- + NodeSelector refers to the nodes with specified accelerators equipped to + serve the model, like cloud-provider.com/accelerator: nvidia-a100, + NodeSelector will be auto injected to the Pods as scheduling primitives. + items: + description: |- + A node selector represents the union of the results of one or more label queries + over a set of nodes; that is, it represents the OR of the selectors represented + by the node selector terms. + properties: + nodeSelectorTerms: + description: Required.
A list of node selector terms. + The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. 
If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + x-kubernetes-map-type: atomic + type: array + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: array + params: + additionalProperties: + type: string + description: |- + Params stores other useful parameters and will be consumed by the autoscaling components + like cluster-autoscaler, Karpenter. + E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with + instance-type: p4d.24xlarge for AWS. + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests represents the required resources to serve the model, like nvidia.com/gpu: 8. + Note: cpu and memory usage of the model backend (vllm e.g.) can also be declared here, + or a default value will be used based on the community recommendations. + type: object + required: + - name + - requests + type: object + type: array + required: + - dataSource + - familyName + type: object + status: + description: ModelProviderStatus defines the observed state of ModelProvider + properties: + conditions: + description: Conditions represents the Inference condition. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions.
For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 4b6a2d4..06b9a8a 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -2,18 +2,24 @@ # since it depends on service name and namespace that are out of this kustomize package. # It should be run by config/default resources: -- bases/llmaz.inftyai.io_serves.yaml +- bases/inference.llmaz.io_services.yaml +- bases/llmaz.io_modelproviders.yaml +- bases/inference.llmaz.io_playgrounds.yaml #+kubebuilder:scaffold:crdkustomizeresource patches: # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. # patches here are for enabling the conversion webhook for each CRD -#- path: patches/webhook_in_serves.yaml +#- path: patches/webhook_in_inference_services.yaml +#- path: patches/webhook_in__modelproviders.yaml +#- path: patches/webhook_in_inference_playgrounds.yaml #+kubebuilder:scaffold:crdkustomizewebhookpatch # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. 
# patches here are for enabling the CA injection for each CRD -#- path: patches/cainjection_in_serves.yaml +#- path: patches/cainjection_in_inference_services.yaml +#- path: patches/cainjection_in__modelproviders.yaml +#- path: patches/cainjection_in_inference_playgrounds.yaml #+kubebuilder:scaffold:crdkustomizecainjectionpatch # [WEBHOOK] To enable webhook, uncomment the following section diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index a320eaa..d35f3b1 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -1,12 +1,12 @@ # Adds namespace to all resources. -namespace: llmaz-operator-system +namespace: llmaz-system # Value of this field is prepended to the # names of all resources, e.g. a deployment named # "wordpress" becomes "alices-wordpress". # Note that it should also match with the prefix (text before '-') of the namespace # field above. -namePrefix: llmaz-operator- +namePrefix: llmaz- # Labels to add to all resources and selectors. 
#labels: diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 7cad38f..14a1b83 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -6,8 +6,8 @@ metadata: app.kubernetes.io/name: namespace app.kubernetes.io/instance: system app.kubernetes.io/component: manager - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: system --- @@ -21,8 +21,8 @@ metadata: app.kubernetes.io/name: deployment app.kubernetes.io/instance: controller-manager app.kubernetes.io/component: manager - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize spec: selector: diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index 77a8cc3..52e9051 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -7,8 +7,8 @@ metadata: app.kubernetes.io/name: servicemonitor app.kubernetes.io/instance: controller-manager-metrics-monitor app.kubernetes.io/component: metrics - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: controller-manager-metrics-monitor namespace: system diff --git a/config/rbac/_modelprovider_editor_role.yaml b/config/rbac/_modelprovider_editor_role.yaml new file mode 100644 index 0000000..4b0f788 --- /dev/null +++ b/config/rbac/_modelprovider_editor_role.yaml @@ -0,0 +1,31 @@ +# permissions for end users to edit modelproviders. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: modelprovider-editor-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: modelprovider-editor-role +rules: +- apiGroups: + - llmaz.io + resources: + - modelproviders + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - llmaz.io + resources: + - modelproviders/status + verbs: + - get diff --git a/config/rbac/_modelprovider_viewer_role.yaml b/config/rbac/_modelprovider_viewer_role.yaml new file mode 100644 index 0000000..67f90fc --- /dev/null +++ b/config/rbac/_modelprovider_viewer_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to view modelproviders. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: modelprovider-viewer-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: modelprovider-viewer-role +rules: +- apiGroups: + - llmaz.io + resources: + - modelproviders + verbs: + - get + - list + - watch +- apiGroups: + - llmaz.io + resources: + - modelproviders/status + verbs: + - get diff --git a/config/rbac/auth_proxy_client_clusterrole.yaml b/config/rbac/auth_proxy_client_clusterrole.yaml index 2fd2c1c..a6b698e 100644 --- a/config/rbac/auth_proxy_client_clusterrole.yaml +++ b/config/rbac/auth_proxy_client_clusterrole.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: clusterrole app.kubernetes.io/instance: metrics-reader app.kubernetes.io/component: kube-rbac-proxy - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz 
app.kubernetes.io/managed-by: kustomize name: metrics-reader rules: diff --git a/config/rbac/auth_proxy_role.yaml b/config/rbac/auth_proxy_role.yaml index a43ea4c..3eca335 100644 --- a/config/rbac/auth_proxy_role.yaml +++ b/config/rbac/auth_proxy_role.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: clusterrole app.kubernetes.io/instance: proxy-role app.kubernetes.io/component: kube-rbac-proxy - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: proxy-role rules: diff --git a/config/rbac/auth_proxy_role_binding.yaml b/config/rbac/auth_proxy_role_binding.yaml index bc79daf..53c2976 100644 --- a/config/rbac/auth_proxy_role_binding.yaml +++ b/config/rbac/auth_proxy_role_binding.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: clusterrolebinding app.kubernetes.io/instance: proxy-rolebinding app.kubernetes.io/component: kube-rbac-proxy - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: proxy-rolebinding roleRef: diff --git a/config/rbac/auth_proxy_service.yaml b/config/rbac/auth_proxy_service.yaml index 46f6f74..30bd201 100644 --- a/config/rbac/auth_proxy_service.yaml +++ b/config/rbac/auth_proxy_service.yaml @@ -6,8 +6,8 @@ metadata: app.kubernetes.io/name: service app.kubernetes.io/instance: controller-manager-metrics-service app.kubernetes.io/component: kube-rbac-proxy - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: controller-manager-metrics-service namespace: system diff --git a/config/rbac/inference_playground_editor_role.yaml b/config/rbac/inference_playground_editor_role.yaml new 
file mode 100644 index 0000000..2638557 --- /dev/null +++ b/config/rbac/inference_playground_editor_role.yaml @@ -0,0 +1,31 @@ +# permissions for end users to edit playgrounds. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: playground-editor-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: playground-editor-role +rules: +- apiGroups: + - inference.llmaz.io + resources: + - playgrounds + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - inference.llmaz.io + resources: + - playgrounds/status + verbs: + - get diff --git a/config/rbac/inference_playground_viewer_role.yaml b/config/rbac/inference_playground_viewer_role.yaml new file mode 100644 index 0000000..6e2237e --- /dev/null +++ b/config/rbac/inference_playground_viewer_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to view playgrounds. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: playground-viewer-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + name: playground-viewer-role +rules: +- apiGroups: + - inference.llmaz.io + resources: + - playgrounds + verbs: + - get + - list + - watch +- apiGroups: + - inference.llmaz.io + resources: + - playgrounds/status + verbs: + - get diff --git a/config/rbac/serve_editor_role.yaml b/config/rbac/inference_service_editor_role.yaml similarity index 55% rename from config/rbac/serve_editor_role.yaml rename to config/rbac/inference_service_editor_role.yaml index cebc4ef..70cbcba 100644 --- a/config/rbac/serve_editor_role.yaml +++ b/config/rbac/inference_service_editor_role.yaml @@ -1,20 +1,20 @@ -# permissions for end users to edit serves. +# permissions for end users to edit services. 
apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: app.kubernetes.io/name: clusterrole - app.kubernetes.io/instance: serve-editor-role + app.kubernetes.io/instance: service-editor-role app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize - name: serve-editor-role + name: service-editor-role rules: - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - serves + - services verbs: - create - delete @@ -24,8 +24,8 @@ rules: - update - watch - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - serves/status + - services/status verbs: - get diff --git a/config/rbac/serve_viewer_role.yaml b/config/rbac/inference_service_viewer_role.yaml similarity index 52% rename from config/rbac/serve_viewer_role.yaml rename to config/rbac/inference_service_viewer_role.yaml index f368d8c..9d08bad 100644 --- a/config/rbac/serve_viewer_role.yaml +++ b/config/rbac/inference_service_viewer_role.yaml @@ -1,27 +1,27 @@ -# permissions for end users to view serves. +# permissions for end users to view services. 
apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: labels: app.kubernetes.io/name: clusterrole - app.kubernetes.io/instance: serve-viewer-role + app.kubernetes.io/instance: service-viewer-role app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize - name: serve-viewer-role + name: service-viewer-role rules: - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - serves + - services verbs: - get - list - watch - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - serves/status + - services/status verbs: - get diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml index 1d231ab..1b4f59e 100644 --- a/config/rbac/leader_election_role.yaml +++ b/config/rbac/leader_election_role.yaml @@ -6,8 +6,8 @@ metadata: app.kubernetes.io/name: role app.kubernetes.io/instance: leader-election-role app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: leader-election-role rules: diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml index 40c0ba5..9e982d1 100644 --- a/config/rbac/leader_election_role_binding.yaml +++ b/config/rbac/leader_election_role_binding.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: rolebinding app.kubernetes.io/instance: leader-election-rolebinding app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: leader-election-rolebinding roleRef: diff --git 
a/config/rbac/role.yaml b/config/rbac/role.yaml index 2a76a70..4918696 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -5,9 +5,9 @@ metadata: name: manager-role rules: - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - serves + - playgrounds verbs: - create - delete @@ -17,15 +17,67 @@ rules: - update - watch - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - serves/finalizers + - playgrounds/finalizers verbs: - update - apiGroups: - - llmaz.inftyai.io + - inference.llmaz.io resources: - - serves/status + - playgrounds/status + verbs: + - get + - patch + - update +- apiGroups: + - inference.llmaz.io + resources: + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - inference.llmaz.io + resources: + - services/finalizers + verbs: + - update +- apiGroups: + - inference.llmaz.io + resources: + - services/status + verbs: + - get + - patch + - update +- apiGroups: + - llmaz.io + resources: + - modelproviders + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - llmaz.io + resources: + - modelproviders/finalizers + verbs: + - update +- apiGroups: + - llmaz.io + resources: + - modelproviders/status verbs: - get - patch diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml index 6e4138e..6911993 100644 --- a/config/rbac/role_binding.yaml +++ b/config/rbac/role_binding.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: clusterrolebinding app.kubernetes.io/instance: manager-rolebinding app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: manager-rolebinding roleRef: diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml index 3a2627b..09904ae 100644 --- 
a/config/rbac/service_account.yaml +++ b/config/rbac/service_account.yaml @@ -5,8 +5,8 @@ metadata: app.kubernetes.io/name: serviceaccount app.kubernetes.io/instance: controller-manager-sa app.kubernetes.io/component: rbac - app.kubernetes.io/created-by: llmaz-operator - app.kubernetes.io/part-of: llmaz-operator + app.kubernetes.io/created-by: llmaz + app.kubernetes.io/part-of: llmaz app.kubernetes.io/managed-by: kustomize name: controller-manager namespace: system diff --git a/config/samples/_v1alpha1_modelprovider.yaml b/config/samples/_v1alpha1_modelprovider.yaml new file mode 100644 index 0000000..70b460d --- /dev/null +++ b/config/samples/_v1alpha1_modelprovider.yaml @@ -0,0 +1,12 @@ +apiVersion: llmaz.io/v1alpha1 +kind: ModelProvider +metadata: + labels: + app.kubernetes.io/name: modelprovider + app.kubernetes.io/instance: modelprovider-sample + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: llmaz + name: modelprovider-sample +spec: + # TODO(user): Add fields here diff --git a/config/samples/inference_v1alpha1_playground.yaml b/config/samples/inference_v1alpha1_playground.yaml new file mode 100644 index 0000000..e1eb086 --- /dev/null +++ b/config/samples/inference_v1alpha1_playground.yaml @@ -0,0 +1,12 @@ +apiVersion: inference.llmaz.io/v1alpha1 +kind: Playground +metadata: + labels: + app.kubernetes.io/name: playground + app.kubernetes.io/instance: playground-sample + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: llmaz + name: playground-sample +spec: + # TODO(user): Add fields here diff --git a/config/samples/inference_v1alpha1_service.yaml b/config/samples/inference_v1alpha1_service.yaml new file mode 100644 index 0000000..0d0c053 --- /dev/null +++ b/config/samples/inference_v1alpha1_service.yaml @@ -0,0 +1,12 @@ +apiVersion: inference.llmaz.io/v1alpha1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: service + 
app.kubernetes.io/instance: service-sample + app.kubernetes.io/part-of: llmaz + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: llmaz + name: service-sample +spec: + # TODO(user): Add fields here diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index c81515e..3c10859 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -1,4 +1,6 @@ ## Append samples of your project ## resources: -- llmaz_v1alpha1_serve.yaml +- inference_v1alpha1_service.yaml +- _v1alpha1_modelprovider.yaml +- inference_v1alpha1_playground.yaml #+kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/llmaz_v1alpha1_serve.yaml b/config/samples/llmaz_v1alpha1_serve.yaml deleted file mode 100644 index f9f405a..0000000 --- a/config/samples/llmaz_v1alpha1_serve.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: llmaz.inftyai.io/v1alpha1 -kind: Serve -metadata: - labels: - app.kubernetes.io/name: serve - app.kubernetes.io/instance: serve-sample - app.kubernetes.io/part-of: llmaz-operator - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/created-by: llmaz-operator - name: serve-sample -spec: - # TODO(user): Add fields here diff --git a/go.mod b/go.mod index bad51f5..c975355 100644 --- a/go.mod +++ b/go.mod @@ -1,16 +1,18 @@ -module inftyai.io/llmaz +module inftyai.com/llmaz -go 1.22 +go 1.22.0 toolchain go1.22.1 require ( - github.com/onsi/ginkgo/v2 v2.16.0 - github.com/onsi/gomega v1.31.1 - k8s.io/apimachinery v0.29.2 - k8s.io/client-go v0.29.2 - sigs.k8s.io/controller-runtime v0.17.2 - sigs.k8s.io/lws v0.1.0 + github.com/onsi/ginkgo/v2 v2.19.0 + github.com/onsi/gomega v1.33.1 + k8s.io/api v0.29.5 + k8s.io/apimachinery v0.29.5 + k8s.io/client-go v0.29.5 + sigs.k8s.io/controller-runtime v0.17.3 + sigs.k8s.io/lws v0.3.0 + ) require ( @@ -25,14 +27,14 @@ require ( github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect 
github.com/go-openapi/swag v0.22.3 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.3 // indirect + github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect + github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect github.com/google/uuid v1.3.0 // indirect github.com/imdario/mergo v0.3.12 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -51,24 +53,23 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.26.0 // indirect golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect - golang.org/x/net v0.20.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sys v0.16.0 // indirect - golang.org/x/term v0.16.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.17.0 // indirect + golang.org/x/tools v0.21.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/protobuf v1.31.0 // indirect + google.golang.org/protobuf v1.33.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.29.2 // indirect - k8s.io/apiextensions-apiserver v0.29.2 // indirect - k8s.io/component-base v0.29.2 // indirect - k8s.io/klog/v2 v2.110.1 // indirect - k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect + 
k8s.io/apiextensions-apiserver v0.29.5 // indirect + k8s.io/component-base v0.29.5 // indirect + k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect diff --git a/go.sum b/go.sum index f9731c3..ca568f0 100644 --- a/go.sum +++ b/go.sum @@ -2,9 +2,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -17,7 +14,6 @@ github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1 github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod 
h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= @@ -28,30 +24,27 @@ github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2Kv github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod 
h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= -github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU= github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -78,10 +71,10 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg 
v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.16.0 h1:7q1w9frJDzninhXxjZd+Y/x54XNjG/UlRLIYPZafsPM= -github.com/onsi/ginkgo/v2 v2.16.0/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs= -github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= -github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -102,7 +95,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= @@ -128,8 +120,8 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net 
v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= -golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -137,25 +129,24 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= -golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod 
h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= -golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -164,10 +155,8 @@ gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod 
h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -180,28 +169,28 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= -k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= -k8s.io/apiextensions-apiserver v0.29.2 h1:UK3xB5lOWSnhaCk0RFZ0LUacPZz9RY4wi/yt2Iu+btg= -k8s.io/apiextensions-apiserver v0.29.2/go.mod h1:aLfYjpA5p3OwtqNXQFkhJ56TB+spV8Gc4wfMhUA3/b8= -k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= -k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= -k8s.io/client-go v0.29.2 h1:FEg85el1TeZp+/vYJM7hkDlSTFZ+c5nnK44DJ4FyoRg= 
-k8s.io/client-go v0.29.2/go.mod h1:knlvFZE58VpqbQpJNbCbctTVXcd35mMyAAwBdpt4jrA= -k8s.io/component-base v0.29.2 h1:lpiLyuvPA9yV1aQwGLENYyK7n/8t6l3nn3zAtFTJYe8= -k8s.io/component-base v0.29.2/go.mod h1:BfB3SLrefbZXiBfbM+2H1dlat21Uewg/5qtKOl8degM= -k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= -k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= -k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= -k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/api v0.29.5 h1:levS+umUigHCfI3riD36pMY1vQEbrzh4r1ivVWAhHaI= +k8s.io/api v0.29.5/go.mod h1:7b18TtPcJzdjk7w5zWyIHgoAtpGeRvGGASxlS7UZXdQ= +k8s.io/apiextensions-apiserver v0.29.5 h1:njDywexhE6n+1NEl3A4axT0TMQHREnndrk3/ztdWcNE= +k8s.io/apiextensions-apiserver v0.29.5/go.mod h1:pfIvij+MH9a8NQKtW7MD4EFnzvUjJ1ZQsDL8wuP8fnc= +k8s.io/apimachinery v0.29.5 h1:Hofa2BmPfpoT+IyDTlcPdCHSnHtEQMoJYGVoQpRTfv4= +k8s.io/apimachinery v0.29.5/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y= +k8s.io/client-go v0.29.5 h1:nlASXmPQy190qTteaVP31g3c/wi2kycznkTP7Sv1zPc= +k8s.io/client-go v0.29.5/go.mod h1:aY5CnqUUvXYccJhm47XHoPcRyX6vouHdIBHaKZGTbK4= +k8s.io/component-base v0.29.5 h1:Ptj8AzG+p8c2a839XriHwxakDpZH9uvIgYz+o1agjg8= +k8s.io/component-base v0.29.5/go.mod h1:9nBUoPxW/yimISIgAG7sJDrUGJlu7t8HnDafIrOdU8Q= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= 
-sigs.k8s.io/controller-runtime v0.17.2 h1:FwHwD1CTUemg0pW2otk7/U5/i5m2ymzvOXdbeGOUvw0= -sigs.k8s.io/controller-runtime v0.17.2/go.mod h1:+MngTvIQQQhfXtwfdGw/UOQ/aIaqsYywfCINOtwMO/s= +sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk= +sigs.k8s.io/controller-runtime v0.17.3/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/lws v0.1.0 h1:fcsAHN6BcjwcH5lgjZphVRjFy2Ack550a5CpWUqBuRQ= -sigs.k8s.io/lws v0.1.0/go.mod h1:9wojYpN6WFa6JUWccK0DzNHuYvqa9a/npKKlVENmY1I= +sigs.k8s.io/lws v0.3.0 h1:PtjiDHZWCxAeMyrsmPNN0i7KAVf6ocVEQFcojPWeA+k= +sigs.k8s.io/lws v0.3.0/go.mod h1:/R1Q2LB2eg6t9mX5M6V4HLkeucxBFgOyaKkSGh/FGAY= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt index 65b8622..ff72ff2 100644 --- a/hack/boilerplate.go.txt +++ b/hack/boilerplate.go.txt @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/internal/controller/inference/playground_controller.go b/internal/controller/inference/playground_controller.go new file mode 100644 index 0000000..1b0f6e9 --- /dev/null +++ b/internal/controller/inference/playground_controller.go @@ -0,0 +1,62 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inference + +import ( + "context" + + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1" +) + +// PlaygroundReconciler reconciles a Playground object +type PlaygroundReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the Playground object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.17.3/pkg/reconcile +func (r *PlaygroundReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + _ = log.FromContext(ctx) + + // TODO(user): your logic here + + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. 
+func (r *PlaygroundReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&inferencev1alpha1.Playground{}). + Complete(r) +} diff --git a/internal/controller/inference/service_controller.go b/internal/controller/inference/service_controller.go new file mode 100644 index 0000000..633710b --- /dev/null +++ b/internal/controller/inference/service_controller.go @@ -0,0 +1,62 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inference + +import ( + "context" + + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1" +) + +// ServiceReconciler reconciles a Service object +type ServiceReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. 
+// TODO(user): Modify the Reconcile function to compare the state specified by +// the Service object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.17.3/pkg/reconcile +func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + _ = log.FromContext(ctx) + + // TODO(user): your logic here + + return ctrl.Result{}, nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *ServiceReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&inferencev1alpha1.Service{}). + Complete(r) +} diff --git a/internal/controller/inference/suite_test.go b/internal/controller/inference/suite_test.go new file mode 100644 index 0000000..c724a03 --- /dev/null +++ b/internal/controller/inference/suite_test.go @@ -0,0 +1,90 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inference + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1" + //+kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestControllers(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + + // The BinaryAssetsDirectory is only required if you want to run the tests directly + // without calling the makefile target test. If not informed it will look for the + // default path defined in controller-runtime which is /usr/local/kubebuilder/. + // Note that you must have the required binaries set up under the bin directory to perform + // the tests directly. When we run make test it will be set up and used automatically. + BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", + fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + // cfg is defined in this file globally. 
+ cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = inferencev1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + //+kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/internal/controller/serve_controller.go b/internal/controller/modelprovider_controller.go similarity index 70% rename from internal/controller/serve_controller.go rename to internal/controller/modelprovider_controller.go index 9c00668..cedf9ff 100644 --- a/internal/controller/serve_controller.go +++ b/internal/controller/modelprovider_controller.go @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -24,29 +24,29 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - llmazv1alpha1 "inftyai.io/llmaz/api/v1alpha1" + llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1" ) -// ServeReconciler reconciles a Serve object -type ServeReconciler struct { +// ModelProviderReconciler reconciles a ModelProvider object +type ModelProviderReconciler struct { client.Client Scheme *runtime.Scheme } -//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=serves,verbs=get;list;watch;create;update;patch;delete -//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=serves/status,verbs=get;update;patch -//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=serves/finalizers,verbs=update +//+kubebuilder:rbac:groups=llmaz.io,resources=modelproviders,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=llmaz.io,resources=modelproviders/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=llmaz.io,resources=modelproviders/finalizers,verbs=update // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. // TODO(user): Modify the Reconcile function to compare the state specified by -// the Serve object against the actual cluster state, and then +// the ModelProvider object against the actual cluster state, and then // perform operations to make the cluster state reflect the state specified by // the user. 
// // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile -func (r *ServeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *ModelProviderReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = log.FromContext(ctx) // TODO(user): your logic here @@ -55,8 +55,8 @@ func (r *ServeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl } // SetupWithManager sets up the controller with the Manager. -func (r *ServeReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *ModelProviderReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&llmazv1alpha1.Serve{}). + For(&llmaziov1alpha1.ModelProvider{}). Complete(r) } diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 6331b40..f8d050b 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -1,5 +1,5 @@ /* -Copyright 2023. +Copyright 2024. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - llmazv1alpha1 "inftyai.io/llmaz/api/v1alpha1" + llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1" //+kubebuilder:scaffold:imports ) @@ -72,7 +72,7 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) - err = llmazv1alpha1.AddToScheme(scheme.Scheme) + err = llmaziov1alpha1.AddToScheme(scheme.Scheme) Expect(err).NotTo(HaveOccurred()) //+kubebuilder:scaffold:scheme