[1/N] Add support for per model deployment

Signed-off-by: kerthcet <[email protected]>
InftyAI · Jul 17, 2024 · dea9a2e · dea9a2e
1 parent 01091cd
commit dea9a2e
Show file tree

Hide file tree

Showing 107 changed files with 6,422 additions and 344 deletions.
diff --git a/.github/workflows/kube-workflow-init.yaml b/.github/workflows/kube-workflow-init.yaml
@@ -1,4 +1,4 @@
-name: init
+name: workflow-as-kube-init
 
 on:
   workflow_dispatch:

diff --git a/Dockerfile b/Dockerfile
@@ -17,7 +17,8 @@ RUN go mod download
 # Copy the go source
 COPY cmd/main.go cmd/main.go
 COPY api/ api/
-COPY internal/ internal/
+COPY pkg/ pkg/
+COPY client-go/ client-go/
 
 # Build
 # the GOARCH has not a default value to allow the binary be built according to the host where the command

diff --git a/Makefile b/Makefile
@@ -1,5 +1,8 @@
+# We use 1.30.0 here because of a bug that produces invalid defaults for creationTimestamp.
+# See https://github.com/kubernetes/kubernetes/pull/120757 for more details.
+# FIXME: The bug may turn out to be unrelated; revisit this later.
 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
-ENVTEST_K8S_VERSION = 1.28.3
+ENVTEST_K8S_VERSION = 1.30.0
 
 # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
 ifeq (,$(shell go env GOBIN))
@@ -43,6 +46,8 @@ PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
 ARTIFACTS ?= $(PROJECT_DIR)/bin
 GINKGO_VERSION ?= $(shell go list -m -f '{{.Version}}' github.com/onsi/ginkgo/v2)
 GO_VERSION := $(shell awk '/^go /{print $$2}' go.mod|head -n1)
+E2E_KIND_VERSION ?= kindest/node:v1.30.0
+USE_EXISTING_CLUSTER ?= false
 
 GINKGO = $(shell pwd)/bin/ginkgo
 .PHONY: ginkgo
@@ -56,12 +61,13 @@ BASE_IMAGE ?= gcr.io/distroless/static:nonroot
 DOCKER_BUILDX_CMD ?= docker buildx
 IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build
 IMAGE_BUILD_EXTRA_OPTS ?=
-IMAGE_REGISTRY ?= docker.io/inftyai
+IMAGE_REGISTRY ?= inftyai
 IMAGE_NAME ?= llmaz
 IMAGE_REPO := $(IMAGE_REGISTRY)/$(IMAGE_NAME)
 GIT_TAG ?= $(shell git describe --tags --dirty --always)
 IMG ?= $(IMAGE_REPO):$(GIT_TAG)
 BUILDER_IMAGE ?= golang:$(GO_VERSION)
+KIND_CLUSTER_NAME ?= kind
 
 ##@ Development
 
@@ -74,9 +80,28 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
 		paths="./..."
 
 .PHONY: generate
-generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
+generate: controller-gen code-generator ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
 	$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
 
+# The bug worked around here is fixed in v1.31; this command will be removed in the future.
+# For now, we have to modify the files ourselves each time the client-go code is regenerated.
+# Generally, replace "inftyai.com/llmaz/api/core/v1alpha1" with "inftyai.com/llmaz/api/v1alpha1".
+# See https://github.com/kubernetes/kubernetes/pull/125162
+.PHONY: generate-client-go
+generate-client-go: code-generator
+	./hack/update-codegen.sh go $(PROJECT_DIR)/bin
+
+# Use the same code-generator version as k8s.io/api.
+CODEGEN_VERSION := $(shell go list -m -f '{{.Version}}' k8s.io/api)
+CODEGEN = $(shell pwd)/bin/code-generator
+CODEGEN_ROOT = $(shell go env GOMODCACHE)/k8s.io/code-generator@$(CODEGEN_VERSION)
+# Installs client-gen into bin/ and copies the code-generator helper scripts from the Go module cache next to it.
+.PHONY: code-generator
+code-generator:
+	@GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install k8s.io/code-generator/cmd/client-gen@$(CODEGEN_VERSION)
+	cp -f $(CODEGEN_ROOT)/generate-groups.sh $(PROJECT_DIR)/bin/
+	cp -f $(CODEGEN_ROOT)/generate-internal-groups.sh $(PROJECT_DIR)/bin/
+	cp -f $(CODEGEN_ROOT)/kube_codegen.sh $(PROJECT_DIR)/bin/
+
 .PHONY: fmt
 fmt: ## Run go fmt against code.
 	go fmt ./...
@@ -94,6 +119,11 @@ test-integration: manifests fmt vet envtest ginkgo ## Run integration tests.
 	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
 	$(GINKGO) --junit-report=junit.xml --output-dir=$(ARTIFACTS) -v $(INTEGRATION_TARGET)
 
+.PHONY: test-e2e
+# FIXME: we should install the lws CRD.
+test-e2e: kustomize manifests fmt vet envtest ginkgo kind-image-build ## Run e2e tests through hack/e2e-test.sh.
+	E2E_KIND_VERSION=$(E2E_KIND_VERSION) \
+	KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) \
+	KIND=$(KIND) \
+	KUBECTL=$(KUBECTL) \
+	KUSTOMIZE=$(KUSTOMIZE) \
+	GINKGO=$(GINKGO) \
+	USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) \
+	IMAGE_TAG=$(IMG) \
+	./hack/e2e-test.sh
+
 GOLANGCI_LINT = $(shell pwd)/bin/golangci-lint
 GOLANGCI_LINT_VERSION ?= v1.54.2
 golangci-lint:
@@ -150,6 +180,16 @@ image-build:
 image-push: PUSH=--push
 image-push: image-build
 
+KIND = $(shell pwd)/bin/kind
+.PHONY: kind
+kind:
+	@GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install sigs.k8s.io/[email protected]
+
+# Use the kind binary installed by the `kind` prerequisite ($(KIND)) rather than whatever `kind` is on PATH,
+# and load the image into the cluster named by KIND_CLUSTER_NAME.
+.PHONY: kind-image-build
+kind-image-build: PLATFORMS=linux/amd64
+kind-image-build: kind image-build ## Build the image and load it into the kind cluster named by KIND_CLUSTER_NAME.
+	$(KIND) load docker-image $(IMG) --name $(KIND_CLUSTER_NAME)
+
 ##@ Deployment
 
 ifndef ignore-not-found
@@ -169,6 +209,12 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in
 	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
 	$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f -
 
+# This is only used in local development with kind.
+.PHONY: quick-deploy
+quick-deploy: manifests kustomize kind-image-build ## Build the image, load it into kind, then deploy the controller to the K8s cluster specified in ~/.kube/config.
+	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
+	$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f -
+
 .PHONY: undeploy
 undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
 	$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -

diff --git a/api/core/v1alpha1/doc.go b/api/core/v1alpha1/doc.go
@@ -0,0 +1,20 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// +kubebuilder:object:generate=true
+// +groupName=llmaz.io
+
+package v1alpha1
diff --git a/api/v1alpha1/groupversion_info.go → api/core/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go → api/core/v1alpha1/groupversion_info.go
@@ -28,9 +28,18 @@ var (
 	// GroupVersion is group version used to register these objects
 	GroupVersion = schema.GroupVersion{Group: "llmaz.io", Version: "v1alpha1"}
 
+	// SchemeGroupVersion is alias to GroupVersion for client-go libraries.
+	// It is required by pkg/client/informers/externalversions/...
+	SchemeGroupVersion = GroupVersion
+
 	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
 	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
 
 	// AddToScheme adds the types in this group-version to the given scheme.
 	AddToScheme = SchemeBuilder.AddToScheme
 )
+
+// Resource qualifies the given resource name with this API group; it is required by pkg/client/listers/...
+func Resource(resource string) schema.GroupResource {
+	return schema.GroupResource{Group: GroupVersion.Group, Resource: resource}
+}
diff --git a/api/v1alpha1/model_types.go → api/core/v1alpha1/model_types.go b/api/v1alpha1/model_types.go → api/core/v1alpha1/model_types.go
@@ -23,6 +23,7 @@ import (
 
 const (
 	ModelFamilyNameLabelKey = "llmaz.io/model-family-name"
+	ModelNameLabelKey       = "llmaz.io/model-name"
 )
 
 // DataSource represents where to load the model.
@@ -75,7 +76,7 @@ type Flavor struct {
 	// cloud-provider.com/accelerator: nvidia-a100.
 	// NodeSelector will be auto injected to the Pods as scheduling primitives.
 	// +optional
-	NodeSelector []v1.NodeSelector `json:"nodeSelector,omitempty"`
+	NodeSelector map[string]string `json:"nodeSelector,omitempty"`
 	// Params stores other useful parameters and will be consumed by the autoscaling components
 	// like cluster-autoscaler, Karpenter.
 	// E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with
@@ -146,6 +147,7 @@ type ModelStatus struct {
 	Conditions []metav1.Condition `json:"conditions,omitempty"`
 }
 
+//+genclient
 //+kubebuilder:object:root=true
 //+kubebuilder:subresource:status
 //+kubebuilder:resource:scope=Cluster

diff --git a/api/core/v1alpha1/types.go b/api/core/v1alpha1/types.go
@@ -0,0 +1,23 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+/*
+This file is needed for kubernetes/code-generator/kube_codegen.sh script used in hack/update-codegen.sh.
+*/
+
+package v1alpha1
+
+//+genclient
diff --git a/api/v1alpha1/zz_generated.deepcopy.go → api/core/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go → api/core/v1alpha1/zz_generated.deepcopy.go
diff --git a/api/inference/v1alpha1/config_types.go b/api/inference/v1alpha1/config_types.go
@@ -0,0 +1,75 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import corev1 "k8s.io/api/core/v1"
+
+// BackendName identifies an inference backend.
+type BackendName string
+
+const (
+	// VLLM selects the vLLM backend.
+	VLLM BackendName = "vllm"
+	// DefaultBackend is the backend used when none is specified.
+	DefaultBackend = VLLM
+)
+
+// BackendConfig holds the name, version, arguments, environment variables and
+// resource requirements of the inference backend.
+type BackendConfig struct {
+	// Name represents the inference backend under the hood, e.g. vLLM.
+	// +kubebuilder:validation:Enum={vllm}
+	// +kubebuilder:default=vllm
+	// +optional
+	Name *BackendName `json:"name,omitempty"`
+	// Version represents the backend version if you want a different one
+	// from the default version.
+	// +optional
+	Version *string `json:"version,omitempty"`
+	// Args represents the arguments passed to the backend.
+	// +optional
+	Args []string `json:"args,omitempty"`
+	// Envs represents the environment variables set on the container.
+	// +optional
+	Envs []corev1.EnvVar `json:"envs,omitempty"`
+	// Resources represents the resource requirements for the backend, like cpu/mem.
+	// Accelerators like GPU should not be defined here but in the Model flavors;
+	// otherwise the accelerator requirements defined there will be overridden
+	// and the workload will lose its fungibility.
+	Resources *ResourceRequirements `json:"resources,omitempty"`
+}
+
+// TODO: Do not support DRA yet, we can support that once needed.
+type ResourceRequirements struct {
+	// Limits describes the maximum amount of compute resources allowed.
+	// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+	// +optional
+	Limits corev1.ResourceList `json:"limits,omitempty"`
+	// Requests describes the minimum amount of compute resources required.
+	// If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+	// otherwise to an implementation-defined value. Requests cannot exceed Limits.
+	// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+	// +optional
+	Requests corev1.ResourceList `json:"requests,omitempty"`
+}
+
+type ElasticConfig struct {
+	// MinReplicas indicates the minimum number of inference workloads based on the traffic.
+	// Default to nil means we can scale down the instances to 1.
+	// +kubebuilder:default=1
+	// +optional
+	MinReplicas *int32 `json:"minReplicas,omitempty"`
+	// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
+	// Default to nil means there's no limit for the instance number.
+	// +optional
+	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
+}
diff --git a/api/inference/v1alpha1/doc.go b/api/inference/v1alpha1/doc.go
@@ -0,0 +1,20 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// +kubebuilder:object:generate=true
+// +groupName=inference.llmaz.io
+
+package v1alpha1
diff --git a/api/inference/v1alpha1/groupversion_info.go b/api/inference/v1alpha1/groupversion_info.go
@@ -28,9 +28,18 @@ var (
 	// GroupVersion is group version used to register these objects
 	GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"}
 
+	// SchemeGroupVersion is alias to GroupVersion for client-go libraries.
+	// It is required by pkg/client/informers/externalversions/...
+	SchemeGroupVersion = GroupVersion
+
 	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
 	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
 
 	// AddToScheme adds the types in this group-version to the given scheme.
 	AddToScheme = SchemeBuilder.AddToScheme
 )
+
+// Resource qualifies the given resource name with this API group; it is required by pkg/client/listers/...
+func Resource(resource string) schema.GroupResource {
+	return schema.GroupResource{Group: GroupVersion.Group, Resource: resource}
+}
diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go
@@ -17,8 +17,9 @@ limitations under the License.
 package v1alpha1
 
 import (
-	api "inftyai.com/llmaz/api/v1alpha1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	api "inftyai.com/llmaz/api/core/v1alpha1"
 )
 
 // PlaygroundSpec defines the desired state of Playground
@@ -50,12 +51,21 @@ type PlaygroundSpec struct {
 	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
 }
 
+// Both constants are declared as untyped string constants so the pair is consistent.
+// NOTE(review): presumably these are the condition types stored in PlaygroundStatus.Conditions — confirm.
+const (
+	// PlaygroundProgressing means the Playground is progressing now, such as waiting for the
+	// inference service creation, rolling update or scaling up and down.
+	PlaygroundProgressing = "Progressing"
+	// PlaygroundAvailable indicates the corresponding inference service is available now.
+	PlaygroundAvailable = "Available"
+)
+
 // PlaygroundStatus defines the observed state of Playground
 type PlaygroundStatus struct {
 	// Conditions represents the Inference condition.
 	Conditions []metav1.Condition `json:"conditions,omitempty"`
 }
 
+//+genclient
 //+kubebuilder:object:root=true
 //+kubebuilder:subresource:status