Merge pull request #11 from kerthcet/feat/api-defination

Add webhook to Model
InftyAI · Jul 15, 2024 · 94a85fe · 94a85fe
2 parents 9add086 + 5e703f5
commit 94a85fe
Show file tree

Hide file tree

Showing 42 changed files with 1,051 additions and 2,067 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,5 +1,8 @@
+# Images used by the two build stages. `make image-build` passes both via
+# --build-arg; the defaults keep a plain `docker build .` working, because an
+# ARG without a value would leave the FROM lines below with an empty image name.
+ARG BASE_IMAGE=gcr.io/distroless/static:nonroot
+ARG BUILDER_IMAGE=golang:1.20
+
 # Build the manager binary
-FROM golang:1.20 as builder
+FROM ${BUILDER_IMAGE} as builder
 ARG TARGETOS
 ARG TARGETARCH
 
@@ -14,7 +17,7 @@ RUN go mod download
 # Copy the go source
 COPY cmd/main.go cmd/main.go
 COPY api/ api/
-COPY internal/controller/ internal/controller/
+COPY internal/ internal/
 
 # Build
 # the GOARCH has not a default value to allow the binary be built according to the host where the command
@@ -25,7 +28,7 @@ RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o ma
 
 # Use distroless as minimal base image to package the manager binary
 # Refer to https://github.com/GoogleContainerTools/distroless for more details
-FROM gcr.io/distroless/static:nonroot
+FROM ${BASE_IMAGE}
 WORKDIR /
 COPY --from=builder /workspace/manager .
 USER 65532:65532

diff --git a/Makefile b/Makefile
@@ -1,8 +1,6 @@
 
-# Image URL to use all building/pushing image targets
-IMG ?= controller:latest
 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
-ENVTEST_K8S_VERSION = 1.28.0
+ENVTEST_K8S_VERSION = 1.28.3
 
 # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
 ifeq (,$(shell go env GOBIN))
@@ -42,11 +40,39 @@ all: build
 help: ## Display this help.
 	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
 
+# Absolute directory of the last entry in MAKEFILE_LIST at this point
+# (this Makefile, provided no other makefile has been included before this line).
+PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
+# Directory that receives test artifacts; passed to ginkgo as --output-dir.
+ARTIFACTS ?= $(PROJECT_DIR)/bin
+# Version of the ginkgo module as resolved by `go list -m`, so the installed
+# CLI matches the library version the module depends on.
+GINKGO_VERSION ?= $(shell go list -m -f '{{.Version}}' github.com/onsi/ginkgo/v2)
+# Version from the first line in go.mod that starts with "go "; used as the
+# tag of the default BUILDER_IMAGE.
+GO_VERSION := $(shell awk '/^go /{print $$2}' go.mod|head -n1)
+
+# Path of the ginkgo CLI. It is derived from LOCALBIN, the directory the recipe
+# below installs into, so the binary that gets run is always the one that was
+# installed, even when LOCALBIN is overridden. Kept recursively expanded (`=`)
+# so it resolves correctly wherever LOCALBIN is defined in this file.
+GINKGO = $(LOCALBIN)/ginkgo
+.PHONY: ginkgo
+ginkgo: ## Download ginkgo locally if necessary.
+	test -s $(GINKGO) || \
+	GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/v2/ginkgo@$(GINKGO_VERSION)
+
+# Package pattern handed to ginkgo by the test-integration target.
+INTEGRATION_TARGET ?= ./test/integration/...
+
+# Image build settings. Everything assigned with ?= can be overridden from the
+# command line or the environment.
+# BASE_IMAGE and BUILDER_IMAGE are forwarded to the Dockerfile as build args.
+BASE_IMAGE ?= gcr.io/distroless/static:nonroot
+DOCKER_BUILDX_CMD ?= docker buildx
+IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build
+# Extra flags appended to the image build command line.
+IMAGE_BUILD_EXTRA_OPTS ?=
+IMAGE_REGISTRY ?= docker.io/inftyai
+IMAGE_NAME ?= llmaz
+IMAGE_REPO := $(IMAGE_REGISTRY)/$(IMAGE_NAME)
+# Image tag: output of `git describe --tags --dirty --always`.
+GIT_TAG ?= $(shell git describe --tags --dirty --always)
+IMG ?= $(IMAGE_REPO):$(GIT_TAG)
+# Builder image tag follows the Go version read from go.mod (GO_VERSION).
+BUILDER_IMAGE ?= golang:$(GO_VERSION)
+
 ##@ Development
 
 .PHONY: manifests
 manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
-	$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
+	$(CONTROLLER_GEN) \
+		rbac:roleName=manager-role output:rbac:artifacts:config=config/rbac \
+		crd:generateEmbeddedObjectMeta=true output:crd:artifacts:config=config/crd/bases \
+		webhook output:webhook:artifacts:config=config/webhook \
+		paths="./..."
 
 .PHONY: generate
 generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
@@ -64,6 +90,11 @@ vet: ## Run go vet against code.
 test: manifests generate fmt vet envtest ## Run tests.
 	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./... -coverprofile cover.out
 
+# Runs ginkgo verbosely over $(INTEGRATION_TARGET) with KUBEBUILDER_ASSETS
+# pointing at the envtest binaries for $(ENVTEST_K8S_VERSION), and writes a
+# JUnit report (junit.xml) into $(ARTIFACTS).
+.PHONY: test-integration
+test-integration: manifests fmt vet envtest ginkgo ## Run integration tests.
+	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
+	$(GINKGO) --junit-report=junit.xml --output-dir=$(ARTIFACTS) -v $(INTEGRATION_TARGET)
+
 GOLANGCI_LINT = $(shell pwd)/bin/golangci-lint
 GOLANGCI_LINT_VERSION ?= v1.54.2
 golangci-lint:
@@ -90,17 +121,6 @@ build: manifests generate fmt vet ## Build manager binary.
 run: manifests generate fmt vet ## Run a controller from your host.
 	go run ./cmd/main.go
 
-# If you wish to build the manager image targeting other platforms you can use the --platform flag.
-# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it.
-# More info: https://docs.docker.com/develop/develop-images/build_enhancements/
-.PHONY: docker-build
-docker-build: ## Build docker image with the manager.
-	$(CONTAINER_TOOL) build -t ${IMG} .
-
-.PHONY: docker-push
-docker-push: ## Push docker image with the manager.
-	$(CONTAINER_TOOL) push ${IMG}
-
 # PLATFORMS defines the target platforms for the manager image be built to provide support to multiple
 # architectures. (i.e. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to:
 # - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/
@@ -118,6 +138,19 @@ docker-buildx: ## Build and push docker image for the manager for cross-platform
 	- $(CONTAINER_TOOL) buildx rm project-v3-builder
 	rm Dockerfile.cross
 
+# CGO_ENABLED is forwarded to the image build as a build arg. It defaults to 0,
+# the value the Dockerfile's `go build` step uses, so the flag is never passed
+# with an empty value.
+CGO_ENABLED ?= 0
+# PUSH is empty for a local build; image-push sets it to --push for itself and
+# its prerequisites through a target-specific variable.
+PUSH ?=
+
+.PHONY: image-build
+image-build: ## Build the manager image (tagged $(IMG)).
+	$(IMAGE_BUILD_CMD) -t $(IMG) \
+		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+		--build-arg BUILDER_IMAGE=$(BUILDER_IMAGE) \
+		--build-arg CGO_ENABLED=$(CGO_ENABLED) \
+		$(PUSH) \
+		$(IMAGE_BUILD_EXTRA_OPTS) ./
+
+.PHONY: image-push
+image-push: PUSH=--push
+image-push: image-build ## Build the manager image and push it to the registry.
+
 ##@ Deployment
 
 ifndef ignore-not-found
@@ -135,7 +168,7 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified
 .PHONY: deploy
 deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
 	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
-	$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f -
+	$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f -
 
 .PHONY: undeploy
 undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.

diff --git a/PROJECT b/PROJECT
@@ -35,4 +35,8 @@ resources:
   kind: Model
   path: inftyai.com/llmaz/api/v1alpha1
   version: v1alpha1
+  webhooks:
+    defaulting: true
+    validation: true
+    webhookVersion: v1
 version: "3"
diff --git a/README.md b/README.md
@@ -7,13 +7,13 @@
 [GoReport Widget]: https://goreportcard.com/badge/github.com/inftyai/llmaz
 [GoReport Status]: https://goreportcard.com/report/github.com/inftyai/llmaz
 
-llmaz, pronounced as `/lima:z/`, aims to provide a production-ready inference platform for various LLMs on Kubernetes. It tightly integrates with state-of-the-art inference backends, such as [vLLM](https://github.com/vllm-project/vllm).
+llmaz, pronounced as `/lima:z/`, aims to provide a production-ready inference platform for large language models on Kubernetes. It tightly integrates with state-of-the-art inference backends, such as [vLLM](https://github.com/vllm-project/vllm).
 
 ## Concept
 
 ![image](./docs/assets/overview.png)
 
-## Feature Overview
+## Features Overview
 
 - **Easy to use**: People can deploy a production-ready LLM service with minimal configurations.
 - **High performance**: llmaz integrates with vLLM by default for high-performance inference. Support for other backends is on the way.

diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go
@@ -38,7 +38,7 @@ type PlaygroundSpec struct {
 	// technology called splitwise, the workload template is shared by both.
 	// ModelClaim and multiModelsClaims are exclusive configured.
 	// +kubebuilder:validation:MinItems=1
-	MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims"`
+	MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims,omitempty"`
 	// BackendConfig represents the inference backend configuration
 	// under the hood, e.g. vLLM, which is the default backend.
 	// +optional

diff --git a/api/inference/v1alpha1/service_types.go b/api/inference/v1alpha1/service_types.go
@@ -34,7 +34,7 @@ type ServiceSpec struct {
 	// Note: properties (nodeSelectors, resources, e.g.) of the model flavors
 	// will be applied to the workload if not exist.
 	// +kubebuilder:validation:MinItems=1
-	MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims"`
+	MultiModelsClaims []api.MultiModelsClaim `json:"multiModelsClaims,omitempty"`
 	// WorkloadTemplate defines the underlying workload layout and configuration.
 	// Note: the LWS spec might be twisted to support different technologies
 	// like splitwise and accelerator fungibility and several LWSs will be created.

diff --git a/api/v1alpha1/model_types.go b/api/v1alpha1/model_types.go
@@ -21,21 +21,35 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
+const (
+	ModelFamilyNameLabelKey = "llmaz.io/model-family-name"
+)
+
 // DataSource represents where to load the model.
 // Only one data source will be used.
 type DataSource struct {
-	// URL represents the URL link than contains the data sources.
-	// +optional
-	URL *string `json:"url,omitempty"`
-	// The mounted volume that contains the data.
-	// +optional
-	Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
-	// Image represents the the image address that contains the source data.
-	// +optional
-	Image *string `json:"image,omitempty"`
-	// ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image.
+	// ModelID refers to the model identifier on model hub,
+	// such as meta-llama/Meta-Llama-3-8B.
+	ModelID *string `json:"modelID,omitempty"`
+	// ModelHub refers to the model registry, such as huggingface.
+	// +kubebuilder:default=Huggingface
+	// +kubebuilder:validation:Enum={Huggingface,ModelScope}
 	// +optional
-	ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
+	ModelHub *string `json:"modelHub,omitempty"`
+
+	// TODO: support all these sources.
+	// // URL represents the URL link that contains the data sources.
+	// // +optional
+	// URL *string `json:"url,omitempty"`
+	// // The mounted volume that contains the data.
+	// // +optional
+	// Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
+	// // Image represents the image address that contains the source data.
+	// // +optional
+	// Image *string `json:"image,omitempty"`
+	// // ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image.
+	// // +optional
+	// ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
 }
 
 type FlavorName string
@@ -134,6 +148,7 @@ type ModelStatus struct {
 
 //+kubebuilder:object:root=true
 //+kubebuilder:subresource:status
+//+kubebuilder:resource:scope=Cluster
 
 // Model is the Schema for the models API
 type Model struct {

diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
diff --git a/cmd/main.go b/cmd/main.go
@@ -34,8 +34,10 @@ import (
 
 	inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1"
 	llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1"
+	"inftyai.com/llmaz/internal/cert"
 	"inftyai.com/llmaz/internal/controller"
 	inferencecontroller "inftyai.com/llmaz/internal/controller/inference"
+	"inftyai.com/llmaz/internal/webhook"
 	//+kubebuilder:scaffold:imports
 )
 
@@ -92,41 +94,68 @@ func main() {
 		os.Exit(1)
 	}
 
-	if err = (&inferencecontroller.ServiceReconciler{
+	certsReady := make(chan struct{})
+
+	if err = cert.CertsManager(mgr, certsReady); err != nil {
+		setupLog.Error(err, "unable to setup cert rotation")
+		os.Exit(1)
+	}
+
+	// Cert won't be ready until manager starts, so start a goroutine here which
+	// will block until the cert is ready before setting up the controllers.
+	// Controllers who register after manager starts will start directly.
+	go setupControllers(mgr, certsReady)
+
+	//+kubebuilder:scaffold:builder
+
+	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
+		setupLog.Error(err, "unable to set up health check")
+		os.Exit(1)
+	}
+	if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
+		setupLog.Error(err, "unable to set up ready check")
+		os.Exit(1)
+	}
+
+	setupLog.Info("starting manager")
+	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
+		setupLog.Error(err, "problem running manager")
+		os.Exit(1)
+	}
+}
+
+func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
+	// The controllers won't work until the webhooks are operating,
+	// and the webhooks won't work until the certs are all in place.
+	setupLog.Info("waiting for the cert generation to complete")
+	<-certsReady
+	setupLog.Info("certs ready")
+
+	if err := (&inferencecontroller.ServiceReconciler{
 		Client: mgr.GetClient(),
 		Scheme: mgr.GetScheme(),
 	}).SetupWithManager(mgr); err != nil {
 		setupLog.Error(err, "unable to create controller", "controller", "Service")
 		os.Exit(1)
 	}
-	if err = (&inferencecontroller.PlaygroundReconciler{
+	if err := (&inferencecontroller.PlaygroundReconciler{
 		Client: mgr.GetClient(),
 		Scheme: mgr.GetScheme(),
 	}).SetupWithManager(mgr); err != nil {
 		setupLog.Error(err, "unable to create controller", "controller", "Playground")
 		os.Exit(1)
 	}
-	if err = (&controller.ModelReconciler{
+	if err := (&controller.ModelReconciler{
 		Client: mgr.GetClient(),
 		Scheme: mgr.GetScheme(),
 	}).SetupWithManager(mgr); err != nil {
 		setupLog.Error(err, "unable to create controller", "controller", "Model")
 		os.Exit(1)
 	}
-	//+kubebuilder:scaffold:builder
-
-	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
-		setupLog.Error(err, "unable to set up health check")
-		os.Exit(1)
-	}
-	if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
-		setupLog.Error(err, "unable to set up ready check")
-		os.Exit(1)
-	}
-
-	setupLog.Info("starting manager")
-	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
-		setupLog.Error(err, "problem running manager")
-		os.Exit(1)
+	if os.Getenv("ENABLE_WEBHOOKS") != "false" {
+		if err := webhook.SetupModelWebhook(mgr); err != nil {
+			setupLog.Error(err, "unable to create webhook", "webhook", "Model")
+			os.Exit(1)
+		}
 	}
 }