-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[1/N] Add support for per model deployment
Signed-off-by: kerthcet <[email protected]>
- Loading branch information
Showing
107 changed files
with
6,422 additions
and
344 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
name: init | ||
name: workflow-as-kube-init | ||
|
||
on: | ||
workflow_dispatch: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,8 @@ | ||
# We use 1.30.0 here because there's a bug about invalid defaults of creationTimestamp. | ||
# See https://github.com/kubernetes/kubernetes/pull/120757 for more details. | ||
# FIXME: The bug seems unrelated after all; we will revisit this version pin later. | ||
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. | ||
ENVTEST_K8S_VERSION = 1.28.3 | ||
ENVTEST_K8S_VERSION = 1.30.0 | ||
|
||
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) | ||
ifeq (,$(shell go env GOBIN)) | ||
|
@@ -43,6 +46,8 @@ PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST)))) | |
ARTIFACTS ?= $(PROJECT_DIR)/bin | ||
GINKGO_VERSION ?= $(shell go list -m -f '{{.Version}}' github.com/onsi/ginkgo/v2) | ||
GO_VERSION := $(shell awk '/^go /{print $$2}' go.mod|head -n1) | ||
E2E_KIND_VERSION ?= kindest/node:v1.30.0 | ||
USE_EXISTING_CLUSTER ?= false | ||
|
||
GINKGO = $(shell pwd)/bin/ginkgo | ||
.PHONY: ginkgo | ||
|
@@ -56,12 +61,13 @@ BASE_IMAGE ?= gcr.io/distroless/static:nonroot | |
DOCKER_BUILDX_CMD ?= docker buildx | ||
IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build | ||
IMAGE_BUILD_EXTRA_OPTS ?= | ||
IMAGE_REGISTRY ?= docker.io/inftyai | ||
IMAGE_REGISTRY ?= inftyai | ||
IMAGE_NAME ?= llmaz | ||
IMAGE_REPO := $(IMAGE_REGISTRY)/$(IMAGE_NAME) | ||
GIT_TAG ?= $(shell git describe --tags --dirty --always) | ||
IMG ?= $(IMAGE_REPO):$(GIT_TAG) | ||
BUILDER_IMAGE ?= golang:$(GO_VERSION) | ||
KIND_CLUSTER_NAME ?= kind | ||
|
||
##@ Development | ||
|
||
|
@@ -74,9 +80,28 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust | |
paths="./..." | ||
|
||
.PHONY: generate | ||
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. | ||
generate: controller-gen code-generator ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. | ||
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." | ||
|
||
# Regenerate the client-go code through hack/update-codegen.sh. | ||
# NOTE: because of an upstream bug that is fixed in v1.31, the generated files have to be | ||
# patched by hand every time the client-go code is regenerated. | ||
# Generally, replace "inftyai.com/llmaz/api/core/v1alpha1" with "inftyai.com/llmaz/api/v1alpha1". | ||
# See https://github.com/kubernetes/kubernetes/pull/125162 | ||
# This command will be removed in the future once the workaround is no longer needed. | ||
.PHONY: generate-client-go | ||
generate-client-go: code-generator | ||
./hack/update-codegen.sh go $(PROJECT_DIR)/bin | ||
|
||
# Install client-gen and copy the code-generator helper scripts into $(PROJECT_DIR)/bin. | ||
# Use the same code-generator version as k8s.io/api. | ||
CODEGEN_VERSION := $(shell go list -m -f '{{.Version}}' k8s.io/api) | ||
CODEGEN = $(shell pwd)/bin/code-generator | ||
# Simply expanded (:=) so that `go env GOMODCACHE` runs once at parse time instead of | ||
# once for every reference in the recipe below. | ||
CODEGEN_ROOT := $(shell go env GOMODCACHE)/k8s.io/code-generator@$(CODEGEN_VERSION) | ||
.PHONY: code-generator | ||
code-generator: | ||
@GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install k8s.io/code-generator/cmd/client-gen@$(CODEGEN_VERSION) | ||
cp -f $(CODEGEN_ROOT)/generate-groups.sh $(PROJECT_DIR)/bin/ | ||
cp -f $(CODEGEN_ROOT)/generate-internal-groups.sh $(PROJECT_DIR)/bin/ | ||
cp -f $(CODEGEN_ROOT)/kube_codegen.sh $(PROJECT_DIR)/bin/ | ||
|
||
.PHONY: fmt | ||
fmt: ## Run go fmt against code. | ||
go fmt ./... | ||
|
@@ -94,6 +119,11 @@ test-integration: manifests fmt vet envtest ginkgo ## Run integration tests. | |
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \ | ||
$(GINKGO) --junit-report=junit.xml --output-dir=$(ARTIFACTS) -v $(INTEGRATION_TARGET) | ||
|
||
# Run the e2e tests through hack/e2e-test.sh. The tool paths, the kind settings and the | ||
# image tag are handed to the script as environment variables. | ||
.PHONY: test-e2e | ||
# FIXME: we should install lws CRD. | ||
test-e2e: kustomize manifests fmt vet envtest ginkgo kind-image-build | ||
E2E_KIND_VERSION=$(E2E_KIND_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ./hack/e2e-test.sh | ||
|
||
GOLANGCI_LINT = $(shell pwd)/bin/golangci-lint | ||
GOLANGCI_LINT_VERSION ?= v1.54.2 | ||
golangci-lint: | ||
|
@@ -150,6 +180,16 @@ image-build: | |
image-push: PUSH=--push | ||
image-push: image-build | ||
|
||
# Install the kind binary into $(PROJECT_DIR)/bin with `go install`. | ||
# KIND is the location other targets use to refer to that binary. | ||
# NOTE(review): the module@version below appears mangled in this rendering | ||
# ("[email protected]") — confirm the pinned kind version against the real Makefile. | ||
KIND = $(shell pwd)/bin/kind | ||
.PHONY: kind | ||
kind: | ||
@GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install sigs.k8s.io/[email protected] | ||
|
||
# Build the controller image for linux/amd64 and load it into the kind cluster. | ||
# The load step uses the kind binary installed by the `kind` prerequisite ($(KIND)) rather | ||
# than whatever `kind` happens to be on PATH, and targets the cluster named by | ||
# KIND_CLUSTER_NAME (defaults to "kind", which is also kind's own default). | ||
.PHONY: kind-image-build | ||
kind-image-build: PLATFORMS=linux/amd64 | ||
kind-image-build: kind image-build | ||
$(KIND) load docker-image $(IMG) --name $(KIND_CLUSTER_NAME) | ||
|
||
##@ Deployment | ||
|
||
ifndef ignore-not-found | ||
|
@@ -169,6 +209,12 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in | |
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} | ||
$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f - | ||
|
||
# This is only used in local development with kind. Unlike `deploy`, it also depends on | ||
# kind-image-build, so the image is built and loaded into kind before the manifests are applied. | ||
.PHONY: quick-deploy | ||
quick-deploy: manifests kustomize kind-image-build ## Deploy controller to the K8s cluster specified in ~/.kube/config. | ||
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} | ||
$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f - | ||
|
||
# Depends on `kustomize` so that the binary is installed before the recipe invokes $(KUSTOMIZE), | ||
# the same way `deploy` does. | ||
.PHONY: undeploy | ||
undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. | ||
$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
Copyright 2024. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
// +kubebuilder:object:generate=true | ||
// +groupName=llmaz.io | ||
|
||
package v1alpha1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/* | ||
Copyright 2024. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
/* | ||
This file is needed for kubernetes/code-generator/kube_codegen.sh script used in hack/update-codegen.sh. | ||
*/ | ||
|
||
package v1alpha1 | ||
|
||
//+genclient |
6 changes: 3 additions & 3 deletions
6
api/v1alpha1/zz_generated.deepcopy.go → api/core/v1alpha1/zz_generated.deepcopy.go
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
Copyright 2024. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package v1alpha1 | ||
|
||
import corev1 "k8s.io/api/core/v1" | ||
|
||
// BackendName is the name of an inference backend. | ||
type BackendName string | ||
|
||
const ( | ||
// DefaultBackend has the same value as VLLM ("vllm"), which is also the | ||
// kubebuilder default declared on BackendConfig.Name. | ||
DefaultBackend BackendName = "vllm" | ||
// VLLM is the name of the vLLM backend. | ||
VLLM BackendName = "vllm" | ||
) | ||
|
||
// BackendConfig configures the inference backend: its name and version, the | ||
// arguments and environment variables passed to it, and its resource requirements. | ||
type BackendConfig struct { | ||
// Name represents the inference backend under the hood, e.g. vLLM. | ||
// +kubebuilder:validation:Enum={vllm} | ||
// +kubebuilder:default=vllm | ||
// +optional | ||
Name *BackendName `json:"name,omitempty"` | ||
// Version represents the backend version if you want a different one | ||
// from the default version. | ||
// +optional | ||
Version *string `json:"version,omitempty"` | ||
// Args represents the arguments passed to the backend. | ||
// +optional | ||
Args []string `json:"args,omitempty"` | ||
// Envs represents the environments set to the container. | ||
// +optional | ||
Envs []corev1.EnvVar `json:"envs,omitempty"` | ||
// Resources represents the resource requirements for the backend, like cpu/mem. | ||
// Accelerators like GPU should not be defined here but in the Model flavors; | ||
// otherwise the accelerator requirements defined there will be overridden and | ||
// the workload will lose its fungibility. | ||
// +optional | ||
Resources *ResourceRequirements `json:"resources,omitempty"` | ||
} | ||
|
||
// ResourceRequirements describes the compute resource limits and requests. | ||
// TODO: DRA is not supported yet; we can add support once needed. | ||
type ResourceRequirements struct { | ||
// Limits describes the maximum amount of compute resources allowed. | ||
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ | ||
// +optional | ||
Limits corev1.ResourceList `json:"limits,omitempty"` | ||
// Requests describes the minimum amount of compute resources required. | ||
// If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, | ||
// otherwise to an implementation-defined value. Requests cannot exceed Limits. | ||
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ | ||
// +optional | ||
Requests corev1.ResourceList `json:"requests,omitempty"` | ||
} | ||
|
||
// ElasticConfig bounds the number of inference workloads when scaling based on the traffic. | ||
type ElasticConfig struct { | ||
// MinReplicas indicates the minimum number of inference workloads based on the traffic. | ||
// Defaults to 1, which means the instances can be scaled down to 1. | ||
// +kubebuilder:default=1 | ||
// +optional | ||
MinReplicas *int32 `json:"minReplicas,omitempty"` | ||
// MaxReplicas indicates the maximum number of inference workloads based on the traffic. | ||
// Defaults to nil, which means there's no limit on the number of instances. | ||
// +optional | ||
MaxReplicas *int32 `json:"maxReplicas,omitempty"` | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
Copyright 2024. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
// +kubebuilder:object:generate=true | ||
// +groupName=inference.llmaz.io | ||
|
||
package v1alpha1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.