Skip to content

Commit

Permalink
[1/N] Add support for per model deployment
Browse files Browse the repository at this point in the history
Signed-off-by: kerthcet <[email protected]>
  • Loading branch information
kerthcet committed Jul 17, 2024
1 parent 01091cd commit dea9a2e
Show file tree
Hide file tree
Showing 107 changed files with 6,422 additions and 344 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/kube-workflow-init.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: init
name: workflow-as-kube-init

on:
workflow_dispatch:
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ RUN go mod download
# Copy the go source
COPY cmd/main.go cmd/main.go
COPY api/ api/
COPY internal/ internal/
COPY pkg/ pkg/
COPY client-go/ client-go/

# Build
# the GOARCH has not a default value to allow the binary be built according to the host where the command
Expand Down
52 changes: 49 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# We use 1.30.0 here because there's a bug about invalid defaults of creationTimestamp.
# See https://github.com/kubernetes/kubernetes/pull/120757 for more details.
# FIXME: But seems not related, will revisit this later.
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.28.3
ENVTEST_K8S_VERSION = 1.30.0

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
Expand Down Expand Up @@ -43,6 +46,8 @@ PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
ARTIFACTS ?= $(PROJECT_DIR)/bin
GINKGO_VERSION ?= $(shell go list -m -f '{{.Version}}' github.com/onsi/ginkgo/v2)
GO_VERSION := $(shell awk '/^go /{print $$2}' go.mod|head -n1)
E2E_KIND_VERSION ?= kindest/node:v1.30.0
USE_EXISTING_CLUSTER ?= false

GINKGO = $(shell pwd)/bin/ginkgo
.PHONY: ginkgo
Expand All @@ -56,12 +61,13 @@ BASE_IMAGE ?= gcr.io/distroless/static:nonroot
DOCKER_BUILDX_CMD ?= docker buildx
IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build
IMAGE_BUILD_EXTRA_OPTS ?=
IMAGE_REGISTRY ?= docker.io/inftyai
IMAGE_REGISTRY ?= inftyai
IMAGE_NAME ?= llmaz
IMAGE_REPO := $(IMAGE_REGISTRY)/$(IMAGE_NAME)
GIT_TAG ?= $(shell git describe --tags --dirty --always)
IMG ?= $(IMAGE_REPO):$(GIT_TAG)
BUILDER_IMAGE ?= golang:$(GO_VERSION)
KIND_CLUSTER_NAME ?= kind

##@ Development

Expand All @@ -74,9 +80,28 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
paths="./..."

.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
generate: controller-gen code-generator ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."

# Regenerates the client-go code by running hack/update-codegen.sh.
# The generated files currently have to be fixed up by hand after every regeneration:
# generally, replace "inftyai.com/llmaz/api/core/v1alpha1" with "inftyai.com/llmaz/api/v1alpha1".
# The underlying bug is already fixed in v1.31, so this command will be removed in the future.
# See https://github.com/kubernetes/kubernetes/pull/125162
.PHONY: generate-client-go
generate-client-go: code-generator
	./hack/update-codegen.sh go $(PROJECT_DIR)/bin

# Pin code-generator to the version of k8s.io/api reported by `go list -m`.
CODEGEN_VERSION := $(shell go list -m -f '{{.Version}}' k8s.io/api)
CODEGEN = $(shell pwd)/bin/code-generator
# Location of the pinned code-generator module inside the Go module cache.
CODEGEN_ROOT = $(shell go env GOMODCACHE)/k8s.io/code-generator@$(CODEGEN_VERSION)
.PHONY: code-generator
code-generator:
	@GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install k8s.io/code-generator/cmd/client-gen@$(CODEGEN_VERSION)
	# Copy the helper scripts next to the installed binary; stop at the first failure.
	for script in generate-groups.sh generate-internal-groups.sh kube_codegen.sh; do \
		cp -f $(CODEGEN_ROOT)/$$script $(PROJECT_DIR)/bin/ || exit 1; \
	done

.PHONY: fmt
fmt: ## Run go fmt against code.
go fmt ./...
Expand All @@ -94,6 +119,11 @@ test-integration: manifests fmt vet envtest ginkgo ## Run integration tests.
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
$(GINKGO) --junit-report=junit.xml --output-dir=$(ARTIFACTS) -v $(INTEGRATION_TARGET)

# Runs hack/e2e-test.sh, handing it the tool paths, cluster settings and image tag
# as environment variables.
# FIXME: we should install lws CRD.
.PHONY: test-e2e
test-e2e: kustomize manifests fmt vet envtest ginkgo kind-image-build
	E2E_KIND_VERSION=$(E2E_KIND_VERSION) \
	KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) \
	KIND=$(KIND) \
	KUBECTL=$(KUBECTL) \
	KUSTOMIZE=$(KUSTOMIZE) \
	GINKGO=$(GINKGO) \
	USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) \
	IMAGE_TAG=$(IMG) \
	./hack/e2e-test.sh

GOLANGCI_LINT = $(shell pwd)/bin/golangci-lint
GOLANGCI_LINT_VERSION ?= v1.54.2
golangci-lint:
Expand Down Expand Up @@ -150,6 +180,16 @@ image-build:
image-push: PUSH=--push
image-push: image-build

# Expected location of the kind binary installed by the `kind` target.
KIND = $(shell pwd)/bin/kind
# Version of kind to install; override with `make kind KIND_VERSION=vX.Y.Z`.
# NOTE(review): the version pin on the install line was garbled in the copy under review;
# v0.23.0 is assumed because its default node image is kindest/node:v1.30.0, which matches
# E2E_KIND_VERSION — confirm before merging.
KIND_VERSION ?= v0.23.0
.PHONY: kind
kind:
	@GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install sigs.k8s.io/kind@$(KIND_VERSION)

# Builds the image (PLATFORMS is pinned to linux/amd64 for this target and its
# prerequisites) and loads it into the kind cluster named by KIND_CLUSTER_NAME.
.PHONY: kind-image-build
kind-image-build: PLATFORMS=linux/amd64
kind-image-build: kind image-build
	# Use the kind binary installed by the `kind` prerequisite rather than whatever is on
	# PATH, and target the same cluster name that is handed to the e2e script.
	$(KIND) load docker-image $(IMG) --name $(KIND_CLUSTER_NAME)

##@ Deployment

ifndef ignore-not-found
Expand All @@ -169,6 +209,12 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f -

# Local-development shortcut for kind: the image is first built and loaded into the
# kind cluster (kind-image-build), then the manifests are applied.
.PHONY: quick-deploy
quick-deploy: manifests kustomize kind-image-build ## Deploy controller to the K8s cluster specified in ~/.kube/config.
	cd config/manager && $(KUSTOMIZE) edit set image controller=$(IMG)
	$(KUSTOMIZE) build config/default | $(KUBECTL) apply --server-side --force-conflicts -f -

.PHONY: undeploy
undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
Expand Down
20 changes: 20 additions & 0 deletions api/core/v1alpha1/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// +kubebuilder:object:generate=true
// +groupName=llmaz.io

package v1alpha1
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,18 @@ var (
// GroupVersion is group version used to register these objects
GroupVersion = schema.GroupVersion{Group: "llmaz.io", Version: "v1alpha1"}

// SchemeGroupVersion is alias to GroupVersion for client-go libraries.
// It is required by pkg/client/informers/externalversions/...
SchemeGroupVersion = GroupVersion

// SchemeBuilder is used to add go types to the GroupVersionKind scheme
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

// AddToScheme adds the types in this group-version to the given scheme.
AddToScheme = SchemeBuilder.AddToScheme
)

// Resource qualifies the given resource name with this package's API group and
// returns the resulting GroupResource.
// It is required by pkg/client/listers/...
func Resource(resource string) schema.GroupResource {
	return schema.GroupResource{
		Group:    GroupVersion.Group,
		Resource: resource,
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

// Label keys under the llmaz.io/ prefix.
const (
	// ModelFamilyNameLabelKey is the label key "llmaz.io/model-family-name";
	// presumably its value is the name of the model family — confirm at the call sites.
	ModelFamilyNameLabelKey = "llmaz.io/model-family-name"
	// ModelNameLabelKey is the label key "llmaz.io/model-name";
	// presumably its value is the name of the model — confirm at the call sites.
	ModelNameLabelKey = "llmaz.io/model-name"
)

// DataSource represents where to load the model.
Expand Down Expand Up @@ -75,7 +76,7 @@ type Flavor struct {
// cloud-provider.com/accelerator: nvidia-a100.
// NodeSelector will be auto injected to the Pods as scheduling primitives.
// +optional
NodeSelector []v1.NodeSelector `json:"nodeSelector,omitempty"`
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
// Params stores other useful parameters and will be consumed by the autoscaling components
// like cluster-autoscaler, Karpenter.
// E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with
Expand Down Expand Up @@ -146,6 +147,7 @@ type ModelStatus struct {
Conditions []metav1.Condition `json:"conditions,omitempty"`
}

//+genclient
//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:resource:scope=Cluster
Expand Down
23 changes: 23 additions & 0 deletions api/core/v1alpha1/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
This file is needed for kubernetes/code-generator/kube_codegen.sh script used in hack/update-codegen.sh.
*/

package v1alpha1

//+genclient

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

75 changes: 75 additions & 0 deletions api/inference/v1alpha1/config_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import corev1 "k8s.io/api/core/v1"

// BackendName is the name of an inference backend.
type BackendName string

const (
	// VLLM is the name of the vLLM backend.
	VLLM BackendName = "vllm"
	// DefaultBackend is the default backend name, currently vLLM.
	DefaultBackend BackendName = VLLM
)

// BackendConfig describes the inference backend: its name, version, arguments,
// environment variables and resource requirements.
type BackendConfig struct {
	// Name represents the inference backend under the hood, e.g. vLLM.
	// +kubebuilder:validation:Enum={vllm}
	// +kubebuilder:default=vllm
	// +optional
	Name *BackendName `json:"name,omitempty"`
	// Version represents the backend version if you want a different one
	// from the default version.
	// +optional
	Version *string `json:"version,omitempty"`
	// Args represents the arguments passed to the backend.
	// +optional
	Args []string `json:"args,omitempty"`
	// Envs represents the environment variables set on the container.
	// +optional
	Envs []corev1.EnvVar `json:"envs,omitempty"`
	// Resources represents the resource requirements for the backend, like cpu/mem.
	// Accelerators like GPU should not be defined here but in the Model flavors;
	// otherwise the same accelerator requirements defined there will be overridden
	// and the workload will lose its fungibility.
	Resources *ResourceRequirements `json:"resources,omitempty"`
}

// ResourceRequirements describes the compute resource limits and requests.
// TODO: DRA is not supported yet; we can support it once needed.
type ResourceRequirements struct {
	// Limits describes the maximum amount of compute resources allowed.
	// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
	// +optional
	Limits corev1.ResourceList `json:"limits,omitempty"`
	// Requests describes the minimum amount of compute resources required.
	// If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
	// otherwise to an implementation-defined value. Requests cannot exceed Limits.
	// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
	// +optional
	Requests corev1.ResourceList `json:"requests,omitempty"`
}

// ElasticConfig bounds the number of inference workload replicas.
type ElasticConfig struct {
	// MinReplicas indicates the minimum number of inference workloads based on the traffic.
	// Defaults to 1, i.e. the instances can be scaled down to 1.
	// +kubebuilder:default=1
	// +optional
	MinReplicas *int32 `json:"minReplicas,omitempty"`
	// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
	// Defaults to nil, which means there is no limit on the number of instances.
	// +optional
	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
}
20 changes: 20 additions & 0 deletions api/inference/v1alpha1/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// +kubebuilder:object:generate=true
// +groupName=inference.llmaz.io

package v1alpha1
9 changes: 9 additions & 0 deletions api/inference/v1alpha1/groupversion_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,18 @@ var (
// GroupVersion is group version used to register these objects
GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"}

// SchemeGroupVersion is alias to GroupVersion for client-go libraries.
// It is required by pkg/client/informers/externalversions/...
SchemeGroupVersion = GroupVersion

// SchemeBuilder is used to add go types to the GroupVersionKind scheme
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

// AddToScheme adds the types in this group-version to the given scheme.
AddToScheme = SchemeBuilder.AddToScheme
)

// Resource qualifies the given resource name with this package's API group and
// returns the resulting GroupResource.
// It is required by pkg/client/listers/...
func Resource(resource string) schema.GroupResource {
	return schema.GroupResource{
		Group:    GroupVersion.Group,
		Resource: resource,
	}
}
12 changes: 11 additions & 1 deletion api/inference/v1alpha1/playground_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ limitations under the License.
package v1alpha1

import (
api "inftyai.com/llmaz/api/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

api "inftyai.com/llmaz/api/core/v1alpha1"
)

// PlaygroundSpec defines the desired state of Playground
Expand Down Expand Up @@ -50,12 +51,21 @@ type PlaygroundSpec struct {
ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
}

// Both constants are untyped string constants so that they can be used uniformly
// wherever a string (or string-based type) is expected.
const (
	// PlaygroundProgressing means the Playground is progressing now, such as waiting for the
	// inference service creation, rolling update or scaling up and down.
	PlaygroundProgressing = "Progressing"
	// PlaygroundAvailable indicates the corresponding inference service is available now.
	PlaygroundAvailable = "Available"
)

// PlaygroundStatus defines the observed state of Playground
type PlaygroundStatus struct {
	// Conditions holds the Playground's conditions, which reflect the state of the
	// corresponding inference service (see PlaygroundProgressing and PlaygroundAvailable).
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}

//+genclient
//+kubebuilder:object:root=true
//+kubebuilder:subresource:status

Expand Down
Loading

0 comments on commit dea9a2e

Please sign in to comment.