Add Inference API

Signed-off-by: kerthcet <[email protected]>
InftyAI · Jul 12, 2024 · 202fc21 · 202fc21
1 parent ef8ba99
commit 202fc21
Show file tree

Hide file tree

Showing 48 changed files with 2,245 additions and 326 deletions.
diff --git a/PROJECT b/PROJECT
@@ -2,19 +2,37 @@
 # This file is used to track the info used to scaffold your project
 # and allow the plugins properly work.
 # More info: https://book.kubebuilder.io/reference/project-config.html
-domain: inftyai.io
+domain: llmaz.io
 layout:
 - go.kubebuilder.io/v4
-projectName: llmaz-operator
-repo: inftyai.io/llmaz
+multigroup: true
+projectName: llmaz
+repo: inftyai.com/llmaz
 resources:
 - api:
     crdVersion: v1
     namespaced: true
   controller: true
-  domain: inftyai.io
-  group: llmaz
-  kind: Serve
-  path: inftyai.io/llmaz/api/v1alpha1
+  domain: llmaz.io
+  group: inference
+  kind: Service
+  path: inftyai.com/llmaz/api/inference/v1alpha1
+  version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: llmaz.io
+  kind: ModelProvider
+  path: inftyai.com/llmaz/api/v1alpha1
+  version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: llmaz.io
+  group: inference
+  kind: Playground
+  path: inftyai.com/llmaz/api/inference/v1alpha1
   version: v1alpha1
 version: "3"
diff --git a/README.md b/README.md
@@ -1,3 +1,9 @@
 # llmaz
 
-Serving LLMs on Kubernetes in a breeze.
+☸️ Effortlessly operating LLMs on Kubernetes, e.g. Serving.
+
+## Roadmap
+
+- Serverless support
+- CLI tool
+- Gateway support
diff --git a/api/inference/v1alpha1/groupversion_info.go b/api/inference/v1alpha1/groupversion_info.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha1 contains API Schema definitions for the inference v1alpha1 API group
+// +kubebuilder:object:generate=true
+// +groupName=inference.llmaz.io
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is the API group and version used to register these objects.
+	GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"}
+
+	// SchemeBuilder is used to add Go types to the GroupVersionKind scheme.
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go
@@ -0,0 +1,71 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	api "inftyai.com/llmaz/api/v1alpha1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// PlaygroundSpec defines the desired state of Playground.
+type PlaygroundSpec struct {
+	// Replicas is the number of replicas of the inference workload.
+	// +kubebuilder:default=1
+	// +optional
+	Replicas *int32 `json:"replicas,omitempty"`
+	// ModelsClaim represents the references to multiple models.
+	ModelsClaim api.ModelsClaim `json:"modelsClaim"`
+	// BackendConfig represents the configuration of the inference backend
+	// running under the hood, e.g. vLLM, which is the default backend.
+	// +optional
+	BackendConfig *BackendConfig `json:"backendConfig,omitempty"`
+	// ElasticConfig defines the configuration for elastic usage,
+	// e.g. the max/min replicas. Defaults to the range 0 ~ +Inf.
+	// +optional
+	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
+}
+
+// PlaygroundStatus defines the observed state of Playground.
+type PlaygroundStatus struct {
+	// Conditions represents the conditions observed for the Playground.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+
+// Playground is the Schema for the playgrounds API.
+type Playground struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   PlaygroundSpec   `json:"spec,omitempty"`
+	Status PlaygroundStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// PlaygroundList contains a list of Playground objects.
+type PlaygroundList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []Playground `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&Playground{}, &PlaygroundList{}) // add both kinds to this group-version's SchemeBuilder
+}
diff --git a/api/inference/v1alpha1/service_types.go b/api/inference/v1alpha1/service_types.go
@@ -0,0 +1,71 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	lws "sigs.k8s.io/lws/api/leaderworkerset/v1"
+
+	api "inftyai.com/llmaz/api/v1alpha1"
+)
+
+// ServiceSpec defines the desired state of Service.
+// The Service controller will maintain multiple flavors of workloads with
+// different accelerators for cost or performance considerations.
+type ServiceSpec struct {
+	// ModelsClaim represents the references to multiple models.
+	// Note: properties of the model flavors (e.g. nodeSelectors, resources)
+	// will be applied to the workload if they are not already set.
+	ModelsClaim api.ModelsClaim `json:"modelProviderClaim"` // NOTE(review): PlaygroundSpec serializes the same field as "modelsClaim"; confirm the differing key is intended.
+	// WorkloadTemplate defines the underlying workload layout and configuration.
+	WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"`
+	// ElasticConfig defines the configuration for elastic usage,
+	// e.g. the max/min replicas. Defaults to the range 0 ~ +Inf.
+	// +optional
+	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
+}
+
+// ServiceStatus defines the observed state of Service.
+type ServiceStatus struct {
+	// Conditions represents the conditions observed for the Service.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+
+// Service is the Schema for the services API.
+type Service struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   ServiceSpec   `json:"spec,omitempty"`
+	Status ServiceStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// ServiceList contains a list of Service objects.
+type ServiceList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []Service `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&Service{}, &ServiceList{}) // add both kinds to this group-version's SchemeBuilder
+}
diff --git a/api/inference/v1alpha1/types.go b/api/inference/v1alpha1/types.go
@@ -0,0 +1,50 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import corev1 "k8s.io/api/core/v1"
+
+type BackendName string // BackendName names an inference backend, e.g. vllm.
+
+type BackendConfig struct {
+	// Name represents the inference backend under the hood, e.g. vLLM.
+	// +kubebuilder:validation:Enum={vllm}
+	// +kubebuilder:default=vllm
+	// +optional
+	Name *BackendName `json:"name,omitempty"`
+	// Version represents the backend version to use when it should differ
+	// from the default version.
+	// +optional
+	Version *string `json:"version,omitempty"`
+	// Args represents the arguments passed to the backend.
+	// +optional
+	Args []string `json:"args,omitempty"`
+	// Envs represents the environment variables set on the container.
+	// +optional
+	Envs []corev1.EnvVar `json:"envs,omitempty"`
+}
+
+type ElasticConfig struct {
+	// MinReplicas indicates the minimum number of inference workloads based on the traffic.
+	// Defaults to nil, which means the instances can be scaled down to 0.
+	// +optional
+	MinReplicas *int32 `json:"minReplicas,omitempty"`
+	// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
+	// Defaults to nil, which means there is no limit on the number of instances.
+	// +optional
+	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
+}