From 202fc218a62ad70aa4efcddef5de26c3c76de023 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Fri, 12 Jul 2024 20:26:09 +0800
Subject: [PATCH] Add Inference API

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 PROJECT                                       |  32 +-
 README.md                                     |   8 +-
 api/inference/v1alpha1/groupversion_info.go   |  36 ++
 api/inference/v1alpha1/playground_types.go    |  71 ++++
 api/inference/v1alpha1/service_types.go       |  71 ++++
 api/inference/v1alpha1/types.go               |  50 +++
 .../v1alpha1/zz_generated.deepcopy.go         | 304 +++++++++++++++++
 api/v1alpha1/groupversion_info.go             |   8 +-
 api/v1alpha1/modelprovider_types.go           | 129 ++++++++
 api/v1alpha1/serve_types.go                   |  86 -----
 api/v1alpha1/zz_generated.deepcopy.go         | 153 ++++++---
 cmd/main.go                                   |  31 +-
 .../bases/inference.llmaz.io_backends.yaml    | 125 +++++++
 .../bases/inference.llmaz.io_playgrounds.yaml | 307 ++++++++++++++++++
 ....yaml => inference.llmaz.io_services.yaml} | 159 +++++++--
 config/crd/bases/llmaz.io_modelproviders.yaml | 293 +++++++++++++++++
 config/crd/kustomization.yaml                 |  12 +-
 config/default/kustomization.yaml             |   4 +-
 config/manager/manager.yaml                   |   8 +-
 config/prometheus/monitor.yaml                |   4 +-
 config/rbac/_modelprovider_editor_role.yaml   |  31 ++
 config/rbac/_modelprovider_viewer_role.yaml   |  27 ++
 .../rbac/auth_proxy_client_clusterrole.yaml   |   4 +-
 config/rbac/auth_proxy_role.yaml              |   4 +-
 config/rbac/auth_proxy_role_binding.yaml      |   4 +-
 config/rbac/auth_proxy_service.yaml           |   4 +-
 .../inference_playground_editor_role.yaml     |  31 ++
 .../inference_playground_viewer_role.yaml     |  27 ++
 ...aml => inference_service_editor_role.yaml} |  18 +-
 ...aml => inference_service_viewer_role.yaml} |  18 +-
 config/rbac/leader_election_role.yaml         |   4 +-
 config/rbac/leader_election_role_binding.yaml |   4 +-
 config/rbac/role.yaml                         |  64 +++-
 config/rbac/role_binding.yaml                 |   4 +-
 config/rbac/service_account.yaml              |   4 +-
 config/samples/_v1alpha1_modelprovider.yaml   |  12 +
 .../inference_v1alpha1_playground.yaml        |  12 +
 .../samples/inference_v1alpha1_service.yaml   |  12 +
 config/samples/kustomization.yaml             |   4 +-
 config/samples/llmaz_v1alpha1_serve.yaml      |  12 -
 go.mod                                        |  45 +--
 go.sum                                        |  91 +++---
 hack/boilerplate.go.txt                       |   2 +-
 .../inference/playground_controller.go        |  62 ++++
 .../inference/service_controller.go           |  62 ++++
 internal/controller/inference/suite_test.go   |  90 +++++
 ...troller.go => modelprovider_controller.go} |  22 +-
 internal/controller/suite_test.go             |   6 +-
 48 files changed, 2245 insertions(+), 326 deletions(-)
 create mode 100644 api/inference/v1alpha1/groupversion_info.go
 create mode 100644 api/inference/v1alpha1/playground_types.go
 create mode 100644 api/inference/v1alpha1/service_types.go
 create mode 100644 api/inference/v1alpha1/types.go
 create mode 100644 api/inference/v1alpha1/zz_generated.deepcopy.go
 create mode 100644 api/v1alpha1/modelprovider_types.go
 delete mode 100644 api/v1alpha1/serve_types.go
 create mode 100644 config/crd/bases/inference.llmaz.io_backends.yaml
 create mode 100644 config/crd/bases/inference.llmaz.io_playgrounds.yaml
 rename config/crd/bases/{llmaz.inftyai.io_serves.yaml => inference.llmaz.io_services.yaml} (99%)
 create mode 100644 config/crd/bases/llmaz.io_modelproviders.yaml
 create mode 100644 config/rbac/_modelprovider_editor_role.yaml
 create mode 100644 config/rbac/_modelprovider_viewer_role.yaml
 create mode 100644 config/rbac/inference_playground_editor_role.yaml
 create mode 100644 config/rbac/inference_playground_viewer_role.yaml
 rename config/rbac/{serve_editor_role.yaml => inference_service_editor_role.yaml} (55%)
 rename config/rbac/{serve_viewer_role.yaml => inference_service_viewer_role.yaml} (52%)
 create mode 100644 config/samples/_v1alpha1_modelprovider.yaml
 create mode 100644 config/samples/inference_v1alpha1_playground.yaml
 create mode 100644 config/samples/inference_v1alpha1_service.yaml
 delete mode 100644 config/samples/llmaz_v1alpha1_serve.yaml
 create mode 100644 internal/controller/inference/playground_controller.go
 create mode 100644 internal/controller/inference/service_controller.go
 create mode 100644 internal/controller/inference/suite_test.go
 rename internal/controller/{serve_controller.go => modelprovider_controller.go} (70%)

diff --git a/PROJECT b/PROJECT
index 234561a..e565de7 100644
--- a/PROJECT
+++ b/PROJECT
@@ -2,19 +2,37 @@
 # This file is used to track the info used to scaffold your project
 # and allow the plugins properly work.
 # More info: https://book.kubebuilder.io/reference/project-config.html
-domain: inftyai.io
+domain: llmaz.io
 layout:
 - go.kubebuilder.io/v4
-projectName: llmaz-operator
-repo: inftyai.io/llmaz
+multigroup: true
+projectName: llmaz
+repo: inftyai.com/llmaz
 resources:
 - api:
     crdVersion: v1
     namespaced: true
   controller: true
-  domain: inftyai.io
-  group: llmaz
-  kind: Serve
-  path: inftyai.io/llmaz/api/v1alpha1
+  domain: llmaz.io
+  group: inference
+  kind: Service
+  path: inftyai.com/llmaz/api/inference/v1alpha1
+  version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: llmaz.io
+  kind: ModelProvider
+  path: inftyai.com/llmaz/api/v1alpha1
+  version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: llmaz.io
+  group: inference
+  kind: Playground
+  path: inftyai.com/llmaz/api/inference/v1alpha1
   version: v1alpha1
 version: "3"
diff --git a/README.md b/README.md
index 2e0d297..43aa1c8 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,9 @@
 # llmaz
 
-Serving LLMs on Kubernetes in a breeze.
+☸️ Effortlessly operating LLMs on Kubernetes, e.g. Serving.
+
+## Roadmap
+
+- Serverless support
+- CLI tool
+- Gateway support
diff --git a/api/inference/v1alpha1/groupversion_info.go b/api/inference/v1alpha1/groupversion_info.go
new file mode 100644
index 0000000..6de3201
--- /dev/null
+++ b/api/inference/v1alpha1/groupversion_info.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha1 contains API Schema definitions for the inference v1alpha1 API group
+// +kubebuilder:object:generate=true
+// +groupName=inference.llmaz.io
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is group version used to register these objects
+	GroupVersion = schema.GroupVersion{Group: "inference.llmaz.io", Version: "v1alpha1"}
+
+	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go
new file mode 100644
index 0000000..7aa3c26
--- /dev/null
+++ b/api/inference/v1alpha1/playground_types.go
@@ -0,0 +1,71 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	api "inftyai.com/llmaz/api/v1alpha1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// PlaygroundSpec defines the desired state of Playground
+type PlaygroundSpec struct {
+	// Replicas represents the replica number of inference workloads.
+	// +kubebuilder:default=1
+	// +optional
+	Replicas *int32 `json:"replicas,omitempty"`
+	// ModelsClaim represents the references to multiple models.
+	ModelsClaim api.ModelsClaim `json:"modelsClaim"`
+	// BackendConfig represents the inference backend configuration
+	// under the hood, e.g. vLLM, which is the default backend.
+	// +optional
+	BackendConfig *BackendConfig `json:"backendConfig,omitempty"`
+	// ElasticConfig defines the configuration for elastic usage,
+	// e.g. the max/min replicas. Default to 0 ~ Inf+.
+	// +optional
+	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
+}
+
+// PlaygroundStatus defines the observed state of Playground
+type PlaygroundStatus struct {
+	// Conditions represents the Inference condition.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+
+// Playground is the Schema for the playgrounds API
+type Playground struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   PlaygroundSpec   `json:"spec,omitempty"`
+	Status PlaygroundStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// PlaygroundList contains a list of Playground
+type PlaygroundList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []Playground `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&Playground{}, &PlaygroundList{})
+}
diff --git a/api/inference/v1alpha1/service_types.go b/api/inference/v1alpha1/service_types.go
new file mode 100644
index 0000000..507fbb1
--- /dev/null
+++ b/api/inference/v1alpha1/service_types.go
@@ -0,0 +1,71 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	lws "sigs.k8s.io/lws/api/leaderworkerset/v1"
+
+	api "inftyai.com/llmaz/api/v1alpha1"
+)
+
+// ServiceSpec defines the desired state of Service.
+// Service controller will maintain multiple flavors of workloads with
+// different accelerators for cost or performance considerations.
+type ServiceSpec struct {
+	// ModelsClaim represents the references to multiple models.
+	// Note: properties (e.g. nodeSelectors, resources) of the model flavors
+	// will be applied to the workload if they are not already set there.
+	ModelsClaim api.ModelsClaim `json:"modelProviderClaim"`
+	// WorkloadTemplate defines the underlying workload layout and configuration.
+	WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"`
+	// ElasticConfig defines the configuration for elastic usage,
+	// e.g. the max/min replicas. Default to 0 ~ Inf+.
+	// +optional
+	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
+}
+
+// ServiceStatus defines the observed state of Service
+type ServiceStatus struct {
+	// Conditions represents the Inference condition.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+
+// Service is the Schema for the services API
+type Service struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   ServiceSpec   `json:"spec,omitempty"`
+	Status ServiceStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// ServiceList contains a list of Service
+type ServiceList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []Service `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&Service{}, &ServiceList{})
+}
diff --git a/api/inference/v1alpha1/types.go b/api/inference/v1alpha1/types.go
new file mode 100644
index 0000000..a2725e7
--- /dev/null
+++ b/api/inference/v1alpha1/types.go
@@ -0,0 +1,50 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import corev1 "k8s.io/api/core/v1"
+
+type BackendName string
+
+// BackendConfig holds the inference backend name, version, arguments and environment variables.
+type BackendConfig struct {
+	// Name represents the inference backend under the hood, e.g. vLLM.
+	// +kubebuilder:validation:Enum={vllm}
+	// +kubebuilder:default=vllm
+	// +optional
+	Name *BackendName `json:"name,omitempty"`
+	// Version represents the backend version, if different from the default one.
+	// +optional
+	Version *string `json:"version,omitempty"`
+	// Args represents the arguments passed to the backend.
+	// +optional
+	Args []string `json:"args,omitempty"`
+	// Envs represents the environment variables set on the container.
+	// +optional
+	Envs []corev1.EnvVar `json:"envs,omitempty"`
+}
+
+type ElasticConfig struct {
+	// MinReplicas indicates the minimum number of inference workloads based on the traffic.
+	// Default to nil means we can scale down the instances to 0.
+	// +optional
+	MinReplicas *int32 `json:"minReplicas,omitempty"`
+	// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
+	// Default to nil means there's no limit for the instance number.
+	// +optional
+	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
+}
diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go
new file mode 100644
index 0000000..a5adfd5
--- /dev/null
+++ b/api/inference/v1alpha1/zz_generated.deepcopy.go
@@ -0,0 +1,304 @@
+//go:build !ignore_autogenerated
+
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Code generated by controller-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *BackendConfig) DeepCopyInto(out *BackendConfig) {
+	*out = *in
+	if in.Name != nil {
+		in, out := &in.Name, &out.Name
+		*out = new(BackendName)
+		**out = **in
+	}
+	if in.Version != nil {
+		in, out := &in.Version, &out.Version
+		*out = new(string)
+		**out = **in
+	}
+	if in.Args != nil {
+		in, out := &in.Args, &out.Args
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.Envs != nil {
+		in, out := &in.Envs, &out.Envs
+		*out = make([]corev1.EnvVar, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackendConfig.
+func (in *BackendConfig) DeepCopy() *BackendConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(BackendConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig) {
+	*out = *in
+	if in.MinReplicas != nil {
+		in, out := &in.MinReplicas, &out.MinReplicas
+		*out = new(int32)
+		**out = **in
+	}
+	if in.MaxReplicas != nil {
+		in, out := &in.MaxReplicas, &out.MaxReplicas
+		*out = new(int32)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticConfig.
+func (in *ElasticConfig) DeepCopy() *ElasticConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(ElasticConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Playground) DeepCopyInto(out *Playground) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Playground.
+func (in *Playground) DeepCopy() *Playground {
+	if in == nil {
+		return nil
+	}
+	out := new(Playground)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *Playground) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PlaygroundList) DeepCopyInto(out *PlaygroundList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]Playground, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundList.
+func (in *PlaygroundList) DeepCopy() *PlaygroundList {
+	if in == nil {
+		return nil
+	}
+	out := new(PlaygroundList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *PlaygroundList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PlaygroundSpec) DeepCopyInto(out *PlaygroundSpec) {
+	*out = *in
+	if in.Replicas != nil {
+		in, out := &in.Replicas, &out.Replicas
+		*out = new(int32)
+		**out = **in
+	}
+	in.ModelsClaim.DeepCopyInto(&out.ModelsClaim)
+	if in.BackendConfig != nil {
+		in, out := &in.BackendConfig, &out.BackendConfig
+		*out = new(BackendConfig)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.ElasticConfig != nil {
+		in, out := &in.ElasticConfig, &out.ElasticConfig
+		*out = new(ElasticConfig)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundSpec.
+func (in *PlaygroundSpec) DeepCopy() *PlaygroundSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(PlaygroundSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PlaygroundStatus) DeepCopyInto(out *PlaygroundStatus) {
+	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]v1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlaygroundStatus.
+func (in *PlaygroundStatus) DeepCopy() *PlaygroundStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(PlaygroundStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Service) DeepCopyInto(out *Service) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Service.
+func (in *Service) DeepCopy() *Service {
+	if in == nil {
+		return nil
+	}
+	out := new(Service)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *Service) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServiceList) DeepCopyInto(out *ServiceList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]Service, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceList.
+func (in *ServiceList) DeepCopy() *ServiceList {
+	if in == nil {
+		return nil
+	}
+	out := new(ServiceList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *ServiceList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServiceSpec) DeepCopyInto(out *ServiceSpec) {
+	*out = *in
+	in.ModelsClaim.DeepCopyInto(&out.ModelsClaim)
+	in.WorkloadTemplate.DeepCopyInto(&out.WorkloadTemplate)
+	if in.ElasticConfig != nil {
+		in, out := &in.ElasticConfig, &out.ElasticConfig
+		*out = new(ElasticConfig)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceSpec.
+func (in *ServiceSpec) DeepCopy() *ServiceSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(ServiceSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServiceStatus) DeepCopyInto(out *ServiceStatus) {
+	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]v1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServiceStatus.
+func (in *ServiceStatus) DeepCopy() *ServiceStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(ServiceStatus)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go
index b9bf182..683c06e 100644
--- a/api/v1alpha1/groupversion_info.go
+++ b/api/v1alpha1/groupversion_info.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2023.
+Copyright 2024.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-// Package v1alpha1 contains API Schema definitions for the llmaz v1alpha1 API group
+// Package v1alpha1 contains API Schema definitions for the llmaz.io v1alpha1 API group
 // +kubebuilder:object:generate=true
-// +groupName=llmaz.inftyai.io
+// +groupName=llmaz.io
 package v1alpha1
 
 import (
@@ -26,7 +26,7 @@ import (
 
 var (
 	// GroupVersion is group version used to register these objects
-	GroupVersion = schema.GroupVersion{Group: "llmaz.inftyai.io", Version: "v1alpha1"}
+	GroupVersion = schema.GroupVersion{Group: "llmaz.io", Version: "v1alpha1"}
 
 	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
 	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
diff --git a/api/v1alpha1/modelprovider_types.go b/api/v1alpha1/modelprovider_types.go
new file mode 100644
index 0000000..305f06d
--- /dev/null
+++ b/api/v1alpha1/modelprovider_types.go
@@ -0,0 +1,129 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// DataSource represents where to load the model.
+// Only one data source will be used.
+type DataSource struct {
+	// URL represents the URL link that contains the data sources.
+	// +optional
+	URL *string `json:"url,omitempty"`
+	// Volume represents the mounted volume that contains the data.
+	// +optional
+	Volume *v1.VolumeSource `json:"volumeSource,omitempty"`
+	// Image represents the image address that contains the source data.
+	// +optional
+	Image *string `json:"image,omitempty"`
+	// ImagePullSecrets represents a list of secret names in the same namespace used for pulling the image.
+	// +optional
+	ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
+}
+
+type FlavorName string
+
+// Flavor defines the accelerator requirements for a model and the necessary parameters in autoscaling.
+// Generally, it will be used in two places:
+// - Pod scheduling with node selectors specified.
+// - Cluster autoscaling with essential parameters provided.
+type Flavor struct {
+	// Name represents the flavor name, which will be used in model claim.
+	Name FlavorName `json:"name"`
+	// Requests defines the required resources to serve the model, like nvidia.com/gpu: 8.
+	// Note: cpu and memory usage of the model backend (e.g. vllm) can also be declared here,
+	// or a default value will be used based on the community recommendations.
+	Requests v1.ResourceList `json:"requests"`
+	// NodeSelector defines the labels to filter specified nodes, like
+	// cloud-provider.com/accelerator: nvidia-a100.
+	// NodeSelector will be auto injected to the Pods as scheduling primitives.
+	// +optional
+	NodeSelector []v1.NodeSelector `json:"nodeSelector,omitempty"`
+	// Params stores other useful parameters and will be consumed by the autoscaling components
+	// like cluster-autoscaler, Karpenter.
+	// E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with
+	// instance-type: p4d.24xlarge for AWS.
+	// +optional
+	Params map[string]string `json:"params,omitempty"`
+}
+
+type ModelProviderName string
+
+// ModelsClaim represents the references to multiple models,
+// as well as the configured flavors.
+type ModelsClaim struct {
+	// ModelProviderNames represents a list of modelProviders, there may be
+	// multiple modelProviders here to support state-of-the-art technologies
+	// like speculative decoding.
+	// +kubebuilder:validation:MinItems=1
+	ModelProviderNames []ModelProviderName `json:"modelProviderNames"`
+	// InferenceFlavors represents a list of flavors with fungibility supported.
+	// If not set and multiple modelProviders claimed, apply with the 0-index
+	// model provider by default.
+	// If set, the flavor names will refer to the 0-index model provider.
+	// This is just for simplicity, if needed, will refactor this part in the future.
+	// +optional
+	InferenceFlavors []FlavorName `json:"inferenceFlavors,omitempty"`
+}
+
+// ModelProviderSpec defines the desired state of ModelProvider
+type ModelProviderSpec struct {
+	// FamilyName represents the model type, like llama2, which will be auto injected
+	// to the labels with the key of `llmaz.io/model-family-name`.
+	FamilyName ModelProviderName `json:"familyName"`
+	// DataSource represents where the model is stored; there are several ways to
+	// load it, e.g. from Hugging Face, a host path, S3 and so on.
+	DataSource DataSource `json:"dataSource"`
+	// InferenceFlavors represents the accelerator requirements to serve the model.
+	// Flavors are fungible following the priority of slice order.
+	// +optional
+	InferenceFlavors []Flavor `json:"inferenceFlavors,omitempty"`
+}
+
+// ModelProviderStatus defines the observed state of ModelProvider
+type ModelProviderStatus struct {
+	// Conditions represents the Inference condition.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+
+// ModelProvider is the Schema for the modelProvider API.
+type ModelProvider struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   ModelProviderSpec   `json:"spec,omitempty"`
+	Status ModelProviderStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// ModelProviderList contains a list of ModelProvider
+type ModelProviderList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []ModelProvider `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&ModelProvider{}, &ModelProviderList{})
+}
diff --git a/api/v1alpha1/serve_types.go b/api/v1alpha1/serve_types.go
deleted file mode 100644
index 95c763a..0000000
--- a/api/v1alpha1/serve_types.go
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-Copyright 2023.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package v1alpha1
-
-import (
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-
-	lws "sigs.k8s.io/lws/api/leaderworkerset/v1"
-)
-
-// ServeSpec defines the desired state of Serve
-type ServeSpec struct {
-	// ModelNameOrPath represents the model name or the local path.
-	ModelNameOrPath string `json:"modelNameOrPath,omitempty"`
-	// Backend indicates the inference backend under the hood, e.g. vLLM.
-	// Default to use huggingface library.
-	//
-	// +optional
-	// +kubebuilder:validation:Enum={vllm,huggingface}
-	// +kubebuilder:default=huggingface
-	Backend *string `json:"backend,omitempty"`
-	// WorkloadTemplate defines the underlying workload layout and configuration,
-	// e.g. the leader/worker templates and replicas.
-	WorkloadTemplate lws.LeaderWorkerSetSpec `json:"workloadTemplate"`
-	// ElasticConfig defines the configuration for elastic usage,
-	// e.g. the max/min replicas.
-	// Default to 0 ~ Inf+.
-	// +optional
-	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
-}
-
-type ElasticConfig struct {
-	// MinReplicas indicates the minimum number of Serve instances based on the traffic.
-	// Default to nil means we can scale down the instances to 0.
-	// +optional
-	MinReplicas *int32 `json:"minReplicas,omitempty"`
-	// MaxReplicas indicates the maximum number of Serve instances based on the traffic.
-	// Default to nil means there's no limit for the instance number.
-	// +optional
-	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
-}
-
-// ServeStatus defines the observed state of Serve
-type ServeStatus struct {
-	// Conditions represents the Serve condition.
-	Conditions []metav1.Condition `json:"conditions,omitempty"`
-}
-
-//+kubebuilder:object:root=true
-//+kubebuilder:subresource:status
-
-// Serve is the Schema for the serves API
-type Serve struct {
-	metav1.TypeMeta   `json:",inline"`
-	metav1.ObjectMeta `json:"metadata,omitempty"`
-
-	Spec   ServeSpec   `json:"spec,omitempty"`
-	Status ServeStatus `json:"status,omitempty"`
-}
-
-//+kubebuilder:object:root=true
-
-// ServeList contains a list of Serve
-type ServeList struct {
-	metav1.TypeMeta `json:",inline"`
-	metav1.ListMeta `json:"metadata,omitempty"`
-	Items           []Serve `json:"items"`
-}
-
-func init() {
-	SchemeBuilder.Register(&Serve{}, &ServeList{})
-}
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 95e8213..8c8057d 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -1,7 +1,7 @@
 //go:build !ignore_autogenerated
 
 /*
-Copyright 2023.
+Copyright 2024.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,37 +21,84 @@ limitations under the License.
 package v1alpha1
 
 import (
-	"k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	runtime "k8s.io/apimachinery/pkg/runtime"
 )
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig) {
+func (in *DataSource) DeepCopyInto(out *DataSource) {
 	*out = *in
-	if in.MinReplicas != nil {
-		in, out := &in.MinReplicas, &out.MinReplicas
-		*out = new(int32)
+	if in.URL != nil {
+		in, out := &in.URL, &out.URL
+		*out = new(string)
 		**out = **in
 	}
-	if in.MaxReplicas != nil {
-		in, out := &in.MaxReplicas, &out.MaxReplicas
-		*out = new(int32)
+	if in.Volume != nil {
+		in, out := &in.Volume, &out.Volume
+		*out = new(v1.VolumeSource)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.Image != nil {
+		in, out := &in.Image, &out.Image
+		*out = new(string)
 		**out = **in
 	}
+	if in.ImagePullSecrets != nil {
+		in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DataSource.
+func (in *DataSource) DeepCopy() *DataSource {
+	if in == nil {
+		return nil
+	}
+	out := new(DataSource)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Flavor) DeepCopyInto(out *Flavor) {
+	*out = *in
+	if in.Requests != nil {
+		in, out := &in.Requests, &out.Requests
+		*out = make(v1.ResourceList, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val.DeepCopy()
+		}
+	}
+	if in.NodeSelector != nil {
+		in, out := &in.NodeSelector, &out.NodeSelector
+		*out = make([]v1.NodeSelector, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.Params != nil {
+		in, out := &in.Params, &out.Params
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
 }
 
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticConfig.
-func (in *ElasticConfig) DeepCopy() *ElasticConfig {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Flavor.
+func (in *Flavor) DeepCopy() *Flavor {
 	if in == nil {
 		return nil
 	}
-	out := new(ElasticConfig)
+	out := new(Flavor)
 	in.DeepCopyInto(out)
 	return out
 }
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *Serve) DeepCopyInto(out *Serve) {
+func (in *ModelProvider) DeepCopyInto(out *ModelProvider) {
 	*out = *in
 	out.TypeMeta = in.TypeMeta
 	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
@@ -59,18 +106,18 @@ func (in *Serve) DeepCopyInto(out *Serve) {
 	in.Status.DeepCopyInto(&out.Status)
 }
 
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Serve.
-func (in *Serve) DeepCopy() *Serve {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProvider.
+func (in *ModelProvider) DeepCopy() *ModelProvider {
 	if in == nil {
 		return nil
 	}
-	out := new(Serve)
+	out := new(ModelProvider)
 	in.DeepCopyInto(out)
 	return out
 }
 
 // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *Serve) DeepCopyObject() runtime.Object {
+func (in *ModelProvider) DeepCopyObject() runtime.Object {
 	if c := in.DeepCopy(); c != nil {
 		return c
 	}
@@ -78,31 +125,31 @@ func (in *Serve) DeepCopyObject() runtime.Object {
 }
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ServeList) DeepCopyInto(out *ServeList) {
+func (in *ModelProviderList) DeepCopyInto(out *ModelProviderList) {
 	*out = *in
 	out.TypeMeta = in.TypeMeta
 	in.ListMeta.DeepCopyInto(&out.ListMeta)
 	if in.Items != nil {
 		in, out := &in.Items, &out.Items
-		*out = make([]Serve, len(*in))
+		*out = make([]ModelProvider, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
 }
 
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServeList.
-func (in *ServeList) DeepCopy() *ServeList {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProviderList.
+func (in *ModelProviderList) DeepCopy() *ModelProviderList {
 	if in == nil {
 		return nil
 	}
-	out := new(ServeList)
+	out := new(ModelProviderList)
 	in.DeepCopyInto(out)
 	return out
 }
 
 // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *ServeList) DeepCopyObject() runtime.Object {
+func (in *ModelProviderList) DeepCopyObject() runtime.Object {
 	if c := in.DeepCopy(); c != nil {
 		return c
 	}
@@ -110,49 +157,71 @@ func (in *ServeList) DeepCopyObject() runtime.Object {
 }
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ServeSpec) DeepCopyInto(out *ServeSpec) {
+func (in *ModelProviderSpec) DeepCopyInto(out *ModelProviderSpec) {
 	*out = *in
-	if in.Backend != nil {
-		in, out := &in.Backend, &out.Backend
-		*out = new(string)
-		**out = **in
-	}
-	in.WorkloadTemplate.DeepCopyInto(&out.WorkloadTemplate)
-	if in.ElasticConfig != nil {
-		in, out := &in.ElasticConfig, &out.ElasticConfig
-		*out = new(ElasticConfig)
-		(*in).DeepCopyInto(*out)
+	in.DataSource.DeepCopyInto(&out.DataSource)
+	if in.InferenceFlavors != nil {
+		in, out := &in.InferenceFlavors, &out.InferenceFlavors
+		*out = make([]Flavor, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
 	}
 }
 
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServeSpec.
-func (in *ServeSpec) DeepCopy() *ServeSpec {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProviderSpec.
+func (in *ModelProviderSpec) DeepCopy() *ModelProviderSpec {
 	if in == nil {
 		return nil
 	}
-	out := new(ServeSpec)
+	out := new(ModelProviderSpec)
 	in.DeepCopyInto(out)
 	return out
 }
 
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ServeStatus) DeepCopyInto(out *ServeStatus) {
+func (in *ModelProviderStatus) DeepCopyInto(out *ModelProviderStatus) {
 	*out = *in
 	if in.Conditions != nil {
 		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
+		*out = make([]metav1.Condition, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
 }
 
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServeStatus.
-func (in *ServeStatus) DeepCopy() *ServeStatus {
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelProviderStatus.
+func (in *ModelProviderStatus) DeepCopy() *ModelProviderStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(ModelProviderStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ModelsClaim) DeepCopyInto(out *ModelsClaim) {
+	*out = *in
+	if in.ModelProviderNames != nil {
+		in, out := &in.ModelProviderNames, &out.ModelProviderNames
+		*out = make([]ModelProviderName, len(*in))
+		copy(*out, *in)
+	}
+	if in.InferenceFlavors != nil {
+		in, out := &in.InferenceFlavors, &out.InferenceFlavors
+		*out = make([]FlavorName, len(*in))
+		copy(*out, *in)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelsClaim.
+func (in *ModelsClaim) DeepCopy() *ModelsClaim {
 	if in == nil {
 		return nil
 	}
-	out := new(ServeStatus)
+	out := new(ModelsClaim)
 	in.DeepCopyInto(out)
 	return out
 }
diff --git a/cmd/main.go b/cmd/main.go
index a8172ed..30b01d2 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2023.
+Copyright 2024.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -32,8 +32,10 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
 
-	llmazv1alpha1 "inftyai.io/llmaz/api/v1alpha1"
-	"inftyai.io/llmaz/internal/controller"
+	inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1"
+	llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1"
+	"inftyai.com/llmaz/internal/controller"
+	inferencecontroller "inftyai.com/llmaz/internal/controller/inference"
 	//+kubebuilder:scaffold:imports
 )
 
@@ -45,7 +47,8 @@ var (
 func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
 
-	utilruntime.Must(llmazv1alpha1.AddToScheme(scheme))
+	utilruntime.Must(inferencev1alpha1.AddToScheme(scheme))
+	utilruntime.Must(llmaziov1alpha1.AddToScheme(scheme))
 	//+kubebuilder:scaffold:scheme
 }
 
@@ -71,7 +74,7 @@ func main() {
 		Metrics:                metricsserver.Options{BindAddress: metricsAddr},
 		HealthProbeBindAddress: probeAddr,
 		LeaderElection:         enableLeaderElection,
-		LeaderElectionID:       "05d9997c.inftyai.io",
+		LeaderElectionID:       "fbb36db9.llmaz.io",
 		// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
 		// when the Manager ends. This requires the binary to immediately end when the
 		// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
@@ -89,11 +92,25 @@ func main() {
 		os.Exit(1)
 	}
 
-	if err = (&controller.ServeReconciler{
+	if err = (&inferencecontroller.ServiceReconciler{
 		Client: mgr.GetClient(),
 		Scheme: mgr.GetScheme(),
 	}).SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "Serve")
+		setupLog.Error(err, "unable to create controller", "controller", "Service")
+		os.Exit(1)
+	}
+	if err = (&controller.ModelProviderReconciler{
+		Client: mgr.GetClient(),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "ModelProvider")
+		os.Exit(1)
+	}
+	if err = (&inferencecontroller.PlaygroundReconciler{
+		Client: mgr.GetClient(),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "Playground")
 		os.Exit(1)
 	}
 	//+kubebuilder:scaffold:builder
diff --git a/config/crd/bases/inference.llmaz.io_backends.yaml b/config/crd/bases/inference.llmaz.io_backends.yaml
new file mode 100644
index 0000000..0299ff1
--- /dev/null
+++ b/config/crd/bases/inference.llmaz.io_backends.yaml
@@ -0,0 +1,125 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.14.0
+  name: backends.inference.llmaz.io
+spec:
+  group: inference.llmaz.io
+  names:
+    kind: Backend
+    listKind: BackendList
+    plural: backends
+    singular: backend
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: Backend is the Schema for the backends API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: BackendSpec defines the desired state of Backend
+            properties:
+              foo:
+                description: Foo is an example field of Backend. Edit backend_types.go
+                  to remove/update
+                type: string
+            type: object
+          status:
+            description: BackendStatus defines the observed state of Backend
+            properties:
+              conditions:
+                description: Conditions represents the Inference condition.
+                items:
+                  description: "Condition contains details for one aspect of the current
+                    state of this API Resource.\n---\nThis struct is intended for
+                    direct use as an array at the field path .status.conditions.  For
+                    example,\n\n\n\ttype FooStatus struct{\n\t    // Represents the
+                    observations of a foo's current state.\n\t    // Known .status.conditions.type
+                    are: \"Available\", \"Progressing\", and \"Degraded\"\n\t    //
+                    +patchMergeKey=type\n\t    // +patchStrategy=merge\n\t    // +listType=map\n\t
+                    \   // +listMapKey=type\n\t    Conditions []metav1.Condition `json:\"conditions,omitempty\"
+                    patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+                    \   // other fields\n\t}"
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: |-
+                        type of condition in CamelCase or in foo.example.com/CamelCase.
+                        ---
+                        Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+                        useful (see .node.status.conditions), the ability to deconflict is important.
+                        The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
new file mode 100644
index 0000000..3e0a696
--- /dev/null
+++ b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
@@ -0,0 +1,307 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.14.0
+  name: playgrounds.inference.llmaz.io
+spec:
+  group: inference.llmaz.io
+  names:
+    kind: Playground
+    listKind: PlaygroundList
+    plural: playgrounds
+    singular: playground
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: Playground is the Schema for the playgrounds API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: PlaygroundSpec defines the desired state of Playground
+            properties:
+              backendConfig:
+                description: BackendConfig indicates the inference backend under the
+                  hood, e.g. vLLM.
+                properties:
+                  args:
+                    description: Args represents the arguments passed to the backend.
+                    items:
+                      type: string
+                    type: array
+                  envs:
+                    description: Envs represents the environments set to the container.
+                    items:
+                      description: EnvVar represents an environment variable present
+                        in a Container.
+                      properties:
+                        name:
+                          description: Name of the environment variable. Must be a
+                            C_IDENTIFIER.
+                          type: string
+                        value:
+                          description: |-
+                            Variable references $(VAR_NAME) are expanded
+                            using the previously defined environment variables in the container and
+                            any service environment variables. If a variable cannot be resolved,
+                            the reference in the input string will be unchanged. Double $$ are reduced
+                            to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
+                            "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
+                            Escaped references will never be expanded, regardless of whether the variable
+                            exists or not.
+                            Defaults to "".
+                          type: string
+                        valueFrom:
+                          description: Source for the environment variable's value.
+                            Cannot be used if value is not empty.
+                          properties:
+                            configMapKeyRef:
+                              description: Selects a key of a ConfigMap.
+                              properties:
+                                key:
+                                  description: The key to select.
+                                  type: string
+                                name:
+                                  description: |-
+                                    Name of the referent.
+                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                                    TODO: Add other useful fields. apiVersion, kind, uid?
+                                  type: string
+                                optional:
+                                  description: Specify whether the ConfigMap or its
+                                    key must be defined
+                                  type: boolean
+                              required:
+                              - key
+                              type: object
+                              x-kubernetes-map-type: atomic
+                            fieldRef:
+                              description: |-
+                                Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
+                                spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
+                              properties:
+                                apiVersion:
+                                  description: Version of the schema the FieldPath
+                                    is written in terms of, defaults to "v1".
+                                  type: string
+                                fieldPath:
+                                  description: Path of the field to select in the
+                                    specified API version.
+                                  type: string
+                              required:
+                              - fieldPath
+                              type: object
+                              x-kubernetes-map-type: atomic
+                            resourceFieldRef:
+                              description: |-
+                                Selects a resource of the container: only resources limits and requests
+                                (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
+                              properties:
+                                containerName:
+                                  description: 'Container name: required for volumes,
+                                    optional for env vars'
+                                  type: string
+                                divisor:
+                                  anyOf:
+                                  - type: integer
+                                  - type: string
+                                  description: Specifies the output format of the
+                                    exposed resources, defaults to "1"
+                                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                  x-kubernetes-int-or-string: true
+                                resource:
+                                  description: 'Required: resource to select'
+                                  type: string
+                              required:
+                              - resource
+                              type: object
+                              x-kubernetes-map-type: atomic
+                            secretKeyRef:
+                              description: Selects a key of a secret in the pod's
+                                namespace
+                              properties:
+                                key:
+                                  description: The key of the secret to select from.  Must
+                                    be a valid secret key.
+                                  type: string
+                                name:
+                                  description: |-
+                                    Name of the referent.
+                                    More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                                    TODO: Add other useful fields. apiVersion, kind, uid?
+                                  type: string
+                                optional:
+                                  description: Specify whether the Secret or its key
+                                    must be defined
+                                  type: boolean
+                              required:
+                              - key
+                              type: object
+                              x-kubernetes-map-type: atomic
+                          type: object
+                      required:
+                      - name
+                      type: object
+                    type: array
+                  name:
+                    default: vllm
+                    description: Name represents the inference backend under the hood,
+                      e.g. vLLM.
+                    enum:
+                    - vllm
+                    type: string
+                  version:
+                    description: |-
+                      Version represents the backend version if you want a different one
+                      from the default version.
+                    type: string
+                type: object
+              elasticConfig:
+                description: |-
+                  ElasticConfig defines the configuration for elastic usage,
+                  e.g. the max/min replicas.
+                  Default to 0 ~ Inf+.
+                properties:
+                  maxReplicas:
+                    description: |-
+                      MaxReplicas indicates the maximum number of Inference instances based on the traffic.
+                      Default to nil means there's no limit for the instance number.
+                    format: int32
+                    type: integer
+                  minReplicas:
+                    description: |-
+                      MinReplicas indicates the minimum number of Inference instances based on the traffic.
+                      Default to nil means we can scale down the instances to 0.
+                    format: int32
+                    type: integer
+                type: object
+              modelsClaim:
+                description: ModelsClaim refers to the model claims.
+                properties:
+                  inferenceFlavors:
+                    description: |-
+                      InferenceFlavors represents a list of flavors with fungibility supported.
+                      If not set and multiple modelProviders claimed, apply with the 0-index model provider.
+                      If set, the flavor names also refer to the 0-index model provider.
+                      This is just for simplicity, if needed, will refactor this part in the future.
+                    items:
+                      type: string
+                    type: array
+                  modelProviderNames:
+                    description: |-
+                      ModelProviderNames represents the names of the modelProviders; there may be
+                      multiple models here to support state-of-the-art technologies like
+                      speculative decoding.
+                    items:
+                      type: string
+                    minItems: 1
+                    type: array
+                required:
+                - modelProviderNames
+                type: object
+              replicas:
+                default: 1
+                description: Replicas represents the replica number of model playground.
+                format: int32
+                type: integer
+            required:
+            - modelsClaim
+            type: object
+          status:
+            description: PlaygroundStatus defines the observed state of Playground
+            properties:
+              conditions:
+                description: Conditions represents the Inference condition.
+                items:
+                  description: "Condition contains details for one aspect of the current
+                    state of this API Resource.\n---\nThis struct is intended for
+                    direct use as an array at the field path .status.conditions.  For
+                    example,\n\n\n\ttype FooStatus struct{\n\t    // Represents the
+                    observations of a foo's current state.\n\t    // Known .status.conditions.type
+                    are: \"Available\", \"Progressing\", and \"Degraded\"\n\t    //
+                    +patchMergeKey=type\n\t    // +patchStrategy=merge\n\t    // +listType=map\n\t
+                    \   // +listMapKey=type\n\t    Conditions []metav1.Condition `json:\"conditions,omitempty\"
+                    patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+                    \   // other fields\n\t}"
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: |-
+                        type of condition in CamelCase or in foo.example.com/CamelCase.
+                        ---
+                        Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+                        useful (see .node.status.conditions), the ability to deconflict is important.
+                        The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/bases/llmaz.inftyai.io_serves.yaml b/config/crd/bases/inference.llmaz.io_services.yaml
similarity index 99%
rename from config/crd/bases/llmaz.inftyai.io_serves.yaml
rename to config/crd/bases/inference.llmaz.io_services.yaml
index ec90456..4a99628 100644
--- a/config/crd/bases/llmaz.inftyai.io_serves.yaml
+++ b/config/crd/bases/inference.llmaz.io_services.yaml
@@ -4,20 +4,20 @@ kind: CustomResourceDefinition
 metadata:
   annotations:
     controller-gen.kubebuilder.io/version: v0.14.0
-  name: serves.llmaz.inftyai.io
+  name: services.inference.llmaz.io
 spec:
-  group: llmaz.inftyai.io
+  group: inference.llmaz.io
   names:
-    kind: Serve
-    listKind: ServeList
-    plural: serves
-    singular: serve
+    kind: Service
+    listKind: ServiceList
+    plural: services
+    singular: service
   scope: Namespaced
   versions:
   - name: v1alpha1
     schema:
       openAPIV3Schema:
-        description: Serve is the Schema for the serves API
+        description: Service is the Schema for the services API
         properties:
           apiVersion:
             description: |-
@@ -37,17 +37,11 @@ spec:
           metadata:
             type: object
           spec:
-            description: ServeSpec defines the desired state of Serve
+            description: |-
+              ServiceSpec defines the desired state of Service.
+              Service will maintain multi-version of lws with different flavors
+              to support accelerator fungibility.
             properties:
-              backend:
-                default: huggingface
-                description: |-
-                  Backend indicates the inference backend under the hood, e.g. vLLM.
-                  Default to use huggingface library.
-                enum:
-                - vllm
-                - huggingface
-                type: string
               elasticConfig:
                 description: |-
                   ElasticConfig defines the configuration for elastic usage,
@@ -56,25 +50,49 @@ spec:
                 properties:
                   maxReplicas:
                     description: |-
-                      MaxReplicas indicates the maximum number of Serve instances based on the traffic.
+                      MaxReplicas indicates the maximum number of Inference instances based on the traffic.
                       Default to nil means there's no limit for the instance number.
                     format: int32
                     type: integer
                   minReplicas:
                     description: |-
-                      MinReplicas indicates the minimum number of Serve instances based on the traffic.
+                      MinReplicas indicates the minimum number of Inference instances based on the traffic.
                       Default to nil means we can scale down the instances to 0.
                     format: int32
                     type: integer
                 type: object
-              modelNameOrPath:
-                description: ModelNameOrPath represents the model name or the local
-                  path.
-                type: string
+              modelProviderClaim:
+                description: |-
+                  ModelsClaim refers to the model claims.
+                  Note: properties (e.g. nodeSelectors, resources) of the model flavors
+                  will be applied to the workload if they do not exist.
+                properties:
+                  inferenceFlavors:
+                    description: |-
+                      InferenceFlavors represents a list of flavors with fungibility supported.
+                      If not set and multiple modelProviders claimed, apply with the 0-index model provider.
+                      If set, the flavor names also refer to the 0-index model provider.
+                      This is just for simplicity, if needed, will refactor this part in the future.
+                    items:
+                      type: string
+                    type: array
+                  modelProviderNames:
+                    description: |-
+                      ClaimNames represents the names of the modelProviders, there may be
+                      multiple models here to support state-of-the-art technologies like
+                      speculative decoding.
+                    items:
+                      type: string
+                    minItems: 1
+                    type: array
+                required:
+                - modelProviderNames
+                type: object
               workloadTemplate:
                 description: |-
                   WorkloadTemplate defines the underlying workload layout and configuration,
-                  e.g. the leader/worker templates and replicas.
+                  e.g. the templates and replicas.
+                  Note: nodeSelectors and resources might be replaced by the modelProviderClaim.
                 properties:
                   leaderWorkerTemplate:
                     description: LeaderWorkerTemplate defines the template for leader/worker
@@ -7746,16 +7764,38 @@ spec:
                             type: object
                         type: object
                       restartPolicy:
+                        default: Default
                         description: RestartPolicy defines the restart policy when
                           pod failures happen.
+                        enum:
+                        - Default
+                        - RecreateGroupOnPodRestart
                         type: string
                       size:
+                        default: 1
                         description: |-
                           Number of pods to create. It is the total number of pods in each group.
                           The minimum is 1 which represent the leader. When set to 1, the leader
                           pod is created for each group as well as a 0-replica StatefulSet for the workers.
+                          Default to 1.
                         format: int32
                         type: integer
+                      subGroupPolicy:
+                        description: |-
+                          SubGroupPolicy describes the policy that will be applied when creating subgroups
+                          in each replica.
+                        properties:
+                          subGroupSize:
+                            description: |-
+                              The number of pods per subgroup. This value is immutable,
+                              and must not be greater than LeaderWorkerSet.Spec.Size.
+                              Size must be divisible by subGroupSize in which case the
+                              subgroups will be of equal size. Or size - 1 is divisible
+                              by subGroupSize, in which case the leader is considered as
+                              the extra pod, and will be part of the first subgroup.
+                            format: int32
+                            type: integer
+                        type: object
                       workerTemplate:
                         description: WorkerTemplate defines the pod template for worker
                           pods.
@@ -15422,10 +15462,10 @@ spec:
                             type: object
                         type: object
                     required:
-                    - size
                     - workerTemplate
                     type: object
                   replicas:
+                    default: 1
                     description: |-
                       Number of leader-workers groups. A scale subresource is available to enable HPA. The
                       selector for HPA will be that of the leader pod, and so practically HPA will be looking up the
@@ -15433,19 +15473,84 @@ spec:
                       the rest of the group and expose them as a summary custom metric representing the whole
                       group.
                       On scale down, the leader pod as well as the workers statefulset will be deleted.
+                      Default to 1.
                     format: int32
                     type: integer
+                  rolloutStrategy:
+                    description: |-
+                      RolloutStrategy defines the strategy that will be applied to update replicas
+                      when a revision is made to the leaderWorkerTemplate.
+                    properties:
+                      rollingUpdateConfiguration:
+                        description: RollingUpdateConfiguration defines the parameters
+                          to be used when type is RollingUpdateStrategyType.
+                        properties:
+                          maxSurge:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            default: 0
+                            description: |-
+                              The maximum number of replicas that can be scheduled above the original number of
+                              replicas.
+                              Value can be an absolute number (ex: 5) or a percentage of total replicas at
+                              the start of the update (ex: 10%).
+                              Absolute number is calculated from percentage by rounding up.
+                              By default, a value of 0 is used.
+                              Example: when this is set to 30%, the new replicas can be scaled up by 30%
+                              immediately when the rolling update starts. Once old replicas have been deleted,
+                              new replicas can be scaled up further, ensuring that total number of replicas running
+                              at any time during the update is at most 130% of original replicas.
+                              When rolling update completes, replicas will fall back to the original replicas.
+                            x-kubernetes-int-or-string: true
+                          maxUnavailable:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            default: 1
+                            description: |-
+                              The maximum number of replicas that can be unavailable during the update.
+                              Value can be an absolute number (ex: 5) or a percentage of total replicas at the start of update (ex: 10%).
+                              Absolute number is calculated from percentage by rounding down.
+                              This can not be 0 if MaxSurge is 0.
+                              By default, a fixed value of 1 is used.
+                              Example: when this is set to 30%, the old replicas can be scaled down by 30%
+                              immediately when the rolling update starts. Once new replicas are ready, old replicas
+                              can be scaled down further, followed by scaling up the new replicas, ensuring
+                              that at least 70% of original number of replicas are available at all times
+                              during the update.
+                            x-kubernetes-int-or-string: true
+                        type: object
+                      type:
+                        default: RollingUpdate
+                        description: Type defines the rollout strategy, it can only
+                          be “RollingUpdate” for now.
+                        enum:
+                        - RollingUpdate
+                        type: string
+                    required:
+                    - type
+                    type: object
+                  startupPolicy:
+                    default: LeaderCreated
+                    description: StartupPolicy determines the startup policy for the
+                      worker statefulset.
+                    enum:
+                    - LeaderCreated
+                    - LeaderReady
+                    type: string
                 required:
                 - leaderWorkerTemplate
                 type: object
             required:
+            - modelProviderClaim
             - workloadTemplate
             type: object
           status:
-            description: ServeStatus defines the observed state of Serve
+            description: ServiceStatus defines the observed state of Service
             properties:
               conditions:
-                description: Conditions represents the Serve condition.
+                description: Conditions represents the Inference condition.
                 items:
                   description: "Condition contains details for one aspect of the current
                     state of this API Resource.\n---\nThis struct is intended for
diff --git a/config/crd/bases/llmaz.io_modelproviders.yaml b/config/crd/bases/llmaz.io_modelproviders.yaml
new file mode 100644
index 0000000..c7f057e
--- /dev/null
+++ b/config/crd/bases/llmaz.io_modelproviders.yaml
@@ -0,0 +1,293 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.14.0
+  name: modelproviders.llmaz.io
+spec:
+  group: llmaz.io
+  names:
+    kind: ModelProvider
+    listKind: ModelProviderList
+    plural: modelproviders
+    singular: modelprovider
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: ModelProvider is the Schema for the modelProvider API.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: ModelProviderSpec defines the desired state of ModelProvider
+            properties:
+              dataSource:
+                description: DataSource represents where to fetch the models, huggingface,
+                  hostpath, s3, etc.
+                properties:
+                  image:
+                    description: |-
+                      The name of the image that contains the source data. The assumption is that the source data locates in the
+                      `data` directory in the image.
+                    type: string
+                  imagePullSecrets:
+                    description: ImagePullSecrets is a list of secret names in the
+                      same namespace used for pulling the data image.
+                    items:
+                      type: string
+                    type: array
+                  name:
+                    description: |-
+                      The name of the dataset. The same name will be used as a container name.
+                      It must be a valid DNS subdomain value.
+                    type: string
+                  urls:
+                    description: URLs specifies the links to the public data sources.
+                      E.g., files in a public github repository.
+                    items:
+                      type: string
+                    type: array
+                  volumeSource:
+                    description: The mounted volume that contains the data.
+                    x-kubernetes-preserve-unknown-fields: true
+                type: object
+              familyName:
+                description: |-
+                  FamilyName represents the model type, like llama2, which will be auto injected
+                  to the labels with the key of `llmaz.io/model-family-name`.
+                type: string
+              inferenceFlavors:
+                description: |-
+                  InferenceFlavors represents the hardware requirements to serve the model.
+                  Flavors are fungible following the priority of slice order.
+                items:
+                  description: |-
+                    Flavor represents the hardware requirements for one model.
+                    Generally, it will be used in two places:
+                    - Pod scheduling with node selectors specified.
+                    - Cluster autoscaling with essential parameters provided.
+                    Flavor is useful because the hardware requirements for models
+                    are usually clear, like llama2-70B needs 8x Nvidia A100.
+                  properties:
+                    name:
+                      description: Name represents the flavor name.
+                      type: string
+                    nodeSelector:
+                      description: |-
+                        NodeSelector refers to the nodes with specified accelerators equipped to
+                        serve the model, like cloud-provider.com/accelerator: nvidia-a100.
+                        NodeSelector will be auto injected to the Pods as scheduling primitives.
+                      items:
+                        description: |-
+                          A node selector represents the union of the results of one or more label queries
+                          over a set of nodes; that is, it represents the OR of the selectors represented
+                          by the node selector terms.
+                        properties:
+                          nodeSelectorTerms:
+                            description: Required. A list of node selector terms.
+                              The terms are ORed.
+                            items:
+                              description: |-
+                                A null or empty node selector term matches no objects. The requirements of
+                                them are ANDed.
+                                The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+                              properties:
+                                matchExpressions:
+                                  description: A list of node selector requirements
+                                    by node's labels.
+                                  items:
+                                    description: |-
+                                      A node selector requirement is a selector that contains values, a key, and an operator
+                                      that relates the key and values.
+                                    properties:
+                                      key:
+                                        description: The label key that the selector
+                                          applies to.
+                                        type: string
+                                      operator:
+                                        description: |-
+                                          Represents a key's relationship to a set of values.
+                                          Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+                                        type: string
+                                      values:
+                                        description: |-
+                                          An array of string values. If the operator is In or NotIn,
+                                          the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                                          the values array must be empty. If the operator is Gt or Lt, the values
+                                          array must have a single element, which will be interpreted as an integer.
+                                          This array is replaced during a strategic merge patch.
+                                        items:
+                                          type: string
+                                        type: array
+                                    required:
+                                    - key
+                                    - operator
+                                    type: object
+                                  type: array
+                                matchFields:
+                                  description: A list of node selector requirements
+                                    by node's fields.
+                                  items:
+                                    description: |-
+                                      A node selector requirement is a selector that contains values, a key, and an operator
+                                      that relates the key and values.
+                                    properties:
+                                      key:
+                                        description: The label key that the selector
+                                          applies to.
+                                        type: string
+                                      operator:
+                                        description: |-
+                                          Represents a key's relationship to a set of values.
+                                          Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+                                        type: string
+                                      values:
+                                        description: |-
+                                          An array of string values. If the operator is In or NotIn,
+                                          the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                                          the values array must be empty. If the operator is Gt or Lt, the values
+                                          array must have a single element, which will be interpreted as an integer.
+                                          This array is replaced during a strategic merge patch.
+                                        items:
+                                          type: string
+                                        type: array
+                                    required:
+                                    - key
+                                    - operator
+                                    type: object
+                                  type: array
+                              type: object
+                              x-kubernetes-map-type: atomic
+                            type: array
+                        required:
+                        - nodeSelectorTerms
+                        type: object
+                        x-kubernetes-map-type: atomic
+                      type: array
+                    params:
+                      additionalProperties:
+                        type: string
+                      description: |-
+                        Params stores other useful parameters and will be consumed by the autoscaling components
+                        like cluster-autoscaler, Karpenter.
+                        E.g. when scaling up nodes with 8x Nvidia A100, the parameter can be injected with
+                        instance-type: p4d.24xlarge for AWS.
+                      type: object
+                    requests:
+                      additionalProperties:
+                        anyOf:
+                        - type: integer
+                        - type: string
+                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                        x-kubernetes-int-or-string: true
+                      description: |-
+                        Requests represents the required resources to serve the model, like nvidia.com/gpu: 8.
+                        Note: cpu and memory usage of the model backend (vllm e.g.) can also be declared here,
+                        or a default value will be used based on the community recommendations.
+                      type: object
+                  required:
+                  - name
+                  - requests
+                  type: object
+                type: array
+            required:
+            - dataSource
+            - familyName
+            type: object
+          status:
+            description: ModelProviderStatus defines the observed state of ModelProvider
+            properties:
+              conditions:
+                description: Conditions represents the Inference condition.
+                items:
+                  description: "Condition contains details for one aspect of the current
+                    state of this API Resource.\n---\nThis struct is intended for
+                    direct use as an array at the field path .status.conditions.  For
+                    example,\n\n\n\ttype FooStatus struct{\n\t    // Represents the
+                    observations of a foo's current state.\n\t    // Known .status.conditions.type
+                    are: \"Available\", \"Progressing\", and \"Degraded\"\n\t    //
+                    +patchMergeKey=type\n\t    // +patchStrategy=merge\n\t    // +listType=map\n\t
+                    \   // +listMapKey=type\n\t    Conditions []metav1.Condition `json:\"conditions,omitempty\"
+                    patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+                    \   // other fields\n\t}"
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: |-
+                        type of condition in CamelCase or in foo.example.com/CamelCase.
+                        ---
+                        Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+                        useful (see .node.status.conditions), the ability to deconflict is important.
+                        The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml
index 4b6a2d4..06b9a8a 100644
--- a/config/crd/kustomization.yaml
+++ b/config/crd/kustomization.yaml
@@ -2,18 +2,24 @@
 # since it depends on service name and namespace that are out of this kustomize package.
 # It should be run by config/default
 resources:
-- bases/llmaz.inftyai.io_serves.yaml
+- bases/inference.llmaz.io_services.yaml
+- bases/llmaz.io_modelproviders.yaml
+- bases/inference.llmaz.io_playgrounds.yaml
 #+kubebuilder:scaffold:crdkustomizeresource
 
 patches:
 # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
 # patches here are for enabling the conversion webhook for each CRD
-#- path: patches/webhook_in_serves.yaml
+#- path: patches/webhook_in_inference_services.yaml
+#- path: patches/webhook_in__modelproviders.yaml
+#- path: patches/webhook_in_inference_playgrounds.yaml
 #+kubebuilder:scaffold:crdkustomizewebhookpatch
 
 # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix.
 # patches here are for enabling the CA injection for each CRD
-#- path: patches/cainjection_in_serves.yaml
+#- path: patches/cainjection_in_inference_services.yaml
+#- path: patches/cainjection_in__modelproviders.yaml
+#- path: patches/cainjection_in_inference_playgrounds.yaml
 #+kubebuilder:scaffold:crdkustomizecainjectionpatch
 
 # [WEBHOOK] To enable webhook, uncomment the following section
diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml
index a320eaa..d35f3b1 100644
--- a/config/default/kustomization.yaml
+++ b/config/default/kustomization.yaml
@@ -1,12 +1,12 @@
 # Adds namespace to all resources.
-namespace: llmaz-operator-system
+namespace: llmaz-system
 
 # Value of this field is prepended to the
 # names of all resources, e.g. a deployment named
 # "wordpress" becomes "alices-wordpress".
 # Note that it should also match with the prefix (text before '-') of the namespace
 # field above.
-namePrefix: llmaz-operator-
+namePrefix: llmaz-
 
 # Labels to add to all resources and selectors.
 #labels:
diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index 7cad38f..14a1b83 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -6,8 +6,8 @@ metadata:
     app.kubernetes.io/name: namespace
     app.kubernetes.io/instance: system
     app.kubernetes.io/component: manager
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: system
 ---
@@ -21,8 +21,8 @@ metadata:
     app.kubernetes.io/name: deployment
     app.kubernetes.io/instance: controller-manager
     app.kubernetes.io/component: manager
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
 spec:
   selector:
diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml
index 77a8cc3..52e9051 100644
--- a/config/prometheus/monitor.yaml
+++ b/config/prometheus/monitor.yaml
@@ -7,8 +7,8 @@ metadata:
     app.kubernetes.io/name: servicemonitor
     app.kubernetes.io/instance: controller-manager-metrics-monitor
     app.kubernetes.io/component: metrics
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: controller-manager-metrics-monitor
   namespace: system
diff --git a/config/rbac/_modelprovider_editor_role.yaml b/config/rbac/_modelprovider_editor_role.yaml
new file mode 100644
index 0000000..4b0f788
--- /dev/null
+++ b/config/rbac/_modelprovider_editor_role.yaml
@@ -0,0 +1,31 @@
+# permissions for end users to edit modelproviders.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: clusterrole
+    app.kubernetes.io/instance: modelprovider-editor-role
+    app.kubernetes.io/component: rbac
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
+    app.kubernetes.io/managed-by: kustomize
+  name: modelprovider-editor-role
+rules:
+- apiGroups:
+  - llmaz.io
+  resources:
+  - modelproviders
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - llmaz.io
+  resources:
+  - modelproviders/status
+  verbs:
+  - get
diff --git a/config/rbac/_modelprovider_viewer_role.yaml b/config/rbac/_modelprovider_viewer_role.yaml
new file mode 100644
index 0000000..67f90fc
--- /dev/null
+++ b/config/rbac/_modelprovider_viewer_role.yaml
@@ -0,0 +1,27 @@
+# permissions for end users to view modelproviders.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: clusterrole
+    app.kubernetes.io/instance: modelprovider-viewer-role
+    app.kubernetes.io/component: rbac
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
+    app.kubernetes.io/managed-by: kustomize
+  name: modelprovider-viewer-role
+rules:
+- apiGroups:
+  - llmaz.io
+  resources:
+  - modelproviders
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - llmaz.io
+  resources:
+  - modelproviders/status
+  verbs:
+  - get
diff --git a/config/rbac/auth_proxy_client_clusterrole.yaml b/config/rbac/auth_proxy_client_clusterrole.yaml
index 2fd2c1c..a6b698e 100644
--- a/config/rbac/auth_proxy_client_clusterrole.yaml
+++ b/config/rbac/auth_proxy_client_clusterrole.yaml
@@ -5,8 +5,8 @@ metadata:
     app.kubernetes.io/name: clusterrole
     app.kubernetes.io/instance: metrics-reader
     app.kubernetes.io/component: kube-rbac-proxy
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: metrics-reader
 rules:
diff --git a/config/rbac/auth_proxy_role.yaml b/config/rbac/auth_proxy_role.yaml
index a43ea4c..3eca335 100644
--- a/config/rbac/auth_proxy_role.yaml
+++ b/config/rbac/auth_proxy_role.yaml
@@ -5,8 +5,8 @@ metadata:
     app.kubernetes.io/name: clusterrole
     app.kubernetes.io/instance: proxy-role
     app.kubernetes.io/component: kube-rbac-proxy
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: proxy-role
 rules:
diff --git a/config/rbac/auth_proxy_role_binding.yaml b/config/rbac/auth_proxy_role_binding.yaml
index bc79daf..53c2976 100644
--- a/config/rbac/auth_proxy_role_binding.yaml
+++ b/config/rbac/auth_proxy_role_binding.yaml
@@ -5,8 +5,8 @@ metadata:
     app.kubernetes.io/name: clusterrolebinding
     app.kubernetes.io/instance: proxy-rolebinding
     app.kubernetes.io/component: kube-rbac-proxy
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: proxy-rolebinding
 roleRef:
diff --git a/config/rbac/auth_proxy_service.yaml b/config/rbac/auth_proxy_service.yaml
index 46f6f74..30bd201 100644
--- a/config/rbac/auth_proxy_service.yaml
+++ b/config/rbac/auth_proxy_service.yaml
@@ -6,8 +6,8 @@ metadata:
     app.kubernetes.io/name: service
     app.kubernetes.io/instance: controller-manager-metrics-service
     app.kubernetes.io/component: kube-rbac-proxy
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: controller-manager-metrics-service
   namespace: system
diff --git a/config/rbac/inference_playground_editor_role.yaml b/config/rbac/inference_playground_editor_role.yaml
new file mode 100644
index 0000000..2638557
--- /dev/null
+++ b/config/rbac/inference_playground_editor_role.yaml
@@ -0,0 +1,31 @@
+# permissions for end users to edit playgrounds.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: clusterrole
+    app.kubernetes.io/instance: playground-editor-role
+    app.kubernetes.io/component: rbac
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
+    app.kubernetes.io/managed-by: kustomize
+  name: playground-editor-role
+rules:
+- apiGroups:
+  - inference.llmaz.io
+  resources:
+  - playgrounds
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - inference.llmaz.io
+  resources:
+  - playgrounds/status
+  verbs:
+  - get
diff --git a/config/rbac/inference_playground_viewer_role.yaml b/config/rbac/inference_playground_viewer_role.yaml
new file mode 100644
index 0000000..6e2237e
--- /dev/null
+++ b/config/rbac/inference_playground_viewer_role.yaml
@@ -0,0 +1,27 @@
+# permissions for end users to view playgrounds.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app.kubernetes.io/name: clusterrole
+    app.kubernetes.io/instance: playground-viewer-role
+    app.kubernetes.io/component: rbac
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
+    app.kubernetes.io/managed-by: kustomize
+  name: playground-viewer-role
+rules:
+- apiGroups:
+  - inference.llmaz.io
+  resources:
+  - playgrounds
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - inference.llmaz.io
+  resources:
+  - playgrounds/status
+  verbs:
+  - get
diff --git a/config/rbac/serve_editor_role.yaml b/config/rbac/inference_service_editor_role.yaml
similarity index 55%
rename from config/rbac/serve_editor_role.yaml
rename to config/rbac/inference_service_editor_role.yaml
index cebc4ef..70cbcba 100644
--- a/config/rbac/serve_editor_role.yaml
+++ b/config/rbac/inference_service_editor_role.yaml
@@ -1,20 +1,20 @@
-# permissions for end users to edit serves.
+# permissions for end users to edit services in the inference.llmaz.io API group.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   labels:
     app.kubernetes.io/name: clusterrole
-    app.kubernetes.io/instance: serve-editor-role
+    app.kubernetes.io/instance: service-editor-role
     app.kubernetes.io/component: rbac
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
-  name: serve-editor-role
+  name: service-editor-role
 rules:
 - apiGroups:
-  - llmaz.inftyai.io
+  - inference.llmaz.io
   resources:
-  - serves
+  - services
   verbs:
   - create
   - delete
@@ -24,8 +24,8 @@ rules:
   - update
   - watch
 - apiGroups:
-  - llmaz.inftyai.io
+  - inference.llmaz.io
   resources:
-  - serves/status
+  - services/status
   verbs:
   - get
diff --git a/config/rbac/serve_viewer_role.yaml b/config/rbac/inference_service_viewer_role.yaml
similarity index 52%
rename from config/rbac/serve_viewer_role.yaml
rename to config/rbac/inference_service_viewer_role.yaml
index f368d8c..9d08bad 100644
--- a/config/rbac/serve_viewer_role.yaml
+++ b/config/rbac/inference_service_viewer_role.yaml
@@ -1,27 +1,27 @@
-# permissions for end users to view serves.
+# permissions for end users to view services in the inference.llmaz.io API group.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   labels:
     app.kubernetes.io/name: clusterrole
-    app.kubernetes.io/instance: serve-viewer-role
+    app.kubernetes.io/instance: service-viewer-role
     app.kubernetes.io/component: rbac
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
-  name: serve-viewer-role
+  name: service-viewer-role
 rules:
 - apiGroups:
-  - llmaz.inftyai.io
+  - inference.llmaz.io
   resources:
-  - serves
+  - services
   verbs:
   - get
   - list
   - watch
 - apiGroups:
-  - llmaz.inftyai.io
+  - inference.llmaz.io
   resources:
-  - serves/status
+  - services/status
   verbs:
   - get
diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml
index 1d231ab..1b4f59e 100644
--- a/config/rbac/leader_election_role.yaml
+++ b/config/rbac/leader_election_role.yaml
@@ -6,8 +6,8 @@ metadata:
     app.kubernetes.io/name: role
     app.kubernetes.io/instance: leader-election-role
     app.kubernetes.io/component: rbac
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: leader-election-role
 rules:
diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml
index 40c0ba5..9e982d1 100644
--- a/config/rbac/leader_election_role_binding.yaml
+++ b/config/rbac/leader_election_role_binding.yaml
@@ -5,8 +5,8 @@ metadata:
     app.kubernetes.io/name: rolebinding
     app.kubernetes.io/instance: leader-election-rolebinding
     app.kubernetes.io/component: rbac
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: leader-election-rolebinding
 roleRef:
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index 2a76a70..4918696 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -5,9 +5,9 @@ metadata:
   name: manager-role
 rules:
 - apiGroups:
-  - llmaz.inftyai.io
+  - inference.llmaz.io
   resources:
-  - serves
+  - playgrounds
   verbs:
   - create
   - delete
@@ -17,15 +17,67 @@ rules:
   - update
   - watch
 - apiGroups:
-  - llmaz.inftyai.io
+  - inference.llmaz.io
   resources:
-  - serves/finalizers
+  - playgrounds/finalizers
   verbs:
   - update
 - apiGroups:
-  - llmaz.inftyai.io
+  - inference.llmaz.io
   resources:
-  - serves/status
+  - playgrounds/status
+  verbs:
+  - get
+  - patch
+  - update
+- apiGroups:
+  - inference.llmaz.io
+  resources:
+  - services
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - inference.llmaz.io
+  resources:
+  - services/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - inference.llmaz.io
+  resources:
+  - services/status
+  verbs:
+  - get
+  - patch
+  - update
+- apiGroups:
+  - llmaz.io
+  resources:
+  - modelproviders
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - llmaz.io
+  resources:
+  - modelproviders/finalizers
+  verbs:
+  - update
+- apiGroups:
+  - llmaz.io
+  resources:
+  - modelproviders/status
   verbs:
   - get
   - patch
diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml
index 6e4138e..6911993 100644
--- a/config/rbac/role_binding.yaml
+++ b/config/rbac/role_binding.yaml
@@ -5,8 +5,8 @@ metadata:
     app.kubernetes.io/name: clusterrolebinding
     app.kubernetes.io/instance: manager-rolebinding
     app.kubernetes.io/component: rbac
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: manager-rolebinding
 roleRef:
diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml
index 3a2627b..09904ae 100644
--- a/config/rbac/service_account.yaml
+++ b/config/rbac/service_account.yaml
@@ -5,8 +5,8 @@ metadata:
     app.kubernetes.io/name: serviceaccount
     app.kubernetes.io/instance: controller-manager-sa
     app.kubernetes.io/component: rbac
-    app.kubernetes.io/created-by: llmaz-operator
-    app.kubernetes.io/part-of: llmaz-operator
+    app.kubernetes.io/created-by: llmaz
+    app.kubernetes.io/part-of: llmaz
     app.kubernetes.io/managed-by: kustomize
   name: controller-manager
   namespace: system
diff --git a/config/samples/_v1alpha1_modelprovider.yaml b/config/samples/_v1alpha1_modelprovider.yaml
new file mode 100644
index 0000000..70b460d
--- /dev/null
+++ b/config/samples/_v1alpha1_modelprovider.yaml
@@ -0,0 +1,12 @@
+apiVersion: llmaz.io/v1alpha1
+kind: ModelProvider
+metadata:
+  labels:
+    app.kubernetes.io/name: modelprovider
+    app.kubernetes.io/instance: modelprovider-sample
+    app.kubernetes.io/part-of: llmaz
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/created-by: llmaz
+  name: modelprovider-sample
+spec:
+  # TODO(user): Add fields here
diff --git a/config/samples/inference_v1alpha1_playground.yaml b/config/samples/inference_v1alpha1_playground.yaml
new file mode 100644
index 0000000..e1eb086
--- /dev/null
+++ b/config/samples/inference_v1alpha1_playground.yaml
@@ -0,0 +1,12 @@
+apiVersion: inference.llmaz.io/v1alpha1
+kind: Playground
+metadata:
+  labels:
+    app.kubernetes.io/name: playground
+    app.kubernetes.io/instance: playground-sample
+    app.kubernetes.io/part-of: llmaz
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/created-by: llmaz
+  name: playground-sample
+spec:
+  # TODO(user): Add fields here
diff --git a/config/samples/inference_v1alpha1_service.yaml b/config/samples/inference_v1alpha1_service.yaml
new file mode 100644
index 0000000..0d0c053
--- /dev/null
+++ b/config/samples/inference_v1alpha1_service.yaml
@@ -0,0 +1,12 @@
+apiVersion: inference.llmaz.io/v1alpha1
+kind: Service
+metadata:
+  labels:
+    app.kubernetes.io/name: service
+    app.kubernetes.io/instance: service-sample
+    app.kubernetes.io/part-of: llmaz
+    app.kubernetes.io/managed-by: kustomize
+    app.kubernetes.io/created-by: llmaz
+  name: service-sample
+spec:
+  # TODO(user): Add fields here
diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml
index c81515e..3c10859 100644
--- a/config/samples/kustomization.yaml
+++ b/config/samples/kustomization.yaml
@@ -1,4 +1,6 @@
 ## Append samples of your project ##
 resources:
-- llmaz_v1alpha1_serve.yaml
+- inference_v1alpha1_service.yaml
+- _v1alpha1_modelprovider.yaml
+- inference_v1alpha1_playground.yaml
 #+kubebuilder:scaffold:manifestskustomizesamples
diff --git a/config/samples/llmaz_v1alpha1_serve.yaml b/config/samples/llmaz_v1alpha1_serve.yaml
deleted file mode 100644
index f9f405a..0000000
--- a/config/samples/llmaz_v1alpha1_serve.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-apiVersion: llmaz.inftyai.io/v1alpha1
-kind: Serve
-metadata:
-  labels:
-    app.kubernetes.io/name: serve
-    app.kubernetes.io/instance: serve-sample
-    app.kubernetes.io/part-of: llmaz-operator
-    app.kubernetes.io/managed-by: kustomize
-    app.kubernetes.io/created-by: llmaz-operator
-  name: serve-sample
-spec:
-  # TODO(user): Add fields here
diff --git a/go.mod b/go.mod
index bad51f5..c975355 100644
--- a/go.mod
+++ b/go.mod
@@ -1,16 +1,18 @@
-module inftyai.io/llmaz
+module inftyai.com/llmaz
 
-go 1.22
+go 1.22.0
 
 toolchain go1.22.1
 
 require (
-	github.com/onsi/ginkgo/v2 v2.16.0
-	github.com/onsi/gomega v1.31.1
-	k8s.io/apimachinery v0.29.2
-	k8s.io/client-go v0.29.2
-	sigs.k8s.io/controller-runtime v0.17.2
-	sigs.k8s.io/lws v0.1.0
+	github.com/onsi/ginkgo/v2 v2.19.0
+	github.com/onsi/gomega v1.33.1
+	k8s.io/api v0.29.5
+	k8s.io/apimachinery v0.29.5
+	k8s.io/client-go v0.29.5
+	sigs.k8s.io/controller-runtime v0.17.3
+	sigs.k8s.io/lws v0.3.0
+
 )
 
 require (
@@ -25,14 +27,14 @@ require (
 	github.com/go-openapi/jsonpointer v0.19.6 // indirect
 	github.com/go-openapi/jsonreference v0.20.2 // indirect
 	github.com/go-openapi/swag v0.22.3 // indirect
-	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
+	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
-	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
 	github.com/google/gnostic-models v0.6.8 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
-	github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
+	github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect
 	github.com/google/uuid v1.3.0 // indirect
 	github.com/imdario/mergo v0.3.12 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
@@ -51,24 +53,23 @@ require (
 	go.uber.org/multierr v1.11.0 // indirect
 	go.uber.org/zap v1.26.0 // indirect
 	golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
-	golang.org/x/net v0.20.0 // indirect
+	golang.org/x/net v0.25.0 // indirect
 	golang.org/x/oauth2 v0.12.0 // indirect
-	golang.org/x/sys v0.16.0 // indirect
-	golang.org/x/term v0.16.0 // indirect
-	golang.org/x/text v0.14.0 // indirect
+	golang.org/x/sys v0.20.0 // indirect
+	golang.org/x/term v0.20.0 // indirect
+	golang.org/x/text v0.15.0 // indirect
 	golang.org/x/time v0.3.0 // indirect
-	golang.org/x/tools v0.17.0 // indirect
+	golang.org/x/tools v0.21.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
 	google.golang.org/appengine v1.6.7 // indirect
-	google.golang.org/protobuf v1.31.0 // indirect
+	google.golang.org/protobuf v1.33.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	k8s.io/api v0.29.2 // indirect
-	k8s.io/apiextensions-apiserver v0.29.2 // indirect
-	k8s.io/component-base v0.29.2 // indirect
-	k8s.io/klog/v2 v2.110.1 // indirect
-	k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect
+	k8s.io/apiextensions-apiserver v0.29.5 // indirect
+	k8s.io/component-base v0.29.5 // indirect
+	k8s.io/klog/v2 v2.120.1 // indirect
+	k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
 	k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
 	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
 	sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
diff --git a/go.sum b/go.sum
index f9731c3..ca568f0 100644
--- a/go.sum
+++ b/go.sum
@@ -2,9 +2,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
 github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
-github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
-github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -17,7 +14,6 @@ github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1
 github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
 github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
 github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
-github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
 github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
 github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
 github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
@@ -28,30 +24,27 @@ github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2Kv
 github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
 github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g=
 github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
-github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
-github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
-github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
-github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
 github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
-github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg=
+github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU=
 github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
@@ -78,10 +71,10 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
-github.com/onsi/ginkgo/v2 v2.16.0 h1:7q1w9frJDzninhXxjZd+Y/x54XNjG/UlRLIYPZafsPM=
-github.com/onsi/ginkgo/v2 v2.16.0/go.mod h1:llBI3WDLL9Z6taip6f33H76YcWtJv+7R3HigUjbIBOs=
-github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo=
-github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0=
+github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
+github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
+github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
+github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -102,7 +95,6 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
@@ -128,8 +120,8 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo=
-golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
+golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
 golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4=
 golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -137,25 +129,24 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
-golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
-golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
+golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
-golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
 golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc=
-golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps=
+golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw=
+golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -164,10 +155,8 @@ gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw
 gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
 google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
 google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
-google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
-google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
+google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
+google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
@@ -180,28 +169,28 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A=
-k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0=
-k8s.io/apiextensions-apiserver v0.29.2 h1:UK3xB5lOWSnhaCk0RFZ0LUacPZz9RY4wi/yt2Iu+btg=
-k8s.io/apiextensions-apiserver v0.29.2/go.mod h1:aLfYjpA5p3OwtqNXQFkhJ56TB+spV8Gc4wfMhUA3/b8=
-k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8=
-k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU=
-k8s.io/client-go v0.29.2 h1:FEg85el1TeZp+/vYJM7hkDlSTFZ+c5nnK44DJ4FyoRg=
-k8s.io/client-go v0.29.2/go.mod h1:knlvFZE58VpqbQpJNbCbctTVXcd35mMyAAwBdpt4jrA=
-k8s.io/component-base v0.29.2 h1:lpiLyuvPA9yV1aQwGLENYyK7n/8t6l3nn3zAtFTJYe8=
-k8s.io/component-base v0.29.2/go.mod h1:BfB3SLrefbZXiBfbM+2H1dlat21Uewg/5qtKOl8degM=
-k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0=
-k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo=
-k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780=
-k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA=
+k8s.io/api v0.29.5 h1:levS+umUigHCfI3riD36pMY1vQEbrzh4r1ivVWAhHaI=
+k8s.io/api v0.29.5/go.mod h1:7b18TtPcJzdjk7w5zWyIHgoAtpGeRvGGASxlS7UZXdQ=
+k8s.io/apiextensions-apiserver v0.29.5 h1:njDywexhE6n+1NEl3A4axT0TMQHREnndrk3/ztdWcNE=
+k8s.io/apiextensions-apiserver v0.29.5/go.mod h1:pfIvij+MH9a8NQKtW7MD4EFnzvUjJ1ZQsDL8wuP8fnc=
+k8s.io/apimachinery v0.29.5 h1:Hofa2BmPfpoT+IyDTlcPdCHSnHtEQMoJYGVoQpRTfv4=
+k8s.io/apimachinery v0.29.5/go.mod h1:i3FJVwhvSp/6n8Fl4K97PJEP8C+MM+aoDq4+ZJBf70Y=
+k8s.io/client-go v0.29.5 h1:nlASXmPQy190qTteaVP31g3c/wi2kycznkTP7Sv1zPc=
+k8s.io/client-go v0.29.5/go.mod h1:aY5CnqUUvXYccJhm47XHoPcRyX6vouHdIBHaKZGTbK4=
+k8s.io/component-base v0.29.5 h1:Ptj8AzG+p8c2a839XriHwxakDpZH9uvIgYz+o1agjg8=
+k8s.io/component-base v0.29.5/go.mod h1:9nBUoPxW/yimISIgAG7sJDrUGJlu7t8HnDafIrOdU8Q=
+k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw=
+k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
 k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI=
 k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
-sigs.k8s.io/controller-runtime v0.17.2 h1:FwHwD1CTUemg0pW2otk7/U5/i5m2ymzvOXdbeGOUvw0=
-sigs.k8s.io/controller-runtime v0.17.2/go.mod h1:+MngTvIQQQhfXtwfdGw/UOQ/aIaqsYywfCINOtwMO/s=
+sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk=
+sigs.k8s.io/controller-runtime v0.17.3/go.mod h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
-sigs.k8s.io/lws v0.1.0 h1:fcsAHN6BcjwcH5lgjZphVRjFy2Ack550a5CpWUqBuRQ=
-sigs.k8s.io/lws v0.1.0/go.mod h1:9wojYpN6WFa6JUWccK0DzNHuYvqa9a/npKKlVENmY1I=
+sigs.k8s.io/lws v0.3.0 h1:PtjiDHZWCxAeMyrsmPNN0i7KAVf6ocVEQFcojPWeA+k=
+sigs.k8s.io/lws v0.3.0/go.mod h1:/R1Q2LB2eg6t9mX5M6V4HLkeucxBFgOyaKkSGh/FGAY=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt
index 65b8622..ff72ff2 100644
--- a/hack/boilerplate.go.txt
+++ b/hack/boilerplate.go.txt
@@ -1,5 +1,5 @@
 /*
-Copyright 2023.
+Copyright 2024.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/internal/controller/inference/playground_controller.go b/internal/controller/inference/playground_controller.go
new file mode 100644
index 0000000..1b0f6e9
--- /dev/null
+++ b/internal/controller/inference/playground_controller.go
@@ -0,0 +1,62 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package inference
+
+import (
+	"context"
+
+	"k8s.io/apimachinery/pkg/runtime"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1"
+)
+
+// PlaygroundReconciler reconciles a Playground object
+type PlaygroundReconciler struct {
+	client.Client
+	Scheme *runtime.Scheme
+}
+
+//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds/status,verbs=get;update;patch
+//+kubebuilder:rbac:groups=inference.llmaz.io,resources=playgrounds/finalizers,verbs=update
+
+// Reconcile is part of the main kubernetes reconciliation loop which aims to
+// move the current state of the cluster closer to the desired state.
+// TODO(user): Modify the Reconcile function to compare the state specified by
+// the Playground object against the actual cluster state, and then
+// perform operations to make the cluster state reflect the state specified by
+// the user.
+//
+// For more details, check Reconcile and its Result here:
+// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.17.3/pkg/reconcile
+func (r *PlaygroundReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	_ = log.FromContext(ctx)
+
+	// TODO(user): your logic here
+
+	return ctrl.Result{}, nil
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *PlaygroundReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&inferencev1alpha1.Playground{}).
+		Complete(r)
+}
diff --git a/internal/controller/inference/service_controller.go b/internal/controller/inference/service_controller.go
new file mode 100644
index 0000000..633710b
--- /dev/null
+++ b/internal/controller/inference/service_controller.go
@@ -0,0 +1,62 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package inference
+
+import (
+	"context"
+
+	"k8s.io/apimachinery/pkg/runtime"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1"
+)
+
+// ServiceReconciler reconciles a Service object
+type ServiceReconciler struct {
+	client.Client
+	Scheme *runtime.Scheme
+}
+
+//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/status,verbs=get;update;patch
+//+kubebuilder:rbac:groups=inference.llmaz.io,resources=services/finalizers,verbs=update
+
+// Reconcile is part of the main kubernetes reconciliation loop which aims to
+// move the current state of the cluster closer to the desired state.
+// TODO(user): Modify the Reconcile function to compare the state specified by
+// the Service object against the actual cluster state, and then
+// perform operations to make the cluster state reflect the state specified by
+// the user.
+//
+// For more details, check Reconcile and its Result here:
+// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.17.3/pkg/reconcile
+func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	_ = log.FromContext(ctx)
+
+	// TODO(user): your logic here
+
+	return ctrl.Result{}, nil
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *ServiceReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&inferencev1alpha1.Service{}).
+		Complete(r)
+}
diff --git a/internal/controller/inference/suite_test.go b/internal/controller/inference/suite_test.go
new file mode 100644
index 0000000..c724a03
--- /dev/null
+++ b/internal/controller/inference/suite_test.go
@@ -0,0 +1,90 @@
+/*
+Copyright 2024.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package inference
+
+import (
+	"fmt"
+	"path/filepath"
+	"runtime"
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+
+	inferencev1alpha1 "inftyai.com/llmaz/api/inference/v1alpha1"
+	//+kubebuilder:scaffold:imports
+)
+
+// These tests use Ginkgo (BDD-style Go testing framework). Refer to
+// https://onsi.github.io/ginkgo/ to learn more about Ginkgo.
+
+var cfg *rest.Config
+var k8sClient client.Client
+var testEnv *envtest.Environment
+
+func TestControllers(t *testing.T) {
+	RegisterFailHandler(Fail)
+
+	RunSpecs(t, "Controller Suite")
+}
+
+var _ = BeforeSuite(func() {
+	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
+
+	By("bootstrapping test environment")
+	testEnv = &envtest.Environment{
+		CRDDirectoryPaths:     []string{filepath.Join("..", "..", "..", "config", "crd", "bases")},
+		ErrorIfCRDPathMissing: true,
+
+		// The BinaryAssetsDirectory is only required if you want to run the tests directly
+		// without calling the makefile target test. If it is not set, it will look in the
+		// default path defined in controller-runtime, which is /usr/local/kubebuilder/.
+		// Note that you must have the required binaries set up under the bin directory to run
+		// the tests directly. When we run make test they are set up and used automatically.
+		BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s",
+			fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)),
+	}
+
+	var err error
+	// cfg is defined in this file globally.
+	cfg, err = testEnv.Start()
+	Expect(err).NotTo(HaveOccurred())
+	Expect(cfg).NotTo(BeNil())
+
+	err = inferencev1alpha1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	//+kubebuilder:scaffold:scheme
+
+	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
+	Expect(err).NotTo(HaveOccurred())
+	Expect(k8sClient).NotTo(BeNil())
+
+})
+
+var _ = AfterSuite(func() {
+	By("tearing down the test environment")
+	err := testEnv.Stop()
+	Expect(err).NotTo(HaveOccurred())
+})
diff --git a/internal/controller/serve_controller.go b/internal/controller/modelprovider_controller.go
similarity index 70%
rename from internal/controller/serve_controller.go
rename to internal/controller/modelprovider_controller.go
index 9c00668..cedf9ff 100644
--- a/internal/controller/serve_controller.go
+++ b/internal/controller/modelprovider_controller.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2023.
+Copyright 2024.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -24,29 +24,29 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 
-	llmazv1alpha1 "inftyai.io/llmaz/api/v1alpha1"
+	llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1"
 )
 
-// ServeReconciler reconciles a Serve object
-type ServeReconciler struct {
+// ModelProviderReconciler reconciles a ModelProvider object
+type ModelProviderReconciler struct {
 	client.Client
 	Scheme *runtime.Scheme
 }
 
-//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=serves,verbs=get;list;watch;create;update;patch;delete
-//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=serves/status,verbs=get;update;patch
-//+kubebuilder:rbac:groups=llmaz.inftyai.io,resources=serves/finalizers,verbs=update
+//+kubebuilder:rbac:groups=llmaz.io,resources=modelproviders,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=llmaz.io,resources=modelproviders/status,verbs=get;update;patch
+//+kubebuilder:rbac:groups=llmaz.io,resources=modelproviders/finalizers,verbs=update
 
 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
 // TODO(user): Modify the Reconcile function to compare the state specified by
-// the Serve object against the actual cluster state, and then
+// the ModelProvider object against the actual cluster state, and then
 // perform operations to make the cluster state reflect the state specified by
 // the user.
 //
 // For more details, check Reconcile and its Result here:
 // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile
-func (r *ServeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+func (r *ModelProviderReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
 	_ = log.FromContext(ctx)
 
 	// TODO(user): your logic here
@@ -55,8 +55,8 @@ func (r *ServeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
 }
 
 // SetupWithManager sets up the controller with the Manager.
-func (r *ServeReconciler) SetupWithManager(mgr ctrl.Manager) error {
+func (r *ModelProviderReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&llmazv1alpha1.Serve{}).
+		For(&llmaziov1alpha1.ModelProvider{}).
 		Complete(r)
 }
diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go
index 6331b40..f8d050b 100644
--- a/internal/controller/suite_test.go
+++ b/internal/controller/suite_test.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2023.
+Copyright 2024.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -32,7 +32,7 @@ import (
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 
-	llmazv1alpha1 "inftyai.io/llmaz/api/v1alpha1"
+	llmaziov1alpha1 "inftyai.com/llmaz/api/v1alpha1"
 	//+kubebuilder:scaffold:imports
 )
 
@@ -72,7 +72,7 @@ var _ = BeforeSuite(func() {
 	Expect(err).NotTo(HaveOccurred())
 	Expect(cfg).NotTo(BeNil())
 
-	err = llmazv1alpha1.AddToScheme(scheme.Scheme)
+	err = llmaziov1alpha1.AddToScheme(scheme.Scheme)
 	Expect(err).NotTo(HaveOccurred())
 
 	//+kubebuilder:scaffold:scheme