From cd2963192bedb45b8d3175a8ac840163ef83505f Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Tue, 18 Feb 2025 11:21:13 +0800
Subject: [PATCH 1/3] Add recommendedConfigs to backendRuntime

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 .../v1alpha1/backendruntime_types.go          |   61 +-
 api/inference/v1alpha1/config_types.go        |   56 +-
 api/inference/v1alpha1/playground_types.go    |   15 +-
 .../v1alpha1/zz_generated.deepcopy.go         |  143 +-
 chart/templates/backends/llamacpp.yaml        |   29 +-
 chart/templates/backends/ollama.yaml          |   34 +-
 chart/templates/backends/sglang.yaml          |   18 +-
 chart/templates/backends/tgi.yaml             |   18 +-
 chart/templates/backends/vllm.yaml            |   22 +-
 .../inference/v1alpha1/backendruntimearg.go   |   49 -
 .../v1alpha1/backendruntimeconfig.go          |   48 +-
 .../inference/v1alpha1/elasticconfig.go       |   22 +-
 .../inference/v1alpha1/scaletriggerref.go     |   38 -
 client-go/applyconfiguration/utils.go         |    4 -
 .../inference.llmaz.io_backendruntimes.yaml   | 1236 +++++++++--------
 .../bases/inference.llmaz.io_playgrounds.yaml |  127 +-
 docs/examples/hpa/playground.yaml             |   13 +-
 docs/examples/llamacpp/playground.yaml        |    7 +-
 docs/examples/ollama/playground.yaml          |    2 +-
 docs/examples/sglang/playground.yaml          |    2 +-
 .../llamacpp/playground.yaml                  |    6 +-
 .../inference/playground_controller.go        |  127 +-
 .../inference/service_controller.go           |    2 +-
 .../{ => backendruntime}/backendruntime.go    |   67 +-
 .../backendruntime_test.go                    |    0
 pkg/controller_helper/helper.go               |   16 +-
 .../{model_source => modelsource}/modelhub.go |    0
 .../modelsource.go                            |    0
 .../modelsource_test.go                       |    0
 .../{model_source => modelsource}/uri.go      |    0
 pkg/webhook/backendruntime_webhook.go         |   21 +-
 pkg/webhook/openmodel_webhook.go              |    2 +-
 pkg/webhook/playground_webhook.go             |   10 +-
 pkg/webhook/service_webhook.go                |    2 +-
 test/config/backends/fake_backend.yaml        |   51 +-
 test/config/backends/llamacpp.yaml            |   22 +-
 test/config/backends/ollama.yaml              |   22 +-
 test/config/backends/sglang.yaml              |   20 +-
 test/config/backends/tgi.yaml                 |   18 +-
 test/config/backends/vllm.yaml                |   22 +-
 test/e2e/playground_test.go                   |    4 +-
 .../controller/inference/hpa_test.go          |    9 +-
 .../controller/inference/playground_test.go   |   12 +-
 .../webhook/backendruntime_test.go            |   13 +-
 test/util/mock.go                             |    2 +-
 test/util/validation/validate_playground.go   |  222 ++-
 test/util/validation/validate_service.go      |    2 +-
 test/util/wrapper/backend.go                  |   78 +-
 test/util/wrapper/playground.go               |   36 +-
 49 files changed, 1322 insertions(+), 1408 deletions(-)
 delete mode 100644 client-go/applyconfiguration/inference/v1alpha1/backendruntimearg.go
 delete mode 100644 client-go/applyconfiguration/inference/v1alpha1/scaletriggerref.go
 rename pkg/controller_helper/{ => backendruntime}/backendruntime.go (66%)
 rename pkg/controller_helper/{ => backendruntime}/backendruntime_test.go (100%)
 rename pkg/controller_helper/{model_source => modelsource}/modelhub.go (100%)
 rename pkg/controller_helper/{model_source => modelsource}/modelsource.go (100%)
 rename pkg/controller_helper/{model_source => modelsource}/modelsource_test.go (100%)
 rename pkg/controller_helper/{model_source => modelsource}/uri.go (100%)

diff --git a/api/inference/v1alpha1/backendruntime_types.go b/api/inference/v1alpha1/backendruntime_types.go
index cee21e2d..d26829df 100644
--- a/api/inference/v1alpha1/backendruntime_types.go
+++ b/api/inference/v1alpha1/backendruntime_types.go
@@ -19,22 +19,10 @@ package v1alpha1
 import (
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
-// BackendRuntimeArg is the preset arguments for easy to use.
-// Three preset names are provided: default, speculative-decoding, model-parallelism,
-// do not change the name.
-type BackendRuntimeArg struct {
-	// Name represents the identifier of the backendRuntime argument.
-	// +kubebuilder:default=default
-	// +optional
-	Name *string `json:"name,omitempty"`
-	// Flags represents all the preset configurations.
-	// Flag around with {{ .CONFIG }} is a configuration waiting for render.
-	Flags []string `json:"flags,omitempty"`
-}
-
 // HPATrigger represents the configuration of the HorizontalPodAutoscaler.
 // Inspired by kubernetes.io/pkg/apis/autoscaling/types.go#HorizontalPodAutoscalerSpec.
 // Note: HPA component should be installed in prior.
@@ -55,17 +43,6 @@ type HPATrigger struct {
 	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
 }
 
-// NamedScaleTrigger defines the rules to scale the workloads.
-// Only one trigger cloud work at a time. The name is used to identify
-// the trigger in backendRuntime.
-type NamedScaleTrigger struct {
-	// Name represents the identifier of the scale trigger, e.g. some triggers defined for
-	// latency sensitive workloads, some are defined for throughput sensitive workloads.
-	Name string `json:"name,omitempty"`
-	// HPA represents the trigger configuration of the HorizontalPodAutoscaler.
-	HPA *HPATrigger `json:"hpa,omitempty"`
-}
-
 // ScaleTrigger defines the rules to scale the workloads.
 // Only one trigger cloud work at a time, mostly used in Playground.
 type ScaleTrigger struct {
@@ -83,6 +60,30 @@ type MultiHostCommands struct {
 	Worker []string `json:"worker,omitempty"`
 }
 
+// RecommendedConfig represents the recommended configurations for the backendRuntime,
+// user can choose one of them to apply.
+type RecommendedConfig struct {
+	// Name represents the identifier of the config.
+	Name string `json:"name"`
+	// Args represents all the arguments for the command.
+	// Argument around with {{ .CONFIG }} is a configuration waiting for render.
+	// +optional
+	Args []string `json:"args,omitempty"`
+	// Resources represents the resource requirements for backend, like cpu/mem,
+	// accelerators like GPU should not be defined here, but at the model flavors,
+	// or the values here will be overwritten.
+	// +optional
+	Resources *ResourceRequirements `json:"resources,omitempty"`
+	// SharedMemorySize represents the size of /dev/shm required in the runtime of
+	// inference workload.
+	// +optional
+	SharedMemorySize *resource.Quantity `json:"sharedMemorySize,omitempty"`
+	// ScaleTrigger defines the rules to scale the workloads.
+	// Only one trigger could work at a time.
+	// +optional
+	ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
+}
+
 // BackendRuntimeSpec defines the desired state of BackendRuntime
 type BackendRuntimeSpec struct {
 	// Commands represents the default commands for the backendRuntime.
@@ -98,16 +99,9 @@ type BackendRuntimeSpec struct {
 	// Version represents the default version of the backendRuntime.
 	// It will be appended to the image as a tag.
 	Version string `json:"version"`
-	// Args represents the preset arguments of the backendRuntime.
-	// They can be appended or overwritten by the Playground backendRuntimeConfig.
-	Args []BackendRuntimeArg `json:"args,omitempty"`
 	// Envs represents the environments set to the container.
 	// +optional
 	Envs []corev1.EnvVar `json:"envs,omitempty"`
-	// Resources represents the resource requirements for backendRuntime, like cpu/mem,
-	// accelerators like GPU should not be defined here, but at the model flavors,
-	// or the values here will be overwritten.
-	Resources ResourceRequirements `json:"resources"`
 	// Periodic probe of backend liveness.
 	// Backend will be restarted if the probe fails.
 	// Cannot be updated.
@@ -124,10 +118,9 @@ type BackendRuntimeSpec struct {
 	// when it might take a long time to load data or warm a cache, than during steady-state operation.
 	// +optional
 	StartupProbe *corev1.Probe `json:"startupProbe,omitempty"`
-	// ScaleTriggers represents a set of triggers preset to be used by Playground.
-	// If Playground not specify the scale trigger, the 0-index trigger will be used.
+	// RecommendedConfigs represents the recommended configurations for the backendRuntime.
 	// +optional
-	ScaleTriggers []NamedScaleTrigger `json:"scaleTriggers,omitempty"`
+	RecommendedConfigs []RecommendedConfig `json:"recommendedConfigs,omitempty"`
 }
 
 // BackendRuntimeStatus defines the observed state of BackendRuntime
diff --git a/api/inference/v1alpha1/config_types.go b/api/inference/v1alpha1/config_types.go
index 3bcf7fca..d95add61 100644
--- a/api/inference/v1alpha1/config_types.go
+++ b/api/inference/v1alpha1/config_types.go
@@ -28,10 +28,10 @@ const (
 )
 
 type BackendRuntimeConfig struct {
-	// Name represents the inference backend under the hood, e.g. vLLM.
+	// BackendName represents the inference backend under the hood, e.g. vLLM.
 	// +kubebuilder:default=vllm
 	// +optional
-	Name *BackendName `json:"name,omitempty"`
+	BackendName *BackendName `json:"backendName,omitempty"`
 	// Version represents the backend version if you want a different one
 	// from the default version.
 	// +optional
@@ -39,18 +39,32 @@ type BackendRuntimeConfig struct {
 	// Envs represents the environments set to the container.
 	// +optional
 	Envs []corev1.EnvVar `json:"envs,omitempty"`
-
+	// ConfigName represents the recommended configuration name for the backend,
+	// It will be inferred from the models in the runtime if not specified, e.g. default,
+	// speculative-decoding or model-parallelism.
+	ConfigName *string `json:"configName,omitempty"`
+	// Args represents all the arguments for the command.
+	// Argument around with {{ .CONFIG }} is a configuration waiting for render.
+	// Args defined here will be "appended" to the args in the
+	// recommendedConfig.
+	// +optional
+	Args []string `json:"args,omitempty"`
 	// Resources represents the resource requirements for backend, like cpu/mem,
 	// accelerators like GPU should not be defined here, but at the model flavors,
 	// or the values here will be overwritten.
+	// Resources defined here will "overwrite" the resources in the recommendedConfig.
+	// +optional
 	Resources *ResourceRequirements `json:"resources,omitempty"`
 	// SharedMemorySize represents the size of /dev/shm required in the runtime of
 	// inference workload.
+	// SharedMemorySize defined here will "overwrite" the sharedMemorySize in the recommendedConfig.
 	// +optional
 	SharedMemorySize *resource.Quantity `json:"sharedMemorySize,omitempty"`
-	// Args represents the specified arguments of the backendRuntime,
-	// will be append to the backendRuntime.spec.Args.
-	Args *BackendRuntimeArg `json:"args,omitempty"`
+	// ScaleTrigger defines the rules to scale the workloads.
+	// Only one trigger could work at a time, mostly used in Playground.
+	// ScaleTrigger defined here will "overwrite" the scaleTrigger in the recommendedConfig.
+	// +optional
+	ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
 }
 
 // TODO: Do not support DRA yet, we can support that once needed.
@@ -66,33 +80,3 @@ type ResourceRequirements struct {
 	// +optional
 	Requests corev1.ResourceList `json:"requests,omitempty"`
 }
-
-// ScaleTriggerRef refers to the configured scaleTrigger in the backendRuntime.
-type ScaleTriggerRef struct {
-	// Name represents the scale trigger name defined in the backendRuntime.scaleTriggers.
-	Name string `json:"name"`
-}
-
-type ElasticConfig struct {
-	// MinReplicas indicates the minimum number of inference workloads based on the traffic.
-	// Default to 1.
-	// MinReplicas couldn't be 0 now, will support serverless in the future.
-	// +kubebuilder:default=1
-	// +optional
-	MinReplicas *int32 `json:"minReplicas,omitempty"`
-	// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
-	// Default to nil means there's no limit for the instance number.
-	// +optional
-	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
-	// ScaleTriggerRef refers to the configured scaleTrigger in the backendRuntime
-	// with tuned target value.
-	// ScaleTriggerRef and ScaleTrigger can't be set at the same time.
-	// +optional
-	ScaleTriggerRef *ScaleTriggerRef `json:"scaleTriggerRef,omitempty"`
-	// ScaleTrigger defines a set of triggers to scale the workloads.
-	// If not defined, trigger configured in backendRuntime will be used,
-	// otherwise, trigger defined here will overwrite the defaulted ones.
-	// ScaleTriggerRef and ScaleTrigger can't be set at the same time.
-	// +optional
-	ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
-}
diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go
index 1afaf33a..4c18ae75 100644
--- a/api/inference/v1alpha1/playground_types.go
+++ b/api/inference/v1alpha1/playground_types.go
@@ -44,11 +44,22 @@ type PlaygroundSpec struct {
 	BackendRuntimeConfig *BackendRuntimeConfig `json:"backendRuntimeConfig,omitempty"`
 	// ElasticConfig defines the configuration for elastic usage,
 	// e.g. the max/min replicas.
-	// Note: this requires to install the HPA first or will report error.
-	// +optional
 	ElasticConfig *ElasticConfig `json:"elasticConfig,omitempty"`
 }
 
+type ElasticConfig struct {
+	// MinReplicas indicates the minimum number of inference workloads based on the traffic.
+	// Default to 1.
+	// MinReplicas couldn't be 0 now, will support serverless in the future.
+	// +kubebuilder:default=1
+	// +optional
+	MinReplicas *int32 `json:"minReplicas,omitempty"`
+	// MaxReplicas indicates the maximum number of inference workloads based on the traffic.
+	// Default to nil means there's no limit for the instance number.
+	// +optional
+	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
+}
+
 const (
 	// PlaygroundProgressing means the Playground is progressing now, such as waiting for the
 	// inference service creation, rolling update or scaling up and down.
diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go
index 4a7a8cff..cde4189b 100644
--- a/api/inference/v1alpha1/zz_generated.deepcopy.go
+++ b/api/inference/v1alpha1/zz_generated.deepcopy.go
@@ -55,36 +55,11 @@ func (in *BackendRuntime) DeepCopyObject() runtime.Object {
 	return nil
 }
 
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *BackendRuntimeArg) DeepCopyInto(out *BackendRuntimeArg) {
-	*out = *in
-	if in.Name != nil {
-		in, out := &in.Name, &out.Name
-		*out = new(string)
-		**out = **in
-	}
-	if in.Flags != nil {
-		in, out := &in.Flags, &out.Flags
-		*out = make([]string, len(*in))
-		copy(*out, *in)
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackendRuntimeArg.
-func (in *BackendRuntimeArg) DeepCopy() *BackendRuntimeArg {
-	if in == nil {
-		return nil
-	}
-	out := new(BackendRuntimeArg)
-	in.DeepCopyInto(out)
-	return out
-}
-
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *BackendRuntimeConfig) DeepCopyInto(out *BackendRuntimeConfig) {
 	*out = *in
-	if in.Name != nil {
-		in, out := &in.Name, &out.Name
+	if in.BackendName != nil {
+		in, out := &in.BackendName, &out.BackendName
 		*out = new(BackendName)
 		**out = **in
 	}
@@ -93,11 +68,6 @@ func (in *BackendRuntimeConfig) DeepCopyInto(out *BackendRuntimeConfig) {
 		*out = new(string)
 		**out = **in
 	}
-	if in.Args != nil {
-		in, out := &in.Args, &out.Args
-		*out = new(BackendRuntimeArg)
-		(*in).DeepCopyInto(*out)
-	}
 	if in.Envs != nil {
 		in, out := &in.Envs, &out.Envs
 		*out = make([]v1.EnvVar, len(*in))
@@ -105,6 +75,16 @@ func (in *BackendRuntimeConfig) DeepCopyInto(out *BackendRuntimeConfig) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
+	if in.ConfigName != nil {
+		in, out := &in.ConfigName, &out.ConfigName
+		*out = new(string)
+		**out = **in
+	}
+	if in.Args != nil {
+		in, out := &in.Args, &out.Args
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
 	if in.Resources != nil {
 		in, out := &in.Resources, &out.Resources
 		*out = new(ResourceRequirements)
@@ -115,6 +95,11 @@ func (in *BackendRuntimeConfig) DeepCopyInto(out *BackendRuntimeConfig) {
 		x := (*in).DeepCopy()
 		*out = &x
 	}
+	if in.ScaleTrigger != nil {
+		in, out := &in.ScaleTrigger, &out.ScaleTrigger
+		*out = new(ScaleTrigger)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackendRuntimeConfig.
@@ -172,13 +157,6 @@ func (in *BackendRuntimeSpec) DeepCopyInto(out *BackendRuntimeSpec) {
 		*out = new(MultiHostCommands)
 		(*in).DeepCopyInto(*out)
 	}
-	if in.Args != nil {
-		in, out := &in.Args, &out.Args
-		*out = make([]BackendRuntimeArg, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
 	if in.Envs != nil {
 		in, out := &in.Envs, &out.Envs
 		*out = make([]v1.EnvVar, len(*in))
@@ -186,7 +164,6 @@ func (in *BackendRuntimeSpec) DeepCopyInto(out *BackendRuntimeSpec) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
-	in.Resources.DeepCopyInto(&out.Resources)
 	if in.LivenessProbe != nil {
 		in, out := &in.LivenessProbe, &out.LivenessProbe
 		*out = new(v1.Probe)
@@ -202,9 +179,9 @@ func (in *BackendRuntimeSpec) DeepCopyInto(out *BackendRuntimeSpec) {
 		*out = new(v1.Probe)
 		(*in).DeepCopyInto(*out)
 	}
-	if in.ScaleTriggers != nil {
-		in, out := &in.ScaleTriggers, &out.ScaleTriggers
-		*out = make([]NamedScaleTrigger, len(*in))
+	if in.RecommendedConfigs != nil {
+		in, out := &in.RecommendedConfigs, &out.RecommendedConfigs
+		*out = make([]RecommendedConfig, len(*in))
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
@@ -256,16 +233,6 @@ func (in *ElasticConfig) DeepCopyInto(out *ElasticConfig) {
 		*out = new(int32)
 		**out = **in
 	}
-	if in.ScaleTriggerRef != nil {
-		in, out := &in.ScaleTriggerRef, &out.ScaleTriggerRef
-		*out = new(ScaleTriggerRef)
-		**out = **in
-	}
-	if in.ScaleTrigger != nil {
-		in, out := &in.ScaleTrigger, &out.ScaleTrigger
-		*out = new(ScaleTrigger)
-		(*in).DeepCopyInto(*out)
-	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticConfig.
@@ -330,26 +297,6 @@ func (in *MultiHostCommands) DeepCopy() *MultiHostCommands {
 	return out
 }
 
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *NamedScaleTrigger) DeepCopyInto(out *NamedScaleTrigger) {
-	*out = *in
-	if in.HPA != nil {
-		in, out := &in.HPA, &out.HPA
-		*out = new(HPATrigger)
-		(*in).DeepCopyInto(*out)
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NamedScaleTrigger.
-func (in *NamedScaleTrigger) DeepCopy() *NamedScaleTrigger {
-	if in == nil {
-		return nil
-	}
-	out := new(NamedScaleTrigger)
-	in.DeepCopyInto(out)
-	return out
-}
-
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *Playground) DeepCopyInto(out *Playground) {
 	*out = *in
@@ -471,6 +418,41 @@ func (in *PlaygroundStatus) DeepCopy() *PlaygroundStatus {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RecommendedConfig) DeepCopyInto(out *RecommendedConfig) {
+	*out = *in
+	if in.Args != nil {
+		in, out := &in.Args, &out.Args
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+	if in.Resources != nil {
+		in, out := &in.Resources, &out.Resources
+		*out = new(ResourceRequirements)
+		(*in).DeepCopyInto(*out)
+	}
+	if in.SharedMemorySize != nil {
+		in, out := &in.SharedMemorySize, &out.SharedMemorySize
+		x := (*in).DeepCopy()
+		*out = &x
+	}
+	if in.ScaleTrigger != nil {
+		in, out := &in.ScaleTrigger, &out.ScaleTrigger
+		*out = new(ScaleTrigger)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RecommendedConfig.
+func (in *RecommendedConfig) DeepCopy() *RecommendedConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(RecommendedConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ResourceRequirements) DeepCopyInto(out *ResourceRequirements) {
 	*out = *in
@@ -520,21 +502,6 @@ func (in *ScaleTrigger) DeepCopy() *ScaleTrigger {
 	return out
 }
 
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ScaleTriggerRef) DeepCopyInto(out *ScaleTriggerRef) {
-	*out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScaleTriggerRef.
-func (in *ScaleTriggerRef) DeepCopy() *ScaleTriggerRef {
-	if in == nil {
-		return nil
-	}
-	out := new(ScaleTriggerRef)
-	in.DeepCopyInto(out)
-	return out
-}
-
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *Service) DeepCopyInto(out *Service) {
 	*out = *in
diff --git a/chart/templates/backends/llamacpp.yaml b/chart/templates/backends/llamacpp.yaml
index 2b85c24c..2923f07d 100644
--- a/chart/templates/backends/llamacpp.yaml
+++ b/chart/templates/backends/llamacpp.yaml
@@ -14,33 +14,22 @@ spec:
   version: server
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
-  args:
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - -m
         - "{{`{{ .ModelPath }}`}}"
         - --host
         - "0.0.0.0"
         - --port
         - "8080"
-    # TODO: not supported yet, see https://github.com/InftyAI/llmaz/issues/240.
-    # - name: speculative-decoding
-    #   flags:
-    #     - -m
-    #     - "{{`{{ .ModelPath }}`}}"
-    #     - -md
-    #     - "{{`{{ .DraftModelPath }}`}}"
-    #     - --host
-    #     - "0.0.0.0"
-    #     - --port
-    #     - "8080"
-  resources:
-    requests:
-      cpu: 2
-      memory: 4Gi
-    limits:
-      cpu: 2
-      memory: 4Gi
+      resources:
+        requests:
+          cpu: 2
+          memory: 4Gi
+        limits:
+          cpu: 2
+          memory: 4Gi
   startupProbe:
     periodSeconds: 10
     failureThreshold: 30
diff --git a/chart/templates/backends/ollama.yaml b/chart/templates/backends/ollama.yaml
index e931d616..83efb1d7 100644
--- a/chart/templates/backends/ollama.yaml
+++ b/chart/templates/backends/ollama.yaml
@@ -13,24 +13,24 @@ spec:
     - -c
   image: ollama/ollama
   version: latest
+  envs:
+    - name: OLLAMA_HOST
+      value: 0.0.0.0:8080
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
-  args:
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - "ollama serve &
-           while true; do output=$(ollama list 2>&1);
-           if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;
-           ollama run {{`{{ .ModelName }}`}};
-           while true;do sleep 60;done"
-  envs:
-    - name: OLLAMA_HOST
-      value: 0.0.0.0:8080
-  resources:
-    requests:
-      cpu: 2
-      memory: 4Gi
-    limits:
-      cpu: 2
-      memory: 4Gi
-{{- end }}
\ No newline at end of file
+          while true; do output=$(ollama list 2>&1);
+          if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;
+          ollama run {{`{{ .ModelName }}`}};
+          while true;do sleep 60;done"
+      resources:
+        requests:
+          cpu: 2
+          memory: 4Gi
+        limits:
+          cpu: 2
+          memory: 4Gi
+{{- end }}
diff --git a/chart/templates/backends/sglang.yaml b/chart/templates/backends/sglang.yaml
index 86a5b44d..335b3a01 100644
--- a/chart/templates/backends/sglang.yaml
+++ b/chart/templates/backends/sglang.yaml
@@ -16,9 +16,9 @@ spec:
   version: v0.2.10-cu121
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
-  args:
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - --model-path
         - "{{`{{ .ModelPath }}`}}"
         - --served-model-name
@@ -27,13 +27,13 @@ spec:
         - "0.0.0.0"
         - --port
         - "8080"
-  resources:
-    requests:
-      cpu: 4
-      memory: 8Gi
-    limits:
-      cpu: 4
-      memory: 8Gi
+      resources:
+        requests:
+          cpu: 4
+          memory: 8Gi
+        limits:
+          cpu: 4
+          memory: 8Gi
   startupProbe:
     periodSeconds: 10
     failureThreshold: 30
diff --git a/chart/templates/backends/tgi.yaml b/chart/templates/backends/tgi.yaml
index 5a2bd87c..812be7e0 100644
--- a/chart/templates/backends/tgi.yaml
+++ b/chart/templates/backends/tgi.yaml
@@ -12,20 +12,20 @@ spec:
   version: 2.3.1
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
-  args:
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - --model-id
         - "{{`{{ .ModelPath }}`}}"
         - --port
         - "8080"
-  resources:
-    requests:
-      cpu: 4
-      memory: 8Gi
-    limits:
-      cpu: 4
-      memory: 8Gi
+      resources:
+        requests:
+          cpu: 4
+          memory: 8Gi
+        limits:
+          cpu: 4
+          memory: 8Gi
   startupProbe:
     periodSeconds: 10
     failureThreshold: 30
diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml
index 2a2888c0..318cc87c 100644
--- a/chart/templates/backends/vllm.yaml
+++ b/chart/templates/backends/vllm.yaml
@@ -59,9 +59,9 @@ spec:
   version: v0.6.0
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
-  args:
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - --model
         - "{{`{{ .ModelPath }}`}}"
         - --served-model-name
@@ -70,8 +70,15 @@ spec:
         - "0.0.0.0"
         - --port
         - "8080"
+      resources:
+        requests:
+          cpu: 4
+          memory: 8Gi
+        limits:
+          cpu: 4
+          memory: 8Gi
     - name: speculative-decoding
-      flags:
+      args:
         - --model
         - "{{`{{ .ModelPath }}`}}"
         - --served-model-name
@@ -87,7 +94,7 @@ spec:
         - -tp
         - "1"
     - name: model-parallelism
-      flags:
+      args:
         - --model
         - "{{`{{ .ModelPath }}`}}"
         - --served-model-name
@@ -100,13 +107,6 @@ spec:
         - "{{`{{ .TP }}`}}"
         - --pipeline-parallel-size
         - "{{`{{ .PP }}`}}"
-  resources:
-    requests:
-      cpu: 4
-      memory: 8Gi
-    limits:
-      cpu: 4
-      memory: 8Gi
   startupProbe:
     periodSeconds: 10
     failureThreshold: 30
diff --git a/client-go/applyconfiguration/inference/v1alpha1/backendruntimearg.go b/client-go/applyconfiguration/inference/v1alpha1/backendruntimearg.go
deleted file mode 100644
index 231aa87a..00000000
--- a/client-go/applyconfiguration/inference/v1alpha1/backendruntimearg.go
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
-Copyright 2024.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-// BackendRuntimeArgApplyConfiguration represents a declarative configuration of the BackendRuntimeArg type for use
-// with apply.
-type BackendRuntimeArgApplyConfiguration struct {
-	Name  *string  `json:"name,omitempty"`
-	Flags []string `json:"flags,omitempty"`
-}
-
-// BackendRuntimeArgApplyConfiguration constructs a declarative configuration of the BackendRuntimeArg type for use with
-// apply.
-func BackendRuntimeArg() *BackendRuntimeArgApplyConfiguration {
-	return &BackendRuntimeArgApplyConfiguration{}
-}
-
-// WithName sets the Name field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *BackendRuntimeArgApplyConfiguration) WithName(value string) *BackendRuntimeArgApplyConfiguration {
-	b.Name = &value
-	return b
-}
-
-// WithFlags adds the given value to the Flags field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, values provided by each call will be appended to the Flags field.
-func (b *BackendRuntimeArgApplyConfiguration) WithFlags(values ...string) *BackendRuntimeArgApplyConfiguration {
-	for i := range values {
-		b.Flags = append(b.Flags, values[i])
-	}
-	return b
-}
diff --git a/client-go/applyconfiguration/inference/v1alpha1/backendruntimeconfig.go b/client-go/applyconfiguration/inference/v1alpha1/backendruntimeconfig.go
index 9f34a792..3cda1928 100644
--- a/client-go/applyconfiguration/inference/v1alpha1/backendruntimeconfig.go
+++ b/client-go/applyconfiguration/inference/v1alpha1/backendruntimeconfig.go
@@ -26,12 +26,14 @@ import (
 // BackendRuntimeConfigApplyConfiguration represents a declarative configuration of the BackendRuntimeConfig type for use
 // with apply.
 type BackendRuntimeConfigApplyConfiguration struct {
-	Name             *inferencev1alpha1.BackendName          `json:"name,omitempty"`
+	BackendName      *inferencev1alpha1.BackendName          `json:"backendName,omitempty"`
 	Version          *string                                 `json:"version,omitempty"`
-	Args             *BackendRuntimeArgApplyConfiguration    `json:"args,omitempty"`
 	Envs             []v1.EnvVar                             `json:"envs,omitempty"`
+	ConfigName       *string                                 `json:"configName,omitempty"`
+	Args             []string                                `json:"args,omitempty"`
 	Resources        *ResourceRequirementsApplyConfiguration `json:"resources,omitempty"`
 	SharedMemorySize *resource.Quantity                      `json:"sharedMemorySize,omitempty"`
+	ScaleTrigger     *ScaleTriggerApplyConfiguration         `json:"scaleTrigger,omitempty"`
 }
 
 // BackendRuntimeConfigApplyConfiguration constructs a declarative configuration of the BackendRuntimeConfig type for use with
@@ -40,11 +42,11 @@ func BackendRuntimeConfig() *BackendRuntimeConfigApplyConfiguration {
 	return &BackendRuntimeConfigApplyConfiguration{}
 }
 
-// WithName sets the Name field in the declarative configuration to the given value
+// WithBackendName sets the BackendName field in the declarative configuration to the given value
 // and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *BackendRuntimeConfigApplyConfiguration) WithName(value inferencev1alpha1.BackendName) *BackendRuntimeConfigApplyConfiguration {
-	b.Name = &value
+// If called multiple times, the BackendName field is set to the value of the last call.
+func (b *BackendRuntimeConfigApplyConfiguration) WithBackendName(value inferencev1alpha1.BackendName) *BackendRuntimeConfigApplyConfiguration {
+	b.BackendName = &value
 	return b
 }
 
@@ -56,14 +58,6 @@ func (b *BackendRuntimeConfigApplyConfiguration) WithVersion(value string) *Back
 	return b
 }
 
-// WithArgs sets the Args field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Args field is set to the value of the last call.
-func (b *BackendRuntimeConfigApplyConfiguration) WithArgs(value *BackendRuntimeArgApplyConfiguration) *BackendRuntimeConfigApplyConfiguration {
-	b.Args = value
-	return b
-}
-
 // WithEnvs adds the given value to the Envs field in the declarative configuration
 // and returns the receiver, so that objects can be build by chaining "With" function invocations.
 // If called multiple times, values provided by each call will be appended to the Envs field.
@@ -74,6 +68,24 @@ func (b *BackendRuntimeConfigApplyConfiguration) WithEnvs(values ...v1.EnvVar) *
 	return b
 }
 
+// WithConfigName sets the ConfigName field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ConfigName field is set to the value of the last call.
+func (b *BackendRuntimeConfigApplyConfiguration) WithConfigName(value string) *BackendRuntimeConfigApplyConfiguration {
+	b.ConfigName = &value
+	return b
+}
+
+// WithArgs adds the given value to the Args field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Args field.
+func (b *BackendRuntimeConfigApplyConfiguration) WithArgs(values ...string) *BackendRuntimeConfigApplyConfiguration {
+	for i := range values {
+		b.Args = append(b.Args, values[i])
+	}
+	return b
+}
+
 // WithResources sets the Resources field in the declarative configuration to the given value
 // and returns the receiver, so that objects can be built by chaining "With" function invocations.
 // If called multiple times, the Resources field is set to the value of the last call.
@@ -89,3 +101,11 @@ func (b *BackendRuntimeConfigApplyConfiguration) WithSharedMemorySize(value reso
 	b.SharedMemorySize = &value
 	return b
 }
+
+// WithScaleTrigger sets the ScaleTrigger field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ScaleTrigger field is set to the value of the last call.
+func (b *BackendRuntimeConfigApplyConfiguration) WithScaleTrigger(value *ScaleTriggerApplyConfiguration) *BackendRuntimeConfigApplyConfiguration {
+	b.ScaleTrigger = value
+	return b
+}
diff --git a/client-go/applyconfiguration/inference/v1alpha1/elasticconfig.go b/client-go/applyconfiguration/inference/v1alpha1/elasticconfig.go
index 7603a088..69a06a75 100644
--- a/client-go/applyconfiguration/inference/v1alpha1/elasticconfig.go
+++ b/client-go/applyconfiguration/inference/v1alpha1/elasticconfig.go
@@ -20,10 +20,8 @@ package v1alpha1
 // ElasticConfigApplyConfiguration represents a declarative configuration of the ElasticConfig type for use
 // with apply.
 type ElasticConfigApplyConfiguration struct {
-	MinReplicas     *int32                             `json:"minReplicas,omitempty"`
-	MaxReplicas     *int32                             `json:"maxReplicas,omitempty"`
-	ScaleTriggerRef *ScaleTriggerRefApplyConfiguration `json:"scaleTriggerRef,omitempty"`
-	ScaleTrigger    *ScaleTriggerApplyConfiguration    `json:"scaleTrigger,omitempty"`
+	MinReplicas *int32 `json:"minReplicas,omitempty"`
+	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
 }
 
 // ElasticConfigApplyConfiguration constructs a declarative configuration of the ElasticConfig type for use with
@@ -47,19 +45,3 @@ func (b *ElasticConfigApplyConfiguration) WithMaxReplicas(value int32) *ElasticC
 	b.MaxReplicas = &value
 	return b
 }
-
-// WithScaleTriggerRef sets the ScaleTriggerRef field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the ScaleTriggerRef field is set to the value of the last call.
-func (b *ElasticConfigApplyConfiguration) WithScaleTriggerRef(value *ScaleTriggerRefApplyConfiguration) *ElasticConfigApplyConfiguration {
-	b.ScaleTriggerRef = value
-	return b
-}
-
-// WithScaleTrigger sets the ScaleTrigger field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the ScaleTrigger field is set to the value of the last call.
-func (b *ElasticConfigApplyConfiguration) WithScaleTrigger(value *ScaleTriggerApplyConfiguration) *ElasticConfigApplyConfiguration {
-	b.ScaleTrigger = value
-	return b
-}
diff --git a/client-go/applyconfiguration/inference/v1alpha1/scaletriggerref.go b/client-go/applyconfiguration/inference/v1alpha1/scaletriggerref.go
deleted file mode 100644
index ba87d027..00000000
--- a/client-go/applyconfiguration/inference/v1alpha1/scaletriggerref.go
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
-Copyright 2024.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-// ScaleTriggerRefApplyConfiguration represents a declarative configuration of the ScaleTriggerRef type for use
-// with apply.
-type ScaleTriggerRefApplyConfiguration struct {
-	Name *string `json:"name,omitempty"`
-}
-
-// ScaleTriggerRefApplyConfiguration constructs a declarative configuration of the ScaleTriggerRef type for use with
-// apply.
-func ScaleTriggerRef() *ScaleTriggerRefApplyConfiguration {
-	return &ScaleTriggerRefApplyConfiguration{}
-}
-
-// WithName sets the Name field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *ScaleTriggerRefApplyConfiguration) WithName(value string) *ScaleTriggerRefApplyConfiguration {
-	b.Name = &value
-	return b
-}
diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go
index 62e75b80..e412064f 100644
--- a/client-go/applyconfiguration/utils.go
+++ b/client-go/applyconfiguration/utils.go
@@ -33,8 +33,6 @@ import (
 func ForKind(kind schema.GroupVersionKind) interface{} {
 	switch kind {
 	// Group=inference.llmaz.io, Version=v1alpha1
-	case v1alpha1.SchemeGroupVersion.WithKind("BackendRuntimeArg"):
-		return &inferencev1alpha1.BackendRuntimeArgApplyConfiguration{}
 	case v1alpha1.SchemeGroupVersion.WithKind("BackendRuntimeConfig"):
 		return &inferencev1alpha1.BackendRuntimeConfigApplyConfiguration{}
 	case v1alpha1.SchemeGroupVersion.WithKind("ElasticConfig"):
@@ -51,8 +49,6 @@ func ForKind(kind schema.GroupVersionKind) interface{} {
 		return &inferencev1alpha1.ResourceRequirementsApplyConfiguration{}
 	case v1alpha1.SchemeGroupVersion.WithKind("ScaleTrigger"):
 		return &inferencev1alpha1.ScaleTriggerApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("ScaleTriggerRef"):
-		return &inferencev1alpha1.ScaleTriggerRefApplyConfiguration{}
 	case v1alpha1.SchemeGroupVersion.WithKind("Service"):
 		return &inferencev1alpha1.ServiceApplyConfiguration{}
 	case v1alpha1.SchemeGroupVersion.WithKind("ServiceSpec"):
diff --git a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
index 7b7f89da..cf10d168 100644
--- a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
+++ b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
@@ -41,30 +41,6 @@ spec:
           spec:
             description: BackendRuntimeSpec defines the desired state of BackendRuntime
             properties:
-              args:
-                description: |-
-                  Args represents the preset arguments of the backendRuntime.
-                  They can be appended or overwritten by the Playground backendRuntimeConfig.
-                items:
-                  description: |-
-                    BackendRuntimeArg is the preset arguments for easy to use.
-                    Three preset names are provided: default, speculative-decoding, model-parallelism,
-                    do not change the name.
-                  properties:
-                    flags:
-                      description: |-
-                        Flags represents all the preset configurations.
-                        Flag around with {{ .CONFIG }} is a configuration waiting for render.
-                      items:
-                        type: string
-                      type: array
-                    name:
-                      default: default
-                      description: Name represents the identifier of the backendRuntime
-                        argument.
-                      type: string
-                  type: object
-                type: array
               commands:
                 description: Commands represents the default commands for the backendRuntime.
                 items:
@@ -516,648 +492,679 @@ spec:
                     format: int32
                     type: integer
                 type: object
-              resources:
-                description: |-
-                  Resources represents the resource requirements for backendRuntime, like cpu/mem,
-                  accelerators like GPU should not be defined here, but at the model flavors,
-                  or the values here will be overwritten.
-                properties:
-                  limits:
-                    additionalProperties:
-                      anyOf:
-                      - type: integer
-                      - type: string
-                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                      x-kubernetes-int-or-string: true
-                    description: |-
-                      Limits describes the maximum amount of compute resources allowed.
-                      More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
-                    type: object
-                  requests:
-                    additionalProperties:
-                      anyOf:
-                      - type: integer
-                      - type: string
-                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                      x-kubernetes-int-or-string: true
-                    description: |-
-                      Requests describes the minimum amount of compute resources required.
-                      If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
-                      otherwise to an implementation-defined value. Requests cannot exceed Limits.
-                      More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
-                    type: object
-                type: object
-              scaleTriggers:
-                description: |-
-                  ScaleTriggers represents a set of triggers preset to be used by Playground.
-                  If Playground not specify the scale trigger, the 0-index trigger will be used.
+              recommendedConfigs:
+                description: RecommendedConfigs represents the recommended configurations
+                  for the backendRuntime.
                 items:
                   description: |-
-                    NamedScaleTrigger defines the rules to scale the workloads.
-                    Only one trigger cloud work at a time. The name is used to identify
-                    the trigger in backendRuntime.
+                    RecommendedConfig represents the recommended configurations for the backendRuntime,
+                    user can choose one of them to apply.
                   properties:
-                    hpa:
-                      description: HPA represents the trigger configuration of the
-                        HorizontalPodAutoscaler.
+                    args:
+                      description: |-
+                        Args represents all the arguments for the command.
+                        An argument wrapped in {{ .CONFIG }} is a configuration value waiting to be rendered.
+                      items:
+                        type: string
+                      type: array
+                    name:
+                      description: Name represents the identifier of the config.
+                      type: string
+                    resources:
+                      description: |-
+                        Resources represents the resource requirements for backend, like cpu/mem,
+                        accelerators like GPU should not be defined here, but at the model flavors,
+                        or the values here will be overwritten.
                       properties:
-                        behavior:
+                        limits:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
+                          description: |-
+                            Limits describes the maximum amount of compute resources allowed.
+                            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+                          type: object
+                        requests:
+                          additionalProperties:
+                            anyOf:
+                            - type: integer
+                            - type: string
+                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                            x-kubernetes-int-or-string: true
                           description: |-
-                            behavior configures the scaling behavior of the target
-                            in both Up and Down directions (scaleUp and scaleDown fields respectively).
-                            If not set, the default HPAScalingRules for scale up and scale down are used.
+                            Requests describes the minimum amount of compute resources required.
+                            If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+                            otherwise to an implementation-defined value. Requests cannot exceed Limits.
+                            More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+                          type: object
+                      type: object
+                    scaleTrigger:
+                      description: |-
+                        ScaleTrigger defines the rules to scale the workloads.
+                        Only one trigger could work at a time.
+                      properties:
+                        hpa:
+                          description: HPA represents the trigger configuration of
+                            the HorizontalPodAutoscaler.
                           properties:
-                            scaleDown:
+                            behavior:
                               description: |-
-                                scaleDown is scaling policy for scaling Down.
-                                If not set, the default value is to allow to scale down to minReplicas pods, with a
-                                300 second stabilization window (i.e., the highest recommendation for
-                                the last 300sec is used).
+                                behavior configures the scaling behavior of the target
+                                in both Up and Down directions (scaleUp and scaleDown fields respectively).
+                                If not set, the default HPAScalingRules for scale up and scale down are used.
                               properties:
-                                policies:
-                                  description: |-
-                                    policies is a list of potential scaling polices which can be used during scaling.
-                                    At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
-                                  items:
-                                    description: HPAScalingPolicy is a single policy
-                                      which must hold true for a specified past interval.
-                                    properties:
-                                      periodSeconds:
-                                        description: |-
-                                          periodSeconds specifies the window of time for which the policy should hold true.
-                                          PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
-                                        format: int32
-                                        type: integer
-                                      type:
-                                        description: type is used to specify the scaling
-                                          policy.
-                                        type: string
-                                      value:
-                                        description: |-
-                                          value contains the amount of change which is permitted by the policy.
-                                          It must be greater than zero
-                                        format: int32
-                                        type: integer
-                                    required:
-                                    - periodSeconds
-                                    - type
-                                    - value
-                                    type: object
-                                  type: array
-                                  x-kubernetes-list-type: atomic
-                                selectPolicy:
+                                scaleDown:
                                   description: |-
-                                    selectPolicy is used to specify which policy should be used.
-                                    If not set, the default value Max is used.
-                                  type: string
-                                stabilizationWindowSeconds:
+                                    scaleDown is scaling policy for scaling Down.
+                                    If not set, the default value is to allow to scale down to minReplicas pods, with a
+                                    300 second stabilization window (i.e., the highest recommendation for
+                                    the last 300sec is used).
+                                  properties:
+                                    policies:
+                                      description: |-
+                                        policies is a list of potential scaling polices which can be used during scaling.
+                                        At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
+                                      items:
+                                        description: HPAScalingPolicy is a single
+                                          policy which must hold true for a specified
+                                          past interval.
+                                        properties:
+                                          periodSeconds:
+                                            description: |-
+                                              periodSeconds specifies the window of time for which the policy should hold true.
+                                              PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
+                                            format: int32
+                                            type: integer
+                                          type:
+                                            description: type is used to specify the
+                                              scaling policy.
+                                            type: string
+                                          value:
+                                            description: |-
+                                              value contains the amount of change which is permitted by the policy.
+                                              It must be greater than zero
+                                            format: int32
+                                            type: integer
+                                        required:
+                                        - periodSeconds
+                                        - type
+                                        - value
+                                        type: object
+                                      type: array
+                                      x-kubernetes-list-type: atomic
+                                    selectPolicy:
+                                      description: |-
+                                        selectPolicy is used to specify which policy should be used.
+                                        If not set, the default value Max is used.
+                                      type: string
+                                    stabilizationWindowSeconds:
+                                      description: |-
+                                        stabilizationWindowSeconds is the number of seconds for which past recommendations should be
+                                        considered while scaling up or scaling down.
+                                        StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
+                                        If not set, use the default values:
+                                        - For scale up: 0 (i.e. no stabilization is done).
+                                        - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
+                                      format: int32
+                                      type: integer
+                                  type: object
+                                scaleUp:
                                   description: |-
-                                    stabilizationWindowSeconds is the number of seconds for which past recommendations should be
-                                    considered while scaling up or scaling down.
-                                    StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
-                                    If not set, use the default values:
-                                    - For scale up: 0 (i.e. no stabilization is done).
-                                    - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
-                                  format: int32
-                                  type: integer
+                                    scaleUp is scaling policy for scaling Up.
+                                    If not set, the default value is the higher of:
+                                      * increase no more than 4 pods per 60 seconds
+                                      * double the number of pods per 60 seconds
+                                    No stabilization is used.
+                                  properties:
+                                    policies:
+                                      description: |-
+                                        policies is a list of potential scaling polices which can be used during scaling.
+                                        At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
+                                      items:
+                                        description: HPAScalingPolicy is a single
+                                          policy which must hold true for a specified
+                                          past interval.
+                                        properties:
+                                          periodSeconds:
+                                            description: |-
+                                              periodSeconds specifies the window of time for which the policy should hold true.
+                                              PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
+                                            format: int32
+                                            type: integer
+                                          type:
+                                            description: type is used to specify the
+                                              scaling policy.
+                                            type: string
+                                          value:
+                                            description: |-
+                                              value contains the amount of change which is permitted by the policy.
+                                              It must be greater than zero
+                                            format: int32
+                                            type: integer
+                                        required:
+                                        - periodSeconds
+                                        - type
+                                        - value
+                                        type: object
+                                      type: array
+                                      x-kubernetes-list-type: atomic
+                                    selectPolicy:
+                                      description: |-
+                                        selectPolicy is used to specify which policy should be used.
+                                        If not set, the default value Max is used.
+                                      type: string
+                                    stabilizationWindowSeconds:
+                                      description: |-
+                                        stabilizationWindowSeconds is the number of seconds for which past recommendations should be
+                                        considered while scaling up or scaling down.
+                                        StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
+                                        If not set, use the default values:
+                                        - For scale up: 0 (i.e. no stabilization is done).
+                                        - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
+                                      format: int32
+                                      type: integer
+                                  type: object
                               type: object
-                            scaleUp:
+                            metrics:
                               description: |-
-                                scaleUp is scaling policy for scaling Up.
-                                If not set, the default value is the higher of:
-                                  * increase no more than 4 pods per 60 seconds
-                                  * double the number of pods per 60 seconds
-                                No stabilization is used.
-                              properties:
-                                policies:
-                                  description: |-
-                                    policies is a list of potential scaling polices which can be used during scaling.
-                                    At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
-                                  items:
-                                    description: HPAScalingPolicy is a single policy
-                                      which must hold true for a specified past interval.
-                                    properties:
-                                      periodSeconds:
-                                        description: |-
-                                          periodSeconds specifies the window of time for which the policy should hold true.
-                                          PeriodSeconds must be greater than zero and less than or equal to 1800 (30 min).
-                                        format: int32
-                                        type: integer
-                                      type:
-                                        description: type is used to specify the scaling
-                                          policy.
-                                        type: string
-                                      value:
-                                        description: |-
-                                          value contains the amount of change which is permitted by the policy.
-                                          It must be greater than zero
-                                        format: int32
-                                        type: integer
-                                    required:
-                                    - periodSeconds
-                                    - type
-                                    - value
-                                    type: object
-                                  type: array
-                                  x-kubernetes-list-type: atomic
-                                selectPolicy:
-                                  description: |-
-                                    selectPolicy is used to specify which policy should be used.
-                                    If not set, the default value Max is used.
-                                  type: string
-                                stabilizationWindowSeconds:
-                                  description: |-
-                                    stabilizationWindowSeconds is the number of seconds for which past recommendations should be
-                                    considered while scaling up or scaling down.
-                                    StabilizationWindowSeconds must be greater than or equal to zero and less than or equal to 3600 (one hour).
-                                    If not set, use the default values:
-                                    - For scale up: 0 (i.e. no stabilization is done).
-                                    - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
-                                  format: int32
-                                  type: integer
-                              type: object
-                          type: object
-                        metrics:
-                          description: |-
-                            metrics contains the specifications for which to use to calculate the
-                            desired replica count (the maximum replica count across all metrics will
-                            be used).  The desired replica count is calculated multiplying the
-                            ratio between the target value and the current value by the current
-                            number of pods.  Ergo, metrics used must decrease as the pod count is
-                            increased, and vice-versa.  See the individual metric source types for
-                            more information about how each type of metric must respond.
-                          items:
-                            description: |-
-                              MetricSpec specifies how to scale based on a single metric
-                              (only `type` and one other matching field should be set at once).
-                            properties:
-                              containerResource:
+                                metrics contains the specifications for which to use to calculate the
+                                desired replica count (the maximum replica count across all metrics will
+                                be used).  The desired replica count is calculated multiplying the
+                                ratio between the target value and the current value by the current
+                                number of pods.  Ergo, metrics used must decrease as the pod count is
+                                increased, and vice-versa.  See the individual metric source types for
+                                more information about how each type of metric must respond.
+                              items:
                                 description: |-
-                                  containerResource refers to a resource metric (such as those specified in
-                                  requests and limits) known to Kubernetes describing a single container in
-                                  each pod of the current scale target (e.g. CPU or memory). Such metrics are
-                                  built in to Kubernetes, and have special scaling options on top of those
-                                  available to normal per-pod metrics using the "pods" source.
+                                  MetricSpec specifies how to scale based on a single metric
+                                  (only `type` and one other matching field should be set at once).
                                 properties:
-                                  container:
-                                    description: container is the name of the container
-                                      in the pods of the scaling target
-                                    type: string
-                                  name:
-                                    description: name is the name of the resource
-                                      in question.
-                                    type: string
-                                  target:
-                                    description: target specifies the target value
-                                      for the given metric
+                                  containerResource:
+                                    description: |-
+                                      containerResource refers to a resource metric (such as those specified in
+                                      requests and limits) known to Kubernetes describing a single container in
+                                      each pod of the current scale target (e.g. CPU or memory). Such metrics are
+                                      built in to Kubernetes, and have special scaling options on top of those
+                                      available to normal per-pod metrics using the "pods" source.
                                     properties:
-                                      averageUtilization:
-                                        description: |-
-                                          averageUtilization is the target value of the average of the
-                                          resource metric across all relevant pods, represented as a percentage of
-                                          the requested value of the resource for the pods.
-                                          Currently only valid for Resource metric source type
-                                        format: int32
-                                        type: integer
-                                      averageValue:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: |-
-                                          averageValue is the target value of the average of the
-                                          metric across all relevant pods (as a quantity)
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                      type:
-                                        description: type represents whether the metric
-                                          type is Utilization, Value, or AverageValue
+                                      container:
+                                        description: container is the name of the
+                                          container in the pods of the scaling target
                                         type: string
-                                      value:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: value is the target value of
-                                          the metric (as a quantity).
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                    required:
-                                    - type
-                                    type: object
-                                required:
-                                - container
-                                - name
-                                - target
-                                type: object
-                              external:
-                                description: |-
-                                  external refers to a global metric that is not associated
-                                  with any Kubernetes object. It allows autoscaling based on information
-                                  coming from components running outside of cluster
-                                  (for example length of queue in cloud messaging service, or
-                                  QPS from loadbalancer running outside of cluster).
-                                properties:
-                                  metric:
-                                    description: metric identifies the target metric
-                                      by name and selector
-                                    properties:
                                       name:
-                                        description: name is the name of the given
-                                          metric
+                                        description: name is the name of the resource
+                                          in question.
                                         type: string
-                                      selector:
-                                        description: |-
-                                          selector is the string-encoded form of a standard kubernetes label selector for the given metric
-                                          When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping.
-                                          When unset, just the metricName will be used to gather metrics.
+                                      target:
+                                        description: target specifies the target value
+                                          for the given metric
                                         properties:
-                                          matchExpressions:
-                                            description: matchExpressions is a list
-                                              of label selector requirements. The
-                                              requirements are ANDed.
-                                            items:
-                                              description: |-
-                                                A label selector requirement is a selector that contains values, a key, and an operator that
-                                                relates the key and values.
-                                              properties:
-                                                key:
-                                                  description: key is the label key
-                                                    that the selector applies to.
-                                                  type: string
-                                                operator:
-                                                  description: |-
-                                                    operator represents a key's relationship to a set of values.
-                                                    Valid operators are In, NotIn, Exists and DoesNotExist.
-                                                  type: string
-                                                values:
-                                                  description: |-
-                                                    values is an array of string values. If the operator is In or NotIn,
-                                                    the values array must be non-empty. If the operator is Exists or DoesNotExist,
-                                                    the values array must be empty. This array is replaced during a strategic
-                                                    merge patch.
-                                                  items:
-                                                    type: string
-                                                  type: array
-                                                  x-kubernetes-list-type: atomic
-                                              required:
-                                              - key
-                                              - operator
-                                              type: object
-                                            type: array
-                                            x-kubernetes-list-type: atomic
-                                          matchLabels:
-                                            additionalProperties:
-                                              type: string
+                                          averageUtilization:
                                             description: |-
-                                              matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
-                                              map is equivalent to an element of matchExpressions, whose key field is "key", the
-                                              operator is "In", and the values array contains only "value". The requirements are ANDed.
-                                            type: object
+                                              averageUtilization is the target value of the average of the
+                                              resource metric across all relevant pods, represented as a percentage of
+                                              the requested value of the resource for the pods.
+                                              Currently only valid for Resource metric source type
+                                            format: int32
+                                            type: integer
+                                          averageValue:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: |-
+                                              averageValue is the target value of the average of the
+                                              metric across all relevant pods (as a quantity)
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                          type:
+                                            description: type represents whether the
+                                              metric type is Utilization, Value, or
+                                              AverageValue
+                                            type: string
+                                          value:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: value is the target value
+                                              of the metric (as a quantity).
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                        required:
+                                        - type
                                         type: object
-                                        x-kubernetes-map-type: atomic
                                     required:
+                                    - container
                                     - name
+                                    - target
                                     type: object
-                                  target:
-                                    description: target specifies the target value
-                                      for the given metric
+                                  external:
+                                    description: |-
+                                      external refers to a global metric that is not associated
+                                      with any Kubernetes object. It allows autoscaling based on information
+                                      coming from components running outside of cluster
+                                      (for example length of queue in cloud messaging service, or
+                                      QPS from loadbalancer running outside of cluster).
                                     properties:
-                                      averageUtilization:
-                                        description: |-
-                                          averageUtilization is the target value of the average of the
-                                          resource metric across all relevant pods, represented as a percentage of
-                                          the requested value of the resource for the pods.
-                                          Currently only valid for Resource metric source type
-                                        format: int32
-                                        type: integer
-                                      averageValue:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: |-
-                                          averageValue is the target value of the average of the
-                                          metric across all relevant pods (as a quantity)
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                      type:
-                                        description: type represents whether the metric
-                                          type is Utilization, Value, or AverageValue
-                                        type: string
-                                      value:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: value is the target value of
-                                          the metric (as a quantity).
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                    required:
-                                    - type
-                                    type: object
-                                required:
-                                - metric
-                                - target
-                                type: object
-                              object:
-                                description: |-
-                                  object refers to a metric describing a single kubernetes object
-                                  (for example, hits-per-second on an Ingress object).
-                                properties:
-                                  describedObject:
-                                    description: describedObject specifies the descriptions
-                                      of a object,such as kind,name apiVersion
-                                    properties:
-                                      apiVersion:
-                                        description: apiVersion is the API version
-                                          of the referent
-                                        type: string
-                                      kind:
-                                        description: 'kind is the kind of the referent;
-                                          More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
-                                        type: string
-                                      name:
-                                        description: 'name is the name of the referent;
-                                          More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
-                                        type: string
+                                      metric:
+                                        description: metric identifies the target
+                                          metric by name and selector
+                                        properties:
+                                          name:
+                                            description: name is the name of the given
+                                              metric
+                                            type: string
+                                          selector:
+                                            description: |-
+                                              selector is the string-encoded form of a standard kubernetes label selector for the given metric
+                                              When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping.
+                                              When unset, just the metricName will be used to gather metrics.
+                                            properties:
+                                              matchExpressions:
+                                                description: matchExpressions is a
+                                                  list of label selector requirements.
+                                                  The requirements are ANDed.
+                                                items:
+                                                  description: |-
+                                                    A label selector requirement is a selector that contains values, a key, and an operator that
+                                                    relates the key and values.
+                                                  properties:
+                                                    key:
+                                                      description: key is the label
+                                                        key that the selector applies
+                                                        to.
+                                                      type: string
+                                                    operator:
+                                                      description: |-
+                                                        operator represents a key's relationship to a set of values.
+                                                        Valid operators are In, NotIn, Exists and DoesNotExist.
+                                                      type: string
+                                                    values:
+                                                      description: |-
+                                                        values is an array of string values. If the operator is In or NotIn,
+                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                                                        the values array must be empty. This array is replaced during a strategic
+                                                        merge patch.
+                                                      items:
+                                                        type: string
+                                                      type: array
+                                                      x-kubernetes-list-type: atomic
+                                                  required:
+                                                  - key
+                                                  - operator
+                                                  type: object
+                                                type: array
+                                                x-kubernetes-list-type: atomic
+                                              matchLabels:
+                                                additionalProperties:
+                                                  type: string
+                                                description: |-
+                                                  matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                                                  map is equivalent to an element of matchExpressions, whose key field is "key", the
+                                                  operator is "In", and the values array contains only "value". The requirements are ANDed.
+                                                type: object
+                                            type: object
+                                            x-kubernetes-map-type: atomic
+                                        required:
+                                        - name
+                                        type: object
+                                      target:
+                                        description: target specifies the target value
+                                          for the given metric
+                                        properties:
+                                          averageUtilization:
+                                            description: |-
+                                              averageUtilization is the target value of the average of the
+                                              resource metric across all relevant pods, represented as a percentage of
+                                              the requested value of the resource for the pods.
+                                              Currently only valid for Resource metric source type
+                                            format: int32
+                                            type: integer
+                                          averageValue:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: |-
+                                              averageValue is the target value of the average of the
+                                              metric across all relevant pods (as a quantity)
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                          type:
+                                            description: type represents whether the
+                                              metric type is Utilization, Value, or
+                                              AverageValue
+                                            type: string
+                                          value:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: value is the target value
+                                              of the metric (as a quantity).
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                        required:
+                                        - type
+                                        type: object
                                     required:
-                                    - kind
-                                    - name
+                                    - metric
+                                    - target
                                     type: object
-                                  metric:
-                                    description: metric identifies the target metric
-                                      by name and selector
+                                  object:
+                                    description: |-
+                                      object refers to a metric describing a single kubernetes object
+                                      (for example, hits-per-second on an Ingress object).
                                     properties:
-                                      name:
-                                        description: name is the name of the given
-                                          metric
-                                        type: string
-                                      selector:
-                                        description: |-
-                                          selector is the string-encoded form of a standard kubernetes label selector for the given metric
-                                          When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping.
-                                          When unset, just the metricName will be used to gather metrics.
+                                      describedObject:
+                                        description: describedObject specifies the
+                                          descriptions of a object,such as kind,name
+                                          apiVersion
                                         properties:
-                                          matchExpressions:
-                                            description: matchExpressions is a list
-                                              of label selector requirements. The
-                                              requirements are ANDed.
-                                            items:
-                                              description: |-
-                                                A label selector requirement is a selector that contains values, a key, and an operator that
-                                                relates the key and values.
-                                              properties:
-                                                key:
-                                                  description: key is the label key
-                                                    that the selector applies to.
-                                                  type: string
-                                                operator:
+                                          apiVersion:
+                                            description: apiVersion is the API version
+                                              of the referent
+                                            type: string
+                                          kind:
+                                            description: 'kind is the kind of the
+                                              referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+                                            type: string
+                                          name:
+                                            description: 'name is the name of the
+                                              referent; More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
+                                            type: string
+                                        required:
+                                        - kind
+                                        - name
+                                        type: object
+                                      metric:
+                                        description: metric identifies the target
+                                          metric by name and selector
+                                        properties:
+                                          name:
+                                            description: name is the name of the given
+                                              metric
+                                            type: string
+                                          selector:
+                                            description: |-
+                                              selector is the string-encoded form of a standard kubernetes label selector for the given metric
+                                              When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping.
+                                              When unset, just the metricName will be used to gather metrics.
+                                            properties:
+                                              matchExpressions:
+                                                description: matchExpressions is a
+                                                  list of label selector requirements.
+                                                  The requirements are ANDed.
+                                                items:
                                                   description: |-
-                                                    operator represents a key's relationship to a set of values.
-                                                    Valid operators are In, NotIn, Exists and DoesNotExist.
+                                                    A label selector requirement is a selector that contains values, a key, and an operator that
+                                                    relates the key and values.
+                                                  properties:
+                                                    key:
+                                                      description: key is the label
+                                                        key that the selector applies
+                                                        to.
+                                                      type: string
+                                                    operator:
+                                                      description: |-
+                                                        operator represents a key's relationship to a set of values.
+                                                        Valid operators are In, NotIn, Exists and DoesNotExist.
+                                                      type: string
+                                                    values:
+                                                      description: |-
+                                                        values is an array of string values. If the operator is In or NotIn,
+                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                                                        the values array must be empty. This array is replaced during a strategic
+                                                        merge patch.
+                                                      items:
+                                                        type: string
+                                                      type: array
+                                                      x-kubernetes-list-type: atomic
+                                                  required:
+                                                  - key
+                                                  - operator
+                                                  type: object
+                                                type: array
+                                                x-kubernetes-list-type: atomic
+                                              matchLabels:
+                                                additionalProperties:
                                                   type: string
-                                                values:
-                                                  description: |-
-                                                    values is an array of string values. If the operator is In or NotIn,
-                                                    the values array must be non-empty. If the operator is Exists or DoesNotExist,
-                                                    the values array must be empty. This array is replaced during a strategic
-                                                    merge patch.
-                                                  items:
-                                                    type: string
-                                                  type: array
-                                                  x-kubernetes-list-type: atomic
-                                              required:
-                                              - key
-                                              - operator
-                                              type: object
-                                            type: array
-                                            x-kubernetes-list-type: atomic
-                                          matchLabels:
-                                            additionalProperties:
-                                              type: string
-                                            description: |-
-                                              matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
-                                              map is equivalent to an element of matchExpressions, whose key field is "key", the
-                                              operator is "In", and the values array contains only "value". The requirements are ANDed.
+                                                description: |-
+                                                  matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                                                  map is equivalent to an element of matchExpressions, whose key field is "key", the
+                                                  operator is "In", and the values array contains only "value". The requirements are ANDed.
+                                                type: object
                                             type: object
+                                            x-kubernetes-map-type: atomic
+                                        required:
+                                        - name
+                                        type: object
+                                      target:
+                                        description: target specifies the target value
+                                          for the given metric
+                                        properties:
+                                          averageUtilization:
+                                            description: |-
+                                              averageUtilization is the target value of the average of the
+                                              resource metric across all relevant pods, represented as a percentage of
+                                              the requested value of the resource for the pods.
+                                              Currently only valid for Resource metric source type
+                                            format: int32
+                                            type: integer
+                                          averageValue:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: |-
+                                              averageValue is the target value of the average of the
+                                              metric across all relevant pods (as a quantity)
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                          type:
+                                            description: type represents whether the
+                                              metric type is Utilization, Value, or
+                                              AverageValue
+                                            type: string
+                                          value:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: value is the target value
+                                              of the metric (as a quantity).
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                        required:
+                                        - type
                                         type: object
-                                        x-kubernetes-map-type: atomic
                                     required:
-                                    - name
+                                    - describedObject
+                                    - metric
+                                    - target
                                     type: object
-                                  target:
-                                    description: target specifies the target value
-                                      for the given metric
+                                  pods:
+                                    description: |-
+                                      pods refers to a metric describing each pod in the current scale target
+                                      (for example, transactions-processed-per-second).  The values will be
+                                      averaged together before being compared to the target value.
                                     properties:
-                                      averageUtilization:
-                                        description: |-
-                                          averageUtilization is the target value of the average of the
-                                          resource metric across all relevant pods, represented as a percentage of
-                                          the requested value of the resource for the pods.
-                                          Currently only valid for Resource metric source type
-                                        format: int32
-                                        type: integer
-                                      averageValue:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: |-
-                                          averageValue is the target value of the average of the
-                                          metric across all relevant pods (as a quantity)
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                      type:
-                                        description: type represents whether the metric
-                                          type is Utilization, Value, or AverageValue
-                                        type: string
-                                      value:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: value is the target value of
-                                          the metric (as a quantity).
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                    required:
-                                    - type
-                                    type: object
-                                required:
-                                - describedObject
-                                - metric
-                                - target
-                                type: object
-                              pods:
-                                description: |-
-                                  pods refers to a metric describing each pod in the current scale target
-                                  (for example, transactions-processed-per-second).  The values will be
-                                  averaged together before being compared to the target value.
-                                properties:
-                                  metric:
-                                    description: metric identifies the target metric
-                                      by name and selector
-                                    properties:
-                                      name:
-                                        description: name is the name of the given
-                                          metric
-                                        type: string
-                                      selector:
-                                        description: |-
-                                          selector is the string-encoded form of a standard kubernetes label selector for the given metric
-                                          When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping.
-                                          When unset, just the metricName will be used to gather metrics.
+                                      metric:
+                                        description: metric identifies the target
+                                          metric by name and selector
                                         properties:
-                                          matchExpressions:
-                                            description: matchExpressions is a list
-                                              of label selector requirements. The
-                                              requirements are ANDed.
-                                            items:
-                                              description: |-
-                                                A label selector requirement is a selector that contains values, a key, and an operator that
-                                                relates the key and values.
-                                              properties:
-                                                key:
-                                                  description: key is the label key
-                                                    that the selector applies to.
-                                                  type: string
-                                                operator:
+                                          name:
+                                            description: name is the name of the given
+                                              metric
+                                            type: string
+                                          selector:
+                                            description: |-
+                                              selector is the string-encoded form of a standard kubernetes label selector for the given metric
+                                              When set, it is passed as an additional parameter to the metrics server for more specific metrics scoping.
+                                              When unset, just the metricName will be used to gather metrics.
+                                            properties:
+                                              matchExpressions:
+                                                description: matchExpressions is a
+                                                  list of label selector requirements.
+                                                  The requirements are ANDed.
+                                                items:
                                                   description: |-
-                                                    operator represents a key's relationship to a set of values.
-                                                    Valid operators are In, NotIn, Exists and DoesNotExist.
+                                                    A label selector requirement is a selector that contains values, a key, and an operator that
+                                                    relates the key and values.
+                                                  properties:
+                                                    key:
+                                                      description: key is the label
+                                                        key that the selector applies
+                                                        to.
+                                                      type: string
+                                                    operator:
+                                                      description: |-
+                                                        operator represents a key's relationship to a set of values.
+                                                        Valid operators are In, NotIn, Exists and DoesNotExist.
+                                                      type: string
+                                                    values:
+                                                      description: |-
+                                                        values is an array of string values. If the operator is In or NotIn,
+                                                        the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                                                        the values array must be empty. This array is replaced during a strategic
+                                                        merge patch.
+                                                      items:
+                                                        type: string
+                                                      type: array
+                                                      x-kubernetes-list-type: atomic
+                                                  required:
+                                                  - key
+                                                  - operator
+                                                  type: object
+                                                type: array
+                                                x-kubernetes-list-type: atomic
+                                              matchLabels:
+                                                additionalProperties:
                                                   type: string
-                                                values:
-                                                  description: |-
-                                                    values is an array of string values. If the operator is In or NotIn,
-                                                    the values array must be non-empty. If the operator is Exists or DoesNotExist,
-                                                    the values array must be empty. This array is replaced during a strategic
-                                                    merge patch.
-                                                  items:
-                                                    type: string
-                                                  type: array
-                                                  x-kubernetes-list-type: atomic
-                                              required:
-                                              - key
-                                              - operator
-                                              type: object
-                                            type: array
-                                            x-kubernetes-list-type: atomic
-                                          matchLabels:
-                                            additionalProperties:
-                                              type: string
-                                            description: |-
-                                              matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
-                                              map is equivalent to an element of matchExpressions, whose key field is "key", the
-                                              operator is "In", and the values array contains only "value". The requirements are ANDed.
+                                                description: |-
+                                                  matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                                                  map is equivalent to an element of matchExpressions, whose key field is "key", the
+                                                  operator is "In", and the values array contains only "value". The requirements are ANDed.
+                                                type: object
                                             type: object
+                                            x-kubernetes-map-type: atomic
+                                        required:
+                                        - name
+                                        type: object
+                                      target:
+                                        description: target specifies the target value
+                                          for the given metric
+                                        properties:
+                                          averageUtilization:
+                                            description: |-
+                                              averageUtilization is the target value of the average of the
+                                              resource metric across all relevant pods, represented as a percentage of
+                                              the requested value of the resource for the pods.
+                                              Currently only valid for Resource metric source type
+                                            format: int32
+                                            type: integer
+                                          averageValue:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: |-
+                                              averageValue is the target value of the average of the
+                                              metric across all relevant pods (as a quantity)
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                          type:
+                                            description: type represents whether the
+                                              metric type is Utilization, Value, or
+                                              AverageValue
+                                            type: string
+                                          value:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: value is the target value
+                                              of the metric (as a quantity).
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                        required:
+                                        - type
                                         type: object
-                                        x-kubernetes-map-type: atomic
                                     required:
-                                    - name
+                                    - metric
+                                    - target
                                     type: object
-                                  target:
-                                    description: target specifies the target value
-                                      for the given metric
+                                  resource:
+                                    description: |-
+                                      resource refers to a resource metric (such as those specified in
+                                      requests and limits) known to Kubernetes describing each pod in the
+                                      current scale target (e.g. CPU or memory). Such metrics are built in to
+                                      Kubernetes, and have special scaling options on top of those available
+                                      to normal per-pod metrics using the "pods" source.
                                     properties:
-                                      averageUtilization:
-                                        description: |-
-                                          averageUtilization is the target value of the average of the
-                                          resource metric across all relevant pods, represented as a percentage of
-                                          the requested value of the resource for the pods.
-                                          Currently only valid for Resource metric source type
-                                        format: int32
-                                        type: integer
-                                      averageValue:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: |-
-                                          averageValue is the target value of the average of the
-                                          metric across all relevant pods (as a quantity)
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                      type:
-                                        description: type represents whether the metric
-                                          type is Utilization, Value, or AverageValue
+                                      name:
+                                        description: name is the name of the resource
+                                          in question.
                                         type: string
-                                      value:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: value is the target value of
-                                          the metric (as a quantity).
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
+                                      target:
+                                        description: target specifies the target value
+                                          for the given metric
+                                        properties:
+                                          averageUtilization:
+                                            description: |-
+                                              averageUtilization is the target value of the average of the
+                                              resource metric across all relevant pods, represented as a percentage of
+                                              the requested value of the resource for the pods.
+                                              Currently only valid for Resource metric source type
+                                            format: int32
+                                            type: integer
+                                          averageValue:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: |-
+                                              averageValue is the target value of the average of the
+                                              metric across all relevant pods (as a quantity)
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                          type:
+                                            description: type represents whether the
+                                              metric type is Utilization, Value, or
+                                              AverageValue
+                                            type: string
+                                          value:
+                                            anyOf:
+                                            - type: integer
+                                            - type: string
+                                            description: value is the target value
+                                              of the metric (as a quantity).
+                                            pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                            x-kubernetes-int-or-string: true
+                                        required:
+                                        - type
+                                        type: object
                                     required:
-                                    - type
+                                    - name
+                                    - target
                                     type: object
-                                required:
-                                - metric
-                                - target
-                                type: object
-                              resource:
-                                description: |-
-                                  resource refers to a resource metric (such as those specified in
-                                  requests and limits) known to Kubernetes describing each pod in the
-                                  current scale target (e.g. CPU or memory). Such metrics are built in to
-                                  Kubernetes, and have special scaling options on top of those available
-                                  to normal per-pod metrics using the "pods" source.
-                                properties:
-                                  name:
-                                    description: name is the name of the resource
-                                      in question.
+                                  type:
+                                    description: |-
+                                      type is the type of metric source.  It should be one of "ContainerResource", "External",
+                                      "Object", "Pods" or "Resource", each mapping to a matching field in the object.
                                     type: string
-                                  target:
-                                    description: target specifies the target value
-                                      for the given metric
-                                    properties:
-                                      averageUtilization:
-                                        description: |-
-                                          averageUtilization is the target value of the average of the
-                                          resource metric across all relevant pods, represented as a percentage of
-                                          the requested value of the resource for the pods.
-                                          Currently only valid for Resource metric source type
-                                        format: int32
-                                        type: integer
-                                      averageValue:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: |-
-                                          averageValue is the target value of the average of the
-                                          metric across all relevant pods (as a quantity)
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                      type:
-                                        description: type represents whether the metric
-                                          type is Utilization, Value, or AverageValue
-                                        type: string
-                                      value:
-                                        anyOf:
-                                        - type: integer
-                                        - type: string
-                                        description: value is the target value of
-                                          the metric (as a quantity).
-                                        pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                                        x-kubernetes-int-or-string: true
-                                    required:
-                                    - type
-                                    type: object
                                 required:
-                                - name
-                                - target
+                                - type
                                 type: object
-                              type:
-                                description: |-
-                                  type is the type of metric source.  It should be one of "ContainerResource", "External",
-                                  "Object", "Pods" or "Resource", each mapping to a matching field in the object.
-                                type: string
-                            required:
-                            - type
-                            type: object
-                          type: array
+                              type: array
+                          type: object
                       type: object
-                    name:
+                    sharedMemorySize:
+                      anyOf:
+                      - type: integer
+                      - type: string
                       description: |-
-                        Name represents the identifier of the scale trigger, e.g. some triggers defined for
-                        latency sensitive workloads, some are defined for throughput sensitive workloads.
-                      type: string
+                        SharedMemorySize represents the size of /dev/shm required in the runtime of
+                        inference workload.
+                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                      x-kubernetes-int-or-string: true
+                  required:
+                  - name
                   type: object
                 type: array
               startupProbe:
@@ -1323,7 +1330,6 @@ spec:
                 type: string
             required:
             - image
-            - resources
             - version
             type: object
           status:
diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
index f7b62761..5774f499 100644
--- a/config/crd/bases/inference.llmaz.io_playgrounds.yaml
+++ b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
@@ -48,22 +48,23 @@ spec:
                 properties:
                   args:
                     description: |-
-                      Args represents the specified arguments of the backendRuntime,
-                      will be append to the backendRuntime.spec.Args.
-                    properties:
-                      flags:
-                        description: |-
-                          Flags represents all the preset configurations.
-                          Flag around with {{ .CONFIG }} is a configuration waiting for render.
-                        items:
-                          type: string
-                        type: array
-                      name:
-                        default: default
-                        description: Name represents the identifier of the backendRuntime
-                          argument.
-                        type: string
-                    type: object
+                      Args represents all the arguments for the command.
+                      An argument wrapped in {{ .CONFIG }} is a placeholder waiting to be rendered.
+                      Args defined here will be "appended" to the args in the recommendedConfig.
+                    items:
+                      type: string
+                    type: array
+                  backendName:
+                    default: vllm
+                    description: BackendName represents the inference backend under
+                      the hood, e.g. vLLM.
+                    type: string
+                  configName:
+                    description: |-
+                      ConfigName represents the recommended configuration name for the backend.
+                      It will be inferred from the models in the runtime if not specified, e.g. default,
+                      speculative-decoding or model-parallelism.
+                    type: string
                   envs:
                     description: Envs represents the environments set to the container.
                     items:
@@ -184,16 +185,12 @@ spec:
                       - name
                       type: object
                     type: array
-                  name:
-                    default: vllm
-                    description: Name represents the inference backend under the hood,
-                      e.g. vLLM.
-                    type: string
                   resources:
                     description: |-
                       Resources represents the resource requirements for backend, like cpu/mem,
                       accelerators like GPU should not be defined here, but at the model flavors,
                       or the values here will be overwritten.
+                      Resources defined here will "overwrite" the resources in the recommendedConfig.
                     properties:
                       limits:
                         additionalProperties:
@@ -220,47 +217,11 @@ spec:
                           More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                         type: object
                     type: object
-                  sharedMemorySize:
-                    anyOf:
-                    - type: integer
-                    - type: string
-                    description: |-
-                      SharedMemorySize represents the size of /dev/shm required in the runtime of
-                      inference workload.
-                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                    x-kubernetes-int-or-string: true
-                  version:
-                    description: |-
-                      Version represents the backend version if you want a different one
-                      from the default version.
-                    type: string
-                type: object
-              elasticConfig:
-                description: |-
-                  ElasticConfig defines the configuration for elastic usage,
-                  e.g. the max/min replicas.
-                  Note: this requires to install the HPA first or will report error.
-                properties:
-                  maxReplicas:
-                    description: |-
-                      MaxReplicas indicates the maximum number of inference workloads based on the traffic.
-                      Default to nil means there's no limit for the instance number.
-                    format: int32
-                    type: integer
-                  minReplicas:
-                    default: 1
-                    description: |-
-                      MinReplicas indicates the minimum number of inference workloads based on the traffic.
-                      Default to 1.
-                      MinReplicas couldn't be 0 now, will support serverless in the future.
-                    format: int32
-                    type: integer
                   scaleTrigger:
                     description: |-
-                      ScaleTrigger defines a set of triggers to scale the workloads.
-                      If not defined, trigger configured in backendRuntime will be used,
-                      otherwise, trigger defined here will overwrite the defaulted ones.
-                      ScaleTriggerRef and ScaleTrigger can't be set at the same time.
+                      ScaleTrigger defines the rules to scale the workloads.
+                      Only one trigger could work at a time, mostly used in Playground.
+                      ScaleTrigger defined here will "overwrite" the scaleTrigger in the recommendedConfig.
                     properties:
                       hpa:
                         description: HPA represents the trigger configuration of the
@@ -869,19 +830,41 @@ spec:
                             type: array
                         type: object
                     type: object
-                  scaleTriggerRef:
+                  sharedMemorySize:
+                    anyOf:
+                    - type: integer
+                    - type: string
                     description: |-
-                      ScaleTriggerRef refers to the configured scaleTrigger in the backendRuntime
-                      with tuned target value.
-                      ScaleTriggerRef and ScaleTrigger can't be set at the same time.
-                    properties:
-                      name:
-                        description: Name represents the scale trigger name defined
-                          in the backendRuntime.scaleTriggers.
-                        type: string
-                    required:
-                    - name
-                    type: object
+                      SharedMemorySize represents the size of /dev/shm required in the runtime of
+                      inference workload.
+                      SharedMemorySize defined here will "overwrite" the sharedMemorySize in the recommendedConfig.
+                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                    x-kubernetes-int-or-string: true
+                  version:
+                    description: |-
+                      Version represents the backend version if you want a different one
+                      from the default version.
+                    type: string
+                type: object
+              elasticConfig:
+                description: |-
+                  ElasticConfig defines the configuration for elastic usage,
+                  e.g. the max/min replicas.
+                properties:
+                  maxReplicas:
+                    description: |-
+                      MaxReplicas indicates the maximum number of inference workloads based on the traffic.
+                      Default to nil means there's no limit for the instance number.
+                    format: int32
+                    type: integer
+                  minReplicas:
+                    default: 1
+                    description: |-
+                      MinReplicas indicates the minimum number of inference workloads based on the traffic.
+                      Default to 1.
+                      MinReplicas couldn't be 0 now, will support serverless in the future.
+                    format: int32
+                    type: integer
                 type: object
               modelClaim:
                 description: |-
diff --git a/docs/examples/hpa/playground.yaml b/docs/examples/hpa/playground.yaml
index 07b68770..34fd84da 100644
--- a/docs/examples/hpa/playground.yaml
+++ b/docs/examples/hpa/playground.yaml
@@ -1,20 +1,19 @@
 apiVersion: inference.llmaz.io/v1alpha1
 kind: Playground
 metadata:
-  name: qwen2-0--5b
+  name: qwen2-0--5b-hpa
 spec:
   replicas: 1
   modelClaim:
     modelName: qwen2-0--5b-gguf
-  backendRuntimeConfig:
-    name: llamacpp
-    args:
-      name: "default"
-      flags:
-        - -fa # use flash attention
   elasticConfig:
     minReplicas: 1
     maxReplicas: 3
+  backendRuntimeConfig:
+    backendName: llamacpp
+    configName: default
+    args:
+      - -fa # use flash attention
     scaleTrigger:
       hpa:
         metrics:
diff --git a/docs/examples/llamacpp/playground.yaml b/docs/examples/llamacpp/playground.yaml
index de621667..95e6524f 100644
--- a/docs/examples/llamacpp/playground.yaml
+++ b/docs/examples/llamacpp/playground.yaml
@@ -7,8 +7,7 @@ spec:
   modelClaim:
     modelName: qwen2-0--5b-gguf
   backendRuntimeConfig:
-    name: llamacpp
+    backendName: llamacpp
+    configName: default
     args:
-      name: "default"
-      flags:
-        - -fa # use flash attention
+      - -fa # use flash attention
diff --git a/docs/examples/ollama/playground.yaml b/docs/examples/ollama/playground.yaml
index f91949ac..62f47c9b 100644
--- a/docs/examples/ollama/playground.yaml
+++ b/docs/examples/ollama/playground.yaml
@@ -7,4 +7,4 @@ spec:
   modelClaim:
     modelName: qwen2-0--5b
   backendRuntimeConfig:
-    name: ollama
+    backendName: ollama
diff --git a/docs/examples/sglang/playground.yaml b/docs/examples/sglang/playground.yaml
index a94a55f8..6b76e133 100644
--- a/docs/examples/sglang/playground.yaml
+++ b/docs/examples/sglang/playground.yaml
@@ -7,4 +7,4 @@ spec:
   modelClaim:
     modelName: qwen2-0--5b
   backendRuntimeConfig:
-    name: sglang
+    backendName: sglang
diff --git a/docs/examples/speculative-decoding/llamacpp/playground.yaml b/docs/examples/speculative-decoding/llamacpp/playground.yaml
index 4d797263..870f692f 100644
--- a/docs/examples/speculative-decoding/llamacpp/playground.yaml
+++ b/docs/examples/speculative-decoding/llamacpp/playground.yaml
@@ -14,11 +14,9 @@ spec:
       - name: llama2-7b-q2-k-gguf # the draft model
         role: draft
   backendRuntimeConfig:
-    name: llamacpp
+    backendName: llamacpp
     args:
-      name: "speculative-decoding"
-      flags:
-        - -fa # use flash attention
+      - -fa # use flash attention
     resources:
       requests:
         cpu: 4
diff --git a/pkg/controller/inference/playground_controller.go b/pkg/controller/inference/playground_controller.go
index f322ea41..fc1648c7 100644
--- a/pkg/controller/inference/playground_controller.go
+++ b/pkg/controller/inference/playground_controller.go
@@ -47,7 +47,8 @@ import (
 	coreclientgo "github.com/inftyai/llmaz/client-go/applyconfiguration/core/v1alpha1"
 	inferenceclientgo "github.com/inftyai/llmaz/client-go/applyconfiguration/inference/v1alpha1"
 	helper "github.com/inftyai/llmaz/pkg/controller_helper"
-	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/model_source"
+	backendruntime "github.com/inftyai/llmaz/pkg/controller_helper/backendruntime"
+	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/modelsource"
 	"github.com/inftyai/llmaz/pkg/util"
 )
 
@@ -106,8 +107,8 @@ func (r *PlaygroundReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	backendRuntimeName := inferenceapi.DefaultBackend
-	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Name != nil {
-		backendRuntimeName = *playground.Spec.BackendRuntimeConfig.Name
+	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.BackendName != nil {
+		backendRuntimeName = *playground.Spec.BackendRuntimeConfig.BackendName
 	}
 	backendRuntime := &inferenceapi.BackendRuntime{}
 	if err := r.Get(ctx, types.NamespacedName{Name: string(backendRuntimeName)}, backendRuntime); err != nil {
@@ -129,7 +130,7 @@ func (r *PlaygroundReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		return ctrl.Result{}, err
 	}
 
-	scalingConfiguration := buildScalingConfiguration(playground, backendRuntime)
+	scalingConfiguration := buildScalingConfiguration(models, playground, backendRuntime)
 	if scalingConfiguration != nil {
 		if err := setControllerReferenceForScalingConfiguration(playground, scalingConfiguration, r.Scheme); err != nil {
 			logger.Error(err, "failed to set OwnerReference for scaling workload", "workload", fmt.Sprintf("%s/%s", playground.Namespace, playground.Name), "kind", scalingConfiguration.Kind)
@@ -244,8 +245,7 @@ func buildServiceApplyConfiguration(models []*coreapi.OpenModel, playground *inf
 // Model flavors will not be considered but in inferenceService controller to support accelerator fungibility.
 func buildWorkloadTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) (lws.LeaderWorkerSetSpec, error) {
 	workload := lws.LeaderWorkerSetSpec{
-		// Use the default policy defined in lws.
-		StartupPolicy: lws.LeaderCreatedStartupPolicy,
+		StartupPolicy: lws.LeaderReadyStartupPolicy,
 		RolloutStrategy: lws.RolloutStrategy{
 			Type: lws.RollingUpdateStrategyType,
 		},
@@ -265,7 +265,7 @@ func buildWorkloadTemplate(models []*coreapi.OpenModel, playground *inferenceapi
 
 	if multiHost {
 		workload.LeaderWorkerTemplate.LeaderTemplate = &template
-		workload.LeaderWorkerTemplate.WorkerTemplate = buildWorkerTemplate(playground, backendRuntime)
+		workload.LeaderWorkerTemplate.WorkerTemplate = buildWorkerTemplate(models, playground, backendRuntime)
 	} else {
 		workload.LeaderWorkerTemplate.WorkerTemplate = template
 	}
@@ -274,33 +274,35 @@ func buildWorkloadTemplate(models []*coreapi.OpenModel, playground *inferenceapi
 }
 
 func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime, multiHost bool) (corev1.PodTemplateSpec, error) {
-	parser := helper.NewBackendRuntimeParser(backendRuntime)
+	parser := backendruntime.NewBackendRuntimeParser(backendRuntime, models, playground)
 
-	commands := parser.Commands()
-	if multiHost {
-		commands = parser.LeaderCommands()
+	// envs
+	envs := parser.Envs()
+	if playground.Spec.BackendRuntimeConfig != nil {
+		envs = append(envs, playground.Spec.BackendRuntimeConfig.Envs...)
 	}
 
-	args, err := parser.Args(playground, models, multiHost)
+	// args
+	args, err := parser.Args()
 	if err != nil {
 		return corev1.PodTemplateSpec{}, err
 	}
-	envs := parser.Envs()
-
 	if playground.Spec.BackendRuntimeConfig != nil {
-		envs = append(envs, playground.Spec.BackendRuntimeConfig.Envs...)
-		if playground.Spec.BackendRuntimeConfig.Args != nil {
-			args = append(args, playground.Spec.BackendRuntimeConfig.Args.Flags...)
-		}
+		args = append(args, playground.Spec.BackendRuntimeConfig.Args...)
 	}
 
+	// resources
+	r := parser.Resources()
+	if r == nil {
+		r = &inferenceapi.ResourceRequirements{}
+	}
 	resources := corev1.ResourceRequirements{
-		Requests: parser.Resources().Requests,
-		Limits:   parser.Resources().Limits,
+		Requests: r.Requests,
+		Limits:   r.Limits,
 	}
 	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Resources != nil {
-		limits := util.MergeResources(playground.Spec.BackendRuntimeConfig.Resources.Limits, parser.Resources().Limits)
-		requests := util.MergeResources(playground.Spec.BackendRuntimeConfig.Resources.Requests, parser.Resources().Requests)
+		limits := util.MergeResources(playground.Spec.BackendRuntimeConfig.Resources.Limits, r.Limits)
+		requests := util.MergeResources(playground.Spec.BackendRuntimeConfig.Resources.Requests, r.Requests)
 
 		resources = corev1.ResourceRequirements{
 			Limits:   limits,
@@ -308,26 +310,31 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
 		}
 
 		// Make sure the limits are always greater than requests.
-		for k, v := range resources.Limits {
+		for k, v := range resources.Requests {
 			if k == corev1.ResourceCPU || k == corev1.ResourceMemory {
-				if v.Cmp(requests[k]) == -1 {
+				if v.Cmp(limits[k]) == 1 {
 					resources.Limits[k] = requests[k]
 				}
 			}
 		}
 	}
 
+	// image version
 	version := parser.Version()
 	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Version != nil {
 		version = *playground.Spec.BackendRuntimeConfig.Version
 	}
 
-	// Pod can not accept shell commands with args together, merge the args with the commands.
+	// commands
+	commands := parser.Commands()
 	if multiHost {
+		commands = parser.LeaderCommands()
+		// Pod can not accept shell commands with args together, merge the args with the commands.
 		commands = util.MergeArgsWithCommands(commands, args)
 		args = nil
 	}
 
+	// probe
 	var livenessProbe, readinessProbe, startupProbe *corev1.Probe
 	if backendRuntime.Spec.StartupProbe != nil {
 		startupProbe = backendRuntime.Spec.StartupProbe
@@ -366,14 +373,19 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
 		},
 	}
 
-	// construct /dev/shm size
+	// sharedMemorySize
+	sharedMemorySize := parser.SharedMemorySize()
 	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.SharedMemorySize != nil {
+		sharedMemorySize = playground.Spec.BackendRuntimeConfig.SharedMemorySize
+	}
+	if sharedMemorySize != nil {
+		// construct /dev/shm size
 		template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{
 			Name: "dshm",
 			VolumeSource: corev1.VolumeSource{
 				EmptyDir: &corev1.EmptyDirVolumeSource{
 					Medium:    corev1.StorageMediumMemory,
-					SizeLimit: playground.Spec.BackendRuntimeConfig.SharedMemorySize,
+					SizeLimit: sharedMemorySize,
 				},
 			},
 		})
@@ -389,21 +401,25 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
 
 // This is a copy of buildTemplate with some refactors, only used in multi-nodes cases.
 // Worker template has no args, no contain port.
-func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) corev1.PodTemplateSpec {
-	parser := helper.NewBackendRuntimeParser(backendRuntime)
+func buildWorkerTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) corev1.PodTemplateSpec {
+	parser := backendruntime.NewBackendRuntimeParser(backendRuntime, models, playground)
 
 	envs := parser.Envs()
 	if playground.Spec.BackendRuntimeConfig != nil {
 		envs = append(envs, playground.Spec.BackendRuntimeConfig.Envs...)
 	}
 
+	r := parser.Resources()
+	if r == nil {
+		r = &inferenceapi.ResourceRequirements{}
+	}
 	resources := corev1.ResourceRequirements{
-		Requests: parser.Resources().Requests,
-		Limits:   parser.Resources().Limits,
+		Requests: r.Requests,
+		Limits:   r.Limits,
 	}
 	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Resources != nil {
-		limits := util.MergeResources(playground.Spec.BackendRuntimeConfig.Resources.Limits, parser.Resources().Limits)
-		requests := util.MergeResources(playground.Spec.BackendRuntimeConfig.Resources.Requests, parser.Resources().Requests)
+		limits := util.MergeResources(playground.Spec.BackendRuntimeConfig.Resources.Limits, r.Limits)
+		requests := util.MergeResources(playground.Spec.BackendRuntimeConfig.Resources.Requests, r.Requests)
 
 		resources = corev1.ResourceRequirements{
 			Limits:   limits,
@@ -411,9 +427,9 @@ func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *in
 		}
 
 		// Make sure the limits are always greater than requests.
-		for k, v := range resources.Limits {
+		for k, v := range resources.Requests {
 			if k == corev1.ResourceCPU || k == corev1.ResourceMemory {
-				if v.Cmp(requests[k]) == -1 {
+				if v.Cmp(limits[k]) == 1 {
 					resources.Limits[k] = requests[k]
 				}
 			}
@@ -441,14 +457,18 @@ func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *in
 		},
 	}
 
-	// construct /dev/shm size
+	sharedMemorySize := parser.SharedMemorySize()
 	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.SharedMemorySize != nil {
+		sharedMemorySize = playground.Spec.BackendRuntimeConfig.SharedMemorySize
+	}
+	if sharedMemorySize != nil {
+		// construct /dev/shm size
 		template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{
 			Name: "dshm",
 			VolumeSource: corev1.VolumeSource{
 				EmptyDir: &corev1.EmptyDirVolumeSource{
 					Medium:    corev1.StorageMediumMemory,
-					SizeLimit: playground.Spec.BackendRuntimeConfig.SharedMemorySize,
+					SizeLimit: sharedMemorySize,
 				},
 			},
 		})
@@ -564,33 +584,28 @@ func setControllerReferenceForService(owner metav1.Object, saf *inferenceclientg
 }
 
 // buildScalingConfiguration supports HPA only now.
-func buildScalingConfiguration(playground *inferenceapi.Playground, backend *inferenceapi.BackendRuntime) *autoscalingv2.HorizontalPodAutoscaler {
+func buildScalingConfiguration(models []*coreapi.OpenModel, playground *inferenceapi.Playground, backend *inferenceapi.BackendRuntime) *autoscalingv2.HorizontalPodAutoscaler {
 	if playground.Spec.ElasticConfig == nil {
 		return nil
 	}
 
-	// Handle HPA.
-	if playground.Spec.ElasticConfig.ScaleTrigger != nil && playground.Spec.ElasticConfig.ScaleTrigger.HPA != nil {
+	// Prefer the playground config.
+	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.ScaleTrigger != nil {
 		hpa := newHPA(playground)
-		hpa.Spec.Metrics = playground.Spec.ElasticConfig.ScaleTrigger.HPA.Metrics
-		hpa.Spec.Behavior = playground.Spec.ElasticConfig.ScaleTrigger.HPA.Behavior
+		hpa.Spec.Metrics = playground.Spec.BackendRuntimeConfig.ScaleTrigger.HPA.Metrics
+		hpa.Spec.Behavior = playground.Spec.BackendRuntimeConfig.ScaleTrigger.HPA.Behavior
 		return hpa
+
 	}
 
-	if len(backend.Spec.ScaleTriggers) > 0 {
-		hpa := newHPA(playground)
-		if playground.Spec.ElasticConfig.ScaleTriggerRef != nil {
-			for _, trigger := range backend.Spec.ScaleTriggers {
-				if trigger.Name == playground.Spec.ElasticConfig.ScaleTriggerRef.Name {
-					hpa.Spec.Metrics = trigger.HPA.Metrics
-					hpa.Spec.Behavior = trigger.HPA.Behavior
-					return hpa
-				}
-			}
-		} else {
-			// use the 0-index as the default value.
-			hpa.Spec.Metrics = backend.Spec.ScaleTriggers[0].HPA.Metrics
-			hpa.Spec.Behavior = backend.Spec.ScaleTriggers[0].HPA.Behavior
+	_, multiHost := helper.MultiHostInference(models[0], playground)
+	mode := helper.DetectArgFrom(playground, multiHost)
+
+	for _, recommend := range backend.Spec.RecommendedConfigs {
+		if recommend.Name == mode && recommend.ScaleTrigger != nil {
+			hpa := newHPA(playground)
+			hpa.Spec.Metrics = recommend.ScaleTrigger.HPA.Metrics
+			hpa.Spec.Behavior = recommend.ScaleTrigger.HPA.Behavior
 			return hpa
 		}
 	}
diff --git a/pkg/controller/inference/service_controller.go b/pkg/controller/inference/service_controller.go
index 3aae6e8e..cc0de1ec 100644
--- a/pkg/controller/inference/service_controller.go
+++ b/pkg/controller/inference/service_controller.go
@@ -45,7 +45,7 @@ import (
 	coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
 	helper "github.com/inftyai/llmaz/pkg/controller_helper"
-	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/model_source"
+	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/modelsource"
 	"github.com/inftyai/llmaz/pkg/util"
 )
 
diff --git a/pkg/controller_helper/backendruntime.go b/pkg/controller_helper/backendruntime/backendruntime.go
similarity index 66%
rename from pkg/controller_helper/backendruntime.go
rename to pkg/controller_helper/backendruntime/backendruntime.go
index bdb8bfba..2c1eb021 100644
--- a/pkg/controller_helper/backendruntime.go
+++ b/pkg/controller_helper/backendruntime/backendruntime.go
@@ -22,19 +22,33 @@ import (
 	"strings"
 
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 
 	coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
-	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/model_source"
+	helper "github.com/inftyai/llmaz/pkg/controller_helper"
+	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/modelsource"
 )
 
 // TODO: add unit tests.
 type BackendRuntimeParser struct {
-	backendRuntime *inferenceapi.BackendRuntime
+	backendRuntime      *inferenceapi.BackendRuntime
+	models              []*coreapi.OpenModel
+	playground          *inferenceapi.Playground
+	recommendConfigName string
+	multiHost           bool
 }
 
-func NewBackendRuntimeParser(backendRuntime *inferenceapi.BackendRuntime) *BackendRuntimeParser {
-	return &BackendRuntimeParser{backendRuntime}
+func NewBackendRuntimeParser(backendRuntime *inferenceapi.BackendRuntime, models []*coreapi.OpenModel, playground *inferenceapi.Playground) *BackendRuntimeParser {
+	_, multiHost := helper.MultiHostInference(models[0], playground)
+	name := helper.RecommendedConfigName(playground, multiHost)
+	return &BackendRuntimeParser{
+		backendRuntime,
+		models,
+		playground,
+		name,
+		multiHost,
+	}
 }
 
 func (p *BackendRuntimeParser) Commands() []string {
@@ -59,16 +73,8 @@ func (p *BackendRuntimeParser) Envs() []corev1.EnvVar {
 	return p.backendRuntime.Spec.Envs
 }
 
-func (p *BackendRuntimeParser) Args(playground *inferenceapi.Playground, models []*coreapi.OpenModel, multiNodes bool) ([]string, error) {
-	var argName string
-	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Args != nil {
-		argName = *playground.Spec.BackendRuntimeConfig.Args.Name
-	} else {
-		// Auto detect the args from model roles.
-		argName = DetectArgFrom(playground, multiNodes)
-	}
-
-	mainModel := models[0]
+func (p *BackendRuntimeParser) Args() ([]string, error) {
+	mainModel := p.models[0]
 
 	source := modelSource.NewModelSourceProvider(mainModel)
 	modelInfo := map[string]string{
@@ -76,8 +82,8 @@ func (p *BackendRuntimeParser) Args(playground *inferenceapi.Playground, models
 		"ModelName": source.ModelName(),
 	}
 
-	if multiNodes {
-		flavors := FirstAssignedFlavor(mainModel, playground)
+	if p.multiHost {
+		flavors := helper.FirstAssignedFlavor(mainModel, p.playground)
 		if len(flavors) > 0 {
 			modelInfo["PP"] = flavors[0].Params["PP"]
 			modelInfo["TP"] = flavors[0].Params["TP"]
@@ -86,13 +92,13 @@ func (p *BackendRuntimeParser) Args(playground *inferenceapi.Playground, models
 
 	// TODO: This is not that reliable because two models doesn't always means speculative-decoding.
 	// Revisit this later.
-	if len(models) > 1 {
-		modelInfo["DraftModelPath"] = modelSource.NewModelSourceProvider(models[1]).ModelPath()
+	if len(p.models) > 1 {
+		modelInfo["DraftModelPath"] = modelSource.NewModelSourceProvider(p.models[1]).ModelPath()
 	}
 
-	for _, arg := range p.backendRuntime.Spec.Args {
-		if *arg.Name == argName {
-			return renderFlags(arg.Flags, modelInfo)
+	for _, recommend := range p.backendRuntime.Spec.RecommendedConfigs {
+		if recommend.Name == p.recommendConfigName {
+			return renderFlags(recommend.Args, modelInfo)
 		}
 	}
 
@@ -108,8 +114,23 @@ func (p *BackendRuntimeParser) Version() string {
 	return p.backendRuntime.Spec.Version
 }
 
-func (p *BackendRuntimeParser) Resources() inferenceapi.ResourceRequirements {
-	return p.backendRuntime.Spec.Resources
+func (p *BackendRuntimeParser) Resources() *inferenceapi.ResourceRequirements {
+	for _, recommend := range p.backendRuntime.Spec.RecommendedConfigs {
+		if recommend.Name == p.recommendConfigName {
+			return recommend.Resources
+		}
+	}
+	// No recommended config matched the resolved config name; we should not reach here.
+	return nil
+}
+
+func (p *BackendRuntimeParser) SharedMemorySize() *resource.Quantity {
+	for _, recommend := range p.backendRuntime.Spec.RecommendedConfigs {
+		if recommend.Name == p.recommendConfigName {
+			return recommend.SharedMemorySize
+		}
+	}
+	return nil
 }
 
 func renderFlags(flags []string, modelInfo map[string]string) ([]string, error) {
diff --git a/pkg/controller_helper/backendruntime_test.go b/pkg/controller_helper/backendruntime/backendruntime_test.go
similarity index 100%
rename from pkg/controller_helper/backendruntime_test.go
rename to pkg/controller_helper/backendruntime/backendruntime_test.go
diff --git a/pkg/controller_helper/helper.go b/pkg/controller_helper/helper.go
index 99cad552..85699fbe 100644
--- a/pkg/controller_helper/helper.go
+++ b/pkg/controller_helper/helper.go
@@ -33,6 +33,18 @@ const (
 	ModelParallelismArg    string = "model-parallelism"
 )
 
+func RecommendedConfigName(playground *inferenceapi.Playground, multiNodes bool) string {
+	var name string
+	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.ConfigName != nil {
+		name = *playground.Spec.BackendRuntimeConfig.ConfigName
+	} else {
+		// Auto-detect the recommended config name from the model roles.
+		name = DetectArgFrom(playground, multiNodes)
+	}
+
+	return name
+}
+
 // DetectArgFrom wil auto detect the arg from model roles if not set explicitly.
 func DetectArgFrom(playground *inferenceapi.Playground, isMultiNodesInference bool) string {
 	if isMultiNodesInference {
@@ -91,7 +103,7 @@ func fetchModels(ctx context.Context, k8sClient client.Client, mrs []coreapi.Mod
 	return models, nil
 }
 
-// FirstAssignedFlavor will return the first assigned flavor of the model, always the 0-index flavor.
+// FirstAssignedFlavor will return the first assigned flavor of the model.
 func FirstAssignedFlavor(model *coreapi.OpenModel, playground *inferenceapi.Playground) []coreapi.Flavor {
 	var flavors []coreapi.FlavorName
 	if playground.Spec.ModelClaim != nil {
@@ -117,7 +129,7 @@ func FirstAssignedFlavor(model *coreapi.OpenModel, playground *inferenceapi.Play
 	return nil
 }
 
-// MultiHostInference returns two values, the first one is the TP size,
+// MultiHostInference returns two values, the first one is the PP size,
 // the second one is whether this is a multi-host inference.
 func MultiHostInference(model *coreapi.OpenModel, playground *inferenceapi.Playground) (int32, bool) {
 	flavors := FirstAssignedFlavor(model, playground)
diff --git a/pkg/controller_helper/model_source/modelhub.go b/pkg/controller_helper/modelsource/modelhub.go
similarity index 100%
rename from pkg/controller_helper/model_source/modelhub.go
rename to pkg/controller_helper/modelsource/modelhub.go
diff --git a/pkg/controller_helper/model_source/modelsource.go b/pkg/controller_helper/modelsource/modelsource.go
similarity index 100%
rename from pkg/controller_helper/model_source/modelsource.go
rename to pkg/controller_helper/modelsource/modelsource.go
diff --git a/pkg/controller_helper/model_source/modelsource_test.go b/pkg/controller_helper/modelsource/modelsource_test.go
similarity index 100%
rename from pkg/controller_helper/model_source/modelsource_test.go
rename to pkg/controller_helper/modelsource/modelsource_test.go
diff --git a/pkg/controller_helper/model_source/uri.go b/pkg/controller_helper/modelsource/uri.go
similarity index 100%
rename from pkg/controller_helper/model_source/uri.go
rename to pkg/controller_helper/modelsource/uri.go
diff --git a/pkg/webhook/backendruntime_webhook.go b/pkg/webhook/backendruntime_webhook.go
index babd1b76..3b7c3519 100644
--- a/pkg/webhook/backendruntime_webhook.go
+++ b/pkg/webhook/backendruntime_webhook.go
@@ -78,20 +78,25 @@ func (w *BackendRuntimeWebhook) generateValidate(obj runtime.Object) field.Error
 	var allErrs field.ErrorList
 
 	// Validate resources.
-	for k, v := range backend.Spec.Resources.Limits {
-		if requestV, ok := backend.Spec.Resources.Requests[k]; ok {
-			if v.Cmp(requestV) == -1 {
-				allErrs = append(allErrs, field.Forbidden(specPath.Child("resources"), fmt.Sprintf("resource limit of %s is less than resource request", k)))
+	for _, recommend := range backend.Spec.RecommendedConfigs {
+		if recommend.Resources == nil {
+			continue
+		}
+		for k, v := range recommend.Resources.Limits {
+			if requestV, ok := recommend.Resources.Requests[k]; ok {
+				if v.Cmp(requestV) == -1 {
+					allErrs = append(allErrs, field.Forbidden(specPath.Child("resources"), fmt.Sprintf("resource limit of %s is less than resource request", k)))
+				}
 			}
 		}
 	}
 
 	names := []string{}
-	for _, arg := range backend.Spec.Args {
-		if util.In(names, *arg.Name) {
-			allErrs = append(allErrs, field.Forbidden(specPath.Child("args", "name"), fmt.Sprintf("duplicated name %s", *arg.Name)))
+	for _, recommend := range backend.Spec.RecommendedConfigs {
+		if util.In(names, recommend.Name) {
+			allErrs = append(allErrs, field.Forbidden(specPath.Child("args", "name"), fmt.Sprintf("duplicated name %s", recommend.Name)))
 		}
-		names = append(names, *arg.Name)
+		names = append(names, recommend.Name)
 	}
 	return allErrs
 }
diff --git a/pkg/webhook/openmodel_webhook.go b/pkg/webhook/openmodel_webhook.go
index 9ede0ce5..d78c02b7 100644
--- a/pkg/webhook/openmodel_webhook.go
+++ b/pkg/webhook/openmodel_webhook.go
@@ -27,7 +27,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
 
 	coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
-	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/model_source"
+	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/modelsource"
 	"github.com/inftyai/llmaz/pkg/util"
 )
 
diff --git a/pkg/webhook/playground_webhook.go b/pkg/webhook/playground_webhook.go
index 53c25839..47ac0ff3 100644
--- a/pkg/webhook/playground_webhook.go
+++ b/pkg/webhook/playground_webhook.go
@@ -141,7 +141,7 @@ func (w *PlaygroundWebhook) generateValidate(obj runtime.Object) field.ErrorList
 	}
 
 	if playground.Spec.ElasticConfig != nil {
-		if *playground.Spec.ElasticConfig.MinReplicas == 0 {
+		if playground.Spec.ElasticConfig.MinReplicas != nil && *playground.Spec.ElasticConfig.MinReplicas == 0 {
 			allErrs = append(allErrs, field.Forbidden(specPath.Child("elasticConfig.minReplicas"), "minReplicas couldn't be 0"))
 		}
 
@@ -150,11 +150,11 @@ func (w *PlaygroundWebhook) generateValidate(obj runtime.Object) field.ErrorList
 				allErrs = append(allErrs, field.Invalid(specPath.Child("elasticConfig.scaleTrigger.hpa"), *playground.Spec.ElasticConfig.MinReplicas, "minReplicas must be less than maxReplicas"))
 			}
 		}
+	}
 
-		if playground.Spec.ElasticConfig.ScaleTrigger != nil {
-			if playground.Spec.ElasticConfig.ScaleTrigger.HPA == nil {
-				allErrs = append(allErrs, field.Forbidden(specPath.Child("elasticConfig.scaleTrigger.hpa"), "hpa couldn't be nil"))
-			}
+	if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.ScaleTrigger != nil {
+		if playground.Spec.BackendRuntimeConfig.ScaleTrigger.HPA == nil {
+			allErrs = append(allErrs, field.Forbidden(specPath.Child("backendRuntime.scaleTrigger.hpa"), "hpa couldn't be nil"))
 		}
 	}
 
diff --git a/pkg/webhook/service_webhook.go b/pkg/webhook/service_webhook.go
index fd21b2e5..008dc820 100644
--- a/pkg/webhook/service_webhook.go
+++ b/pkg/webhook/service_webhook.go
@@ -28,7 +28,7 @@ import (
 
 	coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
-	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/model_source"
+	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/modelsource"
 )
 
 type ServiceWebhook struct{}
diff --git a/test/config/backends/fake_backend.yaml b/test/config/backends/fake_backend.yaml
index 250f1621..ba573040 100644
--- a/test/config/backends/fake_backend.yaml
+++ b/test/config/backends/fake_backend.yaml
@@ -13,42 +13,33 @@ spec:
     - echo "hello"
   image: busybox
   version: latest
-  args:
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - mode
         - "default"
+      resources:
+        requests:
+          cpu: 4
+          memory: 8Gi
+        limits:
+          cpu: 4
+          memory: 8Gi
+      sharedMemorySize: 1Gi
+      scaleTrigger:
+        hpa:
+          metrics:
+            - type: Resource
+              resource:
+                name: cpu
+                target:
+                  type: Utilization
+                  averageUtilization: 20
     - name: speculative-decoding
-      flags:
+      args:
         - mode
         - "speculative-decoding"
     - name: fuz
-      flags:
+      args:
         - mode
         - "fuz"
-  resources:
-    requests:
-      cpu: 4
-      memory: 8Gi
-    limits:
-      cpu: 4
-      memory: 8Gi
-  scaleTriggers:
-    - name: hpa
-      hpa:
-        metrics:
-          - type: Resource
-            resource:
-              name: cpu
-              target:
-                type: Utilization
-                averageUtilization: 50
-    - name: hpa2
-      hpa:
-        metrics:
-          - type: Resource
-            resource:
-              name: cpu
-              target:
-                type: Utilization
-                averageUtilization: 80
diff --git a/test/config/backends/llamacpp.yaml b/test/config/backends/llamacpp.yaml
index ea4554e4..ae3d04b9 100644
--- a/test/config/backends/llamacpp.yaml
+++ b/test/config/backends/llamacpp.yaml
@@ -11,18 +11,27 @@ spec:
     - ./llama-server
   image: ghcr.io/ggerganov/llama.cpp
   version: server
-  args:
+  # Do not edit the preset config names unless you know what you're doing.
+  # Feel free to add more configs to fit your requirements.
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - -m
         - "{{ .ModelPath }}"
         - --host
         - "0.0.0.0"
         - --port
         - "8080"
+      resources:
+        requests:
+          cpu: 2
+          memory: 4Gi
+        limits:
+          cpu: 2
+          memory: 4Gi
     # TODO: not supported yet, see https://github.com/InftyAI/llmaz/issues/240.
     # - name: speculative-decoding
-    #   flags:
+    #   args:
     #     - -m
     #     - "{{ .ModelPath }}"
     #     - -md
@@ -35,13 +44,6 @@ spec:
     #     - "16"
     #     - --draft-min
     #     - "5"
-  resources:
-    requests:
-      cpu: 2
-      memory: 4Gi
-    limits:
-      cpu: 2
-      memory: 4Gi
   startupProbe:
     periodSeconds: 10
     failureThreshold: 30
diff --git a/test/config/backends/ollama.yaml b/test/config/backends/ollama.yaml
index d5e347b0..35f93e79 100644
--- a/test/config/backends/ollama.yaml
+++ b/test/config/backends/ollama.yaml
@@ -12,18 +12,20 @@ spec:
     - -c
   image: ollama/ollama
   version: latest
-  args:
+  # Do not edit the preset config names unless you know what you're doing.
+  # Feel free to add more configs to fit your requirements.
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - "ollama serve &
           while true; do output=$(ollama list 2>&1);
           if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;
-          ollama run {{`{{ .ModelName }}`}};
+          ollama run {{ .ModelName }};
           while true;do sleep 60;done"
-  resources:
-    requests:
-      cpu: 2
-      memory: 4Gi
-    limits:
-      cpu: 2
-      memory: 4Gi
+      resources:
+        requests:
+          cpu: 2
+          memory: 4Gi
+        limits:
+          cpu: 2
+          memory: 4Gi
diff --git a/test/config/backends/sglang.yaml b/test/config/backends/sglang.yaml
index 3eb4fab7..6a8dc60d 100644
--- a/test/config/backends/sglang.yaml
+++ b/test/config/backends/sglang.yaml
@@ -13,9 +13,11 @@ spec:
     - sglang.launch_server
   image: lmsysorg/sglang
   version: v0.2.10-cu121
-  args:
+  # Do not edit the preset config names unless you know what you're doing.
+  # Feel free to add more configs to fit your requirements.
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - --model-path
         - "{{ .ModelPath }}"
         - --served-model-name
@@ -24,13 +26,13 @@ spec:
         - "0.0.0.0"
         - --port
         - "8080"
-  resources:
-    requests:
-      cpu: 4
-      memory: 8Gi
-    limits:
-      cpu: 4
-      memory: 8Gi
+      resources:
+        requests:
+          cpu: 4
+          memory: 8Gi
+        limits:
+          cpu: 4
+          memory: 8Gi
   startupProbe:
     periodSeconds: 10
     failureThreshold: 30
diff --git a/test/config/backends/tgi.yaml b/test/config/backends/tgi.yaml
index 75235192..b16eed65 100644
--- a/test/config/backends/tgi.yaml
+++ b/test/config/backends/tgi.yaml
@@ -11,20 +11,20 @@ spec:
   version: 2.3.1
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
-  args:
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - --model-id
         - "{{ .ModelPath }}"
         - --port
         - "8080"
-  resources:
-    requests:
-      cpu: 4
-      memory: 8Gi
-    limits:
-      cpu: 4
-      memory: 8Gi
+      resources:
+        requests:
+          cpu: 4
+          memory: 8Gi
+        limits:
+          cpu: 4
+          memory: 8Gi
   startupProbe:
     periodSeconds: 10
     failureThreshold: 30
diff --git a/test/config/backends/vllm.yaml b/test/config/backends/vllm.yaml
index 7ecbd873..35e6cf67 100644
--- a/test/config/backends/vllm.yaml
+++ b/test/config/backends/vllm.yaml
@@ -58,9 +58,9 @@ spec:
   version: v0.6.0
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
-  args:
+  recommendedConfigs:
     - name: default
-      flags:
+      args:
         - --model
         - "{{ .ModelPath }}"
         - --served-model-name
@@ -69,8 +69,15 @@ spec:
         - "0.0.0.0"
         - --port
         - "8080"
+      resources:
+        requests:
+          cpu: 4
+          memory: 8Gi
+        limits:
+          cpu: 4
+          memory: 8Gi
     - name: speculative-decoding
-      flags:
+      args:
         - --model
         - "{{ .ModelPath }}"
         - --served-model-name
@@ -86,7 +93,7 @@ spec:
         - -tp
         - "1"
     - name: model-parallelism
-      flags:
+      args:
         - --model
         - "{{ .ModelPath }}"
         - --served-model-name
@@ -99,13 +106,6 @@ spec:
         - "{{ .TP }}"
         - --pipeline-parallel-size
         - "{{ .PP }}"
-  resources:
-    requests:
-      cpu: 4
-      memory: 8Gi
-    limits:
-      cpu: 4
-      memory: 8Gi
   startupProbe:
     periodSeconds: 10
     failureThreshold: 30
diff --git a/test/e2e/playground_test.go b/test/e2e/playground_test.go
index 3640df92..c3c05938 100644
--- a/test/e2e/playground_test.go
+++ b/test/e2e/playground_test.go
@@ -54,7 +54,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
 			Image("ollama/ollama").Version("latest").
 			Command([]string{"sh", "-c"}).
 			Arg("default", []string{"ollama serve & while true;do output=$(ollama list 2>&1);if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;ollama run {{.ModelName}};while true;do sleep 60;done"}).
-			Request("cpu", "2").Request("memory", "4Gi").Limit("cpu", "4").Limit("memory", "4Gi").Obj()
+			Request("default", "cpu", "2").Request("default", "memory", "4Gi").Limit("default", "cpu", "4").Limit("default", "memory", "4Gi").Obj()
 		gomega.Expect(k8sClient.Create(ctx, backendRuntime)).To(gomega.Succeed())
 
 		model := wrapper.MakeModel("qwen2-0--5b").FamilyName("qwen2").ModelSourceWithURI("ollama://qwen2:0.5b").Obj()
@@ -91,7 +91,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
 			Image("ghcr.io/ggerganov/llama.cpp").Version("server").
 			Command([]string{"./llama-server"}).
 			Arg("default", []string{"-m", "{{.ModelPath}}", "--host", "0.0.0.0", "--port", "8080"}).
-			Request("cpu", "2").Request("memory", "4Gi").Limit("cpu", "4").Limit("memory", "4Gi").Obj()
+			Request("default", "cpu", "2").Request("default", "memory", "4Gi").Limit("default", "cpu", "4").Limit("default", "memory", "4Gi").Obj()
 		gomega.Expect(k8sClient.Create(ctx, backendRuntime)).To(gomega.Succeed())
 
 		model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj()
diff --git a/test/integration/controller/inference/hpa_test.go b/test/integration/controller/inference/hpa_test.go
index 57a40993..afcd3b82 100644
--- a/test/integration/controller/inference/hpa_test.go
+++ b/test/integration/controller/inference/hpa_test.go
@@ -98,7 +98,7 @@ var _ = ginkgo.Describe("hpa test", func() {
 							if err := k8sClient.Get(ctx, types.NamespacedName{Name: playground.Name, Namespace: playground.Namespace}, hpa); err != nil {
 								return err
 							}
-							if diff := cmp.Diff(playground.Spec.ElasticConfig.ScaleTrigger.HPA.Metrics, hpa.Spec.Metrics); diff != "" {
+							if diff := cmp.Diff(playground.Spec.BackendRuntimeConfig.ScaleTrigger.HPA.Metrics, hpa.Spec.Metrics); diff != "" {
 								return fmt.Errorf("metrics not match: %s", diff)
 							}
 							return nil
@@ -129,7 +129,7 @@ var _ = ginkgo.Describe("hpa test", func() {
 							if err := k8sClient.Get(ctx, types.NamespacedName{Name: "fake-backend"}, backend); err != nil {
 								return err
 							}
-							if diff := cmp.Diff(backend.Spec.ScaleTriggers[0].HPA.Metrics, hpa.Spec.Metrics); diff != "" {
+							if diff := cmp.Diff(backend.Spec.RecommendedConfigs[0].ScaleTrigger.HPA.Metrics, hpa.Spec.Metrics); diff != "" {
 								return fmt.Errorf("metrics not match: %s", diff)
 							}
 							return nil
@@ -141,7 +141,8 @@ var _ = ginkgo.Describe("hpa test", func() {
 		ginkgo.Entry("playground with scaleTrigger overwrite backendRuntime's", &testValidatingCase{
 			makePlayground: func() *inferenceapi.Playground {
 				return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name).
-					ElasticConfig(1, 3).ScaleTriggerRef("hpa2").
+					ElasticConfig(1, 3).
+					HPA(util.MockASimpleHPATrigger()).
 					BackendRuntime("fake-backend").
 					Obj()
 			},
@@ -160,7 +161,7 @@ var _ = ginkgo.Describe("hpa test", func() {
 							if err := k8sClient.Get(ctx, types.NamespacedName{Name: "fake-backend"}, backend); err != nil {
 								return err
 							}
-							if diff := cmp.Diff(backend.Spec.ScaleTriggers[1].HPA.Metrics, hpa.Spec.Metrics); diff != "" {
+							if diff := cmp.Diff(util.MockASimpleHPATrigger().Metrics, hpa.Spec.Metrics); diff != "" {
 								return fmt.Errorf("metrics not match: %s", diff)
 							}
 							return nil
diff --git a/test/integration/controller/inference/playground_test.go b/test/integration/controller/inference/playground_test.go
index d62de815..fc7d7a97 100644
--- a/test/integration/controller/inference/playground_test.go
+++ b/test/integration/controller/inference/playground_test.go
@@ -194,7 +194,7 @@ var _ = ginkgo.Describe("playground controller test", func() {
 		ginkgo.Entry("advance configured Playground with sglang", &testValidatingCase{
 			makePlayground: func() *inferenceapi.Playground {
 				return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name).
-					BackendRuntime("sglang").BackendRuntimeVersion("main").BackendRuntimeArgs("default", []string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR").
+					BackendRuntime("sglang").BackendRuntimeVersion("main").BackendRuntimeArgs([]string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR").
 					BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10").
 					Obj()
 			},
@@ -222,7 +222,7 @@ var _ = ginkgo.Describe("playground controller test", func() {
 		ginkgo.Entry("advance configured Playground with llamacpp", &testValidatingCase{
 			makePlayground: func() *inferenceapi.Playground {
 				return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name).
-					BackendRuntime("llamacpp").BackendRuntimeVersion("main").BackendRuntimeArgs("default", []string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR").
+					BackendRuntime("llamacpp").BackendRuntimeVersion("main").BackendRuntimeArgs([]string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR").
 					BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10").
 					Obj()
 			},
@@ -250,7 +250,7 @@ var _ = ginkgo.Describe("playground controller test", func() {
 		ginkgo.Entry("advance configured Playground with tgi", &testValidatingCase{
 			makePlayground: func() *inferenceapi.Playground {
 				return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name).
-					BackendRuntime("tgi").BackendRuntimeVersion("main").BackendRuntimeArgs("default", []string{"--model-id", "Qwen/Qwen2-0.5B-Instruct"}).BackendRuntimeEnv("FOO", "BAR").
+					BackendRuntime("tgi").BackendRuntimeVersion("main").BackendRuntimeArgs([]string{"--model-id", "Qwen/Qwen2-0.5B-Instruct"}).BackendRuntimeEnv("FOO", "BAR").
 					BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10").
 					Obj()
 			},
@@ -278,7 +278,7 @@ var _ = ginkgo.Describe("playground controller test", func() {
 		ginkgo.Entry("advance configured Playground with ollama", &testValidatingCase{
 			makePlayground: func() *inferenceapi.Playground {
 				return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name).
-					BackendRuntime("ollama").BackendRuntimeVersion("main").BackendRuntimeArgs("default", []string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR").
+					BackendRuntime("ollama").BackendRuntimeVersion("main").BackendRuntimeArgs([]string{"--foo", "bar"}).BackendRuntimeEnv("FOO", "BAR").
 					BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10").
 					Obj()
 			},
@@ -306,7 +306,7 @@ var _ = ginkgo.Describe("playground controller test", func() {
 		ginkgo.Entry("advance configured Playground with argName set", &testValidatingCase{
 			makePlayground: func() *inferenceapi.Playground {
 				return wrapper.MakePlayground("playground", ns.Name).ModelClaim(model.Name).Label(coreapi.ModelNameLabelKey, model.Name).
-					BackendRuntime("fake-backend").BackendRuntimeVersion("main").BackendRuntimeArgs("fuz", []string{"--model-id", "Qwen/Qwen2-0.5B-Instruct"}).BackendRuntimeEnv("FOO", "BAR").
+					BackendRuntime("fake-backend").BackendRuntimeVersion("main").BackendRuntimeArgs([]string{"--model-id", "Qwen/Qwen2-0.5B-Instruct"}).BackendRuntimeEnv("FOO", "BAR").
 					BackendRuntimeRequest("cpu", "1").BackendRuntimeLimit("cpu", "10").
 					Obj()
 			},
@@ -468,7 +468,7 @@ var _ = ginkgo.Describe("playground controller test", func() {
 		ginkgo.Entry("Playground with shared memory size configured", &testValidatingCase{
 			makePlayground: func() *inferenceapi.Playground {
 				return wrapper.MakePlayground("playground", ns.Name).ModelClaim(multiNodesModel.Name).Label(coreapi.ModelNameLabelKey, multiNodesModel.Name).
-					SharedMemorySize("1Gi").
+					SharedMemorySize("2Gi").
 					Obj()
 			},
 			updates: []*update{
diff --git a/test/integration/webhook/backendruntime_test.go b/test/integration/webhook/backendruntime_test.go
index 220dc2ad..92dfea4f 100644
--- a/test/integration/webhook/backendruntime_test.go
+++ b/test/integration/webhook/backendruntime_test.go
@@ -23,6 +23,7 @@ import (
 
 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
 	"github.com/inftyai/llmaz/test/util"
+	"github.com/inftyai/llmaz/test/util/wrapper"
 )
 
 var _ = ginkgo.Describe("BackendRuntime default and validation", func() {
@@ -74,7 +75,7 @@ var _ = ginkgo.Describe("BackendRuntime default and validation", func() {
 		}),
 		ginkgo.Entry("BackendRuntime creation with limits less than requests", &testValidatingCase{
 			creationFunc: func() *inferenceapi.BackendRuntime {
-				return util.MockASampleBackendRuntime().Limit("cpu", "1").Obj()
+				return util.MockASampleBackendRuntime().Limit("default", "cpu", "1").Obj()
 			},
 			createFailed: true,
 		}),
@@ -84,11 +85,15 @@ var _ = ginkgo.Describe("BackendRuntime default and validation", func() {
 			},
 			createFailed: false,
 		}),
-		ginkgo.Entry("BackendRuntime creation with duplicated argument name", &testValidatingCase{
+		ginkgo.Entry("BackendRuntime creation with no resources", &testValidatingCase{
 			creationFunc: func() *inferenceapi.BackendRuntime {
-				return util.MockASampleBackendRuntime().Arg("default", []string{"foo", "bar"}).Obj()
+				return wrapper.MakeBackendRuntime("vllm").
+					Image("vllm/vllm-openai").Version("v0.6.0").
+					Command([]string{"python3", "-m", "vllm.entrypoints.openai.api_server"}).
+					Arg("default", []string{"--model", "{{.ModelPath}}", "--served-model-name", "{{.ModelName}}", "--host", "0.0.0.0", "--port", "8080"}).
+					Obj()
 			},
-			createFailed: true,
+			createFailed: false,
 		}),
 	)
 })
diff --git a/test/util/mock.go b/test/util/mock.go
index 9a19c161..5069bc98 100644
--- a/test/util/mock.go
+++ b/test/util/mock.go
@@ -56,7 +56,7 @@ func MockASampleBackendRuntime() *wrapper.BackendRuntimeWrapper {
 		Image("vllm/vllm-openai").Version("v0.6.0").
 		Command([]string{"python3", "-m", "vllm.entrypoints.openai.api_server"}).
 		Arg("default", []string{"--model", "{{.ModelPath}}", "--served-model-name", "{{.ModelName}}", "--host", "0.0.0.0", "--port", "8080"}).
-		Request("cpu", "4").Limit("cpu", "4")
+		Request("default", "cpu", "4").Limit("default", "cpu", "4")
 }
 
 func MockASimpleHPATrigger() *inferenceapi.HPATrigger {
diff --git a/test/util/validation/validate_playground.go b/test/util/validation/validate_playground.go
index 4bfb1fc5..9617651b 100644
--- a/test/util/validation/validate_playground.go
+++ b/test/util/validation/validate_playground.go
@@ -33,7 +33,8 @@ import (
 	coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
 	helper "github.com/inftyai/llmaz/pkg/controller_helper"
-	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/model_source"
+	backendruntime "github.com/inftyai/llmaz/pkg/controller_helper/backendruntime"
+	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/modelsource"
 	pkgutil "github.com/inftyai/llmaz/pkg/util"
 	"github.com/inftyai/llmaz/test/util"
 	"github.com/inftyai/llmaz/test/util/format"
@@ -90,15 +91,15 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
 		}
 
 		backendRuntimeName := inferenceapi.DefaultBackend
-		if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Name != nil {
-			backendRuntimeName = *playground.Spec.BackendRuntimeConfig.Name
+		if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.BackendName != nil {
+			backendRuntimeName = *playground.Spec.BackendRuntimeConfig.BackendName
 		}
 		backendRuntime := inferenceapi.BackendRuntime{}
 		if err := k8sClient.Get(ctx, types.NamespacedName{Name: string(backendRuntimeName)}, &backendRuntime); err != nil {
 			return errors.New("failed to get backendRuntime")
 		}
 
-		parser := helper.NewBackendRuntimeParser(&backendRuntime)
+		parser := backendruntime.NewBackendRuntimeParser(&backendRuntime, models, playground)
 		multiHost := service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate != nil
 
 		if service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Name != modelSource.MODEL_RUNNER_CONTAINER_NAME {
@@ -110,144 +111,98 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
 			}
 		}
 
-		// compare the same part of leader and worker template, image, version, env, resources.
-		if playground.Spec.BackendRuntimeConfig != nil {
+		// compare fields both backendRuntime and playground can configure.
 
-			// compare image & version
-			if playground.Spec.BackendRuntimeConfig.Version != nil {
-				if parser.Image(*playground.Spec.BackendRuntimeConfig.Version) != service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Image {
-					return fmt.Errorf("expected container image %s, got %s", parser.Image(*playground.Spec.BackendRuntimeConfig.Version), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Image)
-				}
-				if multiHost {
-					if parser.Image(*playground.Spec.BackendRuntimeConfig.Version) != service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Image {
-						return fmt.Errorf("expected container image %s, got %s", parser.Image(*playground.Spec.BackendRuntimeConfig.Version), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Image)
-					}
-				}
-			} else {
-				if parser.Image(parser.Version()) != service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Image {
-					return fmt.Errorf("expected container image %s, got %s", parser.Image(parser.Version()), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Image)
-				}
-				if multiHost {
-					if parser.Image(parser.Version()) != service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Image {
-						return fmt.Errorf("expected container image %s, got %s", parser.Image(parser.Version()), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Image)
-					}
+		sharedMemorySize := parser.SharedMemorySize()
+		if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.SharedMemorySize != nil {
+			sharedMemorySize = playground.Spec.BackendRuntimeConfig.SharedMemorySize
+		}
+		if sharedMemorySize != nil {
+			if multiHost {
+				if *sharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit {
+					return fmt.Errorf("expected SharedMemorySize %s, got %s", sharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String())
 				}
 			}
-
-			if playground.Spec.BackendRuntimeConfig.Envs != nil {
-				if diff := cmp.Diff(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Env, playground.Spec.BackendRuntimeConfig.Envs); diff != "" {
-					return fmt.Errorf("unexpected envs")
-				}
-				if multiHost {
-					if diff := cmp.Diff(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Env, playground.Spec.BackendRuntimeConfig.Envs); diff != "" {
-						return fmt.Errorf("unexpected envs")
-					}
-				}
+			if *sharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit {
+				return fmt.Errorf("expected SharedMemorySize %s, got %s", sharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String())
 			}
+		}
 
-			if playground.Spec.BackendRuntimeConfig.Resources != nil {
-				for k, v := range playground.Spec.BackendRuntimeConfig.Resources.Limits {
-					if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Limits[k].Equal(v) {
-						return fmt.Errorf("unexpected limits for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Limits[k])
-					}
-					if multiHost {
-						if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Limits[k].Equal(v) {
-							return fmt.Errorf("unexpected limits for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Limits[k])
-						}
-					}
-				}
-				for k, v := range playground.Spec.BackendRuntimeConfig.Resources.Requests {
-					if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Requests[k].Equal(v) {
-						return fmt.Errorf("unexpected requests for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Requests[k])
-					}
-					if multiHost {
-						if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Requests[k].Equal(v) {
-							return fmt.Errorf("unexpected requests for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Requests[k])
-						}
-					}
-				}
-			} else {
-				// Validate default resources requirements.
-				for k, v := range parser.Resources().Limits {
-					if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Limits[k].Equal(v) {
-						return fmt.Errorf("unexpected limit for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Limits[k])
-					}
-					if multiHost {
-						if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Limits[k].Equal(v) {
-							return fmt.Errorf("unexpected limit for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Limits[k])
-						}
-					}
-				}
-				for k, v := range parser.Resources().Requests {
-					if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Requests[k].Equal(v) {
-						return fmt.Errorf("unexpected limit for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Requests[k])
-					}
-					if multiHost {
-						if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Requests[k].Equal(v) {
-							return fmt.Errorf("unexpected limit for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Requests[k])
-						}
-					}
+		resources := parser.Resources()
+		if resources == nil {
+			resources = &inferenceapi.ResourceRequirements{}
+		}
+		if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Resources != nil {
+			resources = playground.Spec.BackendRuntimeConfig.Resources
+		}
+		for k, v := range resources.Limits {
+			if multiHost {
+				if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Limits[k].Equal(v) {
+					return fmt.Errorf("unexpected limits for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Limits[k])
 				}
 			}
-
-			// compare probes
-			if backendRuntime.Spec.StartupProbe != nil {
-				if multiHost {
-					if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].StartupProbe, *backendRuntime.Spec.StartupProbe); diff != "" {
-						return fmt.Errorf("unexpected startupProbe")
-					}
-				} else {
-					if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].StartupProbe, *backendRuntime.Spec.StartupProbe); diff != "" {
-						return fmt.Errorf("unexpected startupProbe")
-					}
-				}
+			if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Limits[k].Equal(v) {
+				return fmt.Errorf("unexpected limits for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Limits[k])
 			}
-			if backendRuntime.Spec.LivenessProbe != nil {
-				if multiHost {
-					if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].LivenessProbe, *backendRuntime.Spec.LivenessProbe); diff != "" {
-						return fmt.Errorf("unexpected livenessProbe")
-					}
-				} else {
-					if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].LivenessProbe, *backendRuntime.Spec.LivenessProbe); diff != "" {
-						return fmt.Errorf("unexpected livenessProbe")
-					}
+		}
+		for k, v := range resources.Requests {
+			if multiHost {
+				if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Requests[k].Equal(v) {
+					return fmt.Errorf("unexpected requests for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Resources.Requests[k])
 				}
 			}
-			if backendRuntime.Spec.ReadinessProbe != nil {
-				if multiHost {
-					if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].ReadinessProbe, *backendRuntime.Spec.ReadinessProbe); diff != "" {
-						return fmt.Errorf("unexpected readinessProbe")
-					}
-				} else {
-					if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].ReadinessProbe, *backendRuntime.Spec.ReadinessProbe); diff != "" {
-						return fmt.Errorf("unexpected readinessProbe")
-					}
-				}
+			if !service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Requests[k].Equal(v) {
+				return fmt.Errorf("unexpected requests for %s, want %v, got %v", k, v, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Resources.Requests[k])
 			}
 		}
 
-		// compare the different parts.
+		version := parser.Version()
+		if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Version != nil {
+			version = *playground.Spec.BackendRuntimeConfig.Version
+		}
+		if parser.Image(version) != service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Image {
+			return fmt.Errorf("expected container image %s, got %s", parser.Image(version), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Image)
+		}
+		if multiHost {
+			if parser.Image(version) != service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Image {
+				return fmt.Errorf("expected container image %s, got %s", parser.Image(version), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Image)
+			}
+		}
 
-		args, err := parser.Args(playground, models, multiHost)
+		envs := parser.Envs()
+		if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Envs != nil {
+			envs = playground.Spec.BackendRuntimeConfig.Envs
+		}
+		if diff := cmp.Diff(envs, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Env); diff != "" {
+			return fmt.Errorf("unexpected envs")
+		}
+		if multiHost {
+			if diff := cmp.Diff(envs, service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Env); diff != "" {
+				return fmt.Errorf("unexpected envs")
+			}
+		}
+
+		args, err := parser.Args()
 		if err != nil {
 			return err
 		}
 		if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.Args != nil {
-			args = append(args, playground.Spec.BackendRuntimeConfig.Args.Flags...)
+			args = append(args, playground.Spec.BackendRuntimeConfig.Args...)
 		}
 
-		for _, arg := range args {
-			if multiHost {
-				if len(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Args) != 0 {
-					return fmt.Errorf("args should be empty, but got: %v", service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Args)
-				}
-			} else {
+		if multiHost {
+			if len(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Args) != 0 {
+				return fmt.Errorf("args should be empty, but got: %v", service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Args)
+			}
+		} else {
+			for _, arg := range args {
 				if !slices.Contains(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Args, arg) {
 					return fmt.Errorf("didn't contain arg: %s", arg)
 				}
 			}
 		}
 
+		// compare commands
 		if multiHost {
 			if diff := cmp.Diff(pkgutil.MergeArgsWithCommands(parser.LeaderCommands(), args), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].Command); diff != "" {
 				return errors.New("command not right")
@@ -261,14 +216,39 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
 			}
 		}
 
-		if playground.Spec.BackendRuntimeConfig != nil && playground.Spec.BackendRuntimeConfig.SharedMemorySize != nil {
+		// compare fields that can only be configured in the backendRuntime.
+
+		if backendRuntime.Spec.StartupProbe != nil {
 			if multiHost {
-				if *playground.Spec.BackendRuntimeConfig.SharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit {
-					return fmt.Errorf("expected SharedMemorySize %s, got %s", playground.Spec.BackendRuntimeConfig.SharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String())
+				if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].StartupProbe, *backendRuntime.Spec.StartupProbe); diff != "" {
+					return fmt.Errorf("unexpected startupProbe")
+				}
+			} else {
+				if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].StartupProbe, *backendRuntime.Spec.StartupProbe); diff != "" {
+					return fmt.Errorf("unexpected startupProbe")
 				}
 			}
-			if *playground.Spec.BackendRuntimeConfig.SharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit {
-				return fmt.Errorf("expected SharedMemorySize %s, got %s", playground.Spec.BackendRuntimeConfig.SharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String())
+		}
+		if backendRuntime.Spec.LivenessProbe != nil {
+			if multiHost {
+				if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].LivenessProbe, *backendRuntime.Spec.LivenessProbe); diff != "" {
+					return fmt.Errorf("unexpected livenessProbe")
+				}
+			} else {
+				if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].LivenessProbe, *backendRuntime.Spec.LivenessProbe); diff != "" {
+					return fmt.Errorf("unexpected livenessProbe")
+				}
+			}
+		}
+		if backendRuntime.Spec.ReadinessProbe != nil {
+			if multiHost {
+				if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].ReadinessProbe, *backendRuntime.Spec.ReadinessProbe); diff != "" {
+					return fmt.Errorf("unexpected readinessProbe")
+				}
+			} else {
+				if diff := cmp.Diff(*service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].ReadinessProbe, *backendRuntime.Spec.ReadinessProbe); diff != "" {
+					return fmt.Errorf("unexpected readinessProbe")
+				}
 			}
 		}
 
diff --git a/test/util/validation/validate_service.go b/test/util/validation/validate_service.go
index aaf65236..c66ecc43 100644
--- a/test/util/validation/validate_service.go
+++ b/test/util/validation/validate_service.go
@@ -33,7 +33,7 @@ import (
 	coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
 	"github.com/inftyai/llmaz/pkg"
-	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/model_source"
+	modelSource "github.com/inftyai/llmaz/pkg/controller_helper/modelsource"
 	"github.com/inftyai/llmaz/test/util"
 )
 
diff --git a/test/util/wrapper/backend.go b/test/util/wrapper/backend.go
index 26faf2b8..7d4148a8 100644
--- a/test/util/wrapper/backend.go
+++ b/test/util/wrapper/backend.go
@@ -20,7 +20,6 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/utils/ptr"
 
 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
 )
@@ -64,26 +63,79 @@ func (w *BackendRuntimeWrapper) Command(commands []string) *BackendRuntimeWrappe
 }
 
 func (w *BackendRuntimeWrapper) Arg(name string, flags []string) *BackendRuntimeWrapper {
-	w.Spec.Args = append(w.Spec.Args, inferenceapi.BackendRuntimeArg{
-		Name:  ptr.To[string](name),
-		Flags: flags,
-	})
+	if w.Spec.RecommendedConfigs == nil {
+		w.Spec.RecommendedConfigs = []inferenceapi.RecommendedConfig{
+			{
+				Name: name,
+			},
+		}
+	}
+	for _, recommend := range w.Spec.RecommendedConfigs {
+		if recommend.Name == name {
+			recommend.Args = flags
+			break
+		}
+	}
 	return w
 }
 
-func (w *BackendRuntimeWrapper) Request(r, v string) *BackendRuntimeWrapper {
-	if w.Spec.Resources.Requests == nil {
-		w.Spec.Resources.Requests = corev1.ResourceList{}
+func (w *BackendRuntimeWrapper) Request(name, r, v string) *BackendRuntimeWrapper {
+	if w.Spec.RecommendedConfigs == nil {
+		w.Spec.RecommendedConfigs = []inferenceapi.RecommendedConfig{
+			{
+				Name: name,
+			},
+		}
+	}
+	for i, recommend := range w.Spec.RecommendedConfigs {
+		if recommend.Name == name {
+			if w.Spec.RecommendedConfigs[i].Resources == nil {
+				w.Spec.RecommendedConfigs[i].Resources = &inferenceapi.ResourceRequirements{}
+			}
+			if w.Spec.RecommendedConfigs[i].Resources.Requests == nil {
+				w.Spec.RecommendedConfigs[i].Resources.Requests = corev1.ResourceList{}
+			}
+			w.Spec.RecommendedConfigs[i].Resources.Requests[corev1.ResourceName(r)] = resource.MustParse(v)
+			break
+		}
 	}
-	w.Spec.Resources.Requests[corev1.ResourceName(r)] = resource.MustParse(v)
 	return w
 }
 
-func (w *BackendRuntimeWrapper) Limit(r, v string) *BackendRuntimeWrapper {
-	if w.Spec.Resources.Limits == nil {
-		w.Spec.Resources.Limits = corev1.ResourceList{}
+// Limit sets the limit of resource r to v in the recommended config called name; v must parse as a quantity.
+func (w *BackendRuntimeWrapper) Limit(name, r, v string) *BackendRuntimeWrapper {
+	if w.Spec.RecommendedConfigs == nil {
+		w.Spec.RecommendedConfigs = []inferenceapi.RecommendedConfig{{Name: name}}
+	}
+	for i := range w.Spec.RecommendedConfigs {
+		if cfg := &w.Spec.RecommendedConfigs[i]; cfg.Name == name {
+			if cfg.Resources == nil { // Limit may be called before Request has allocated Resources.
+				cfg.Resources = &inferenceapi.ResourceRequirements{}
+			}
+			if cfg.Resources.Limits == nil {
+				cfg.Resources.Limits = corev1.ResourceList{}
+			}
+			cfg.Resources.Limits[corev1.ResourceName(r)] = resource.MustParse(v)
+			break
+		}
+	}
+	return w
+}
+
+func (w *BackendRuntimeWrapper) SharedMemorySize(name, v string) *BackendRuntimeWrapper {
+	if w.Spec.RecommendedConfigs == nil {
+		w.Spec.RecommendedConfigs = []inferenceapi.RecommendedConfig{
+			{
+				Name: name,
+			},
+		}
+	}
+	for i, recommend := range w.Spec.RecommendedConfigs {
+		if recommend.Name == name {
+			value := resource.MustParse(v)
+			w.Spec.RecommendedConfigs[i].SharedMemorySize = &value
+		}
 	}
-	w.Spec.Resources.Limits[corev1.ResourceName(r)] = resource.MustParse(v)
 	return w
 }
 
diff --git a/test/util/wrapper/playground.go b/test/util/wrapper/playground.go
index 10502ab3..0b76cdd9 100644
--- a/test/util/wrapper/playground.go
+++ b/test/util/wrapper/playground.go
@@ -97,7 +97,7 @@ func (w *PlaygroundWrapper) BackendRuntime(name string) *PlaygroundWrapper {
 		w.Spec.BackendRuntimeConfig = &inferenceapi.BackendRuntimeConfig{}
 	}
 	backendName := inferenceapi.BackendName(name)
-	w.Spec.BackendRuntimeConfig.Name = &backendName
+	w.Spec.BackendRuntimeConfig.BackendName = &backendName
 	return w
 }
 
@@ -109,15 +109,11 @@ func (w *PlaygroundWrapper) BackendRuntimeVersion(version string) *PlaygroundWra
 	return w
 }
 
-func (w *PlaygroundWrapper) BackendRuntimeArgs(name string, args []string) *PlaygroundWrapper {
+func (w *PlaygroundWrapper) BackendRuntimeArgs(args []string) *PlaygroundWrapper {
 	if w.Spec.BackendRuntimeConfig == nil {
 		w = w.BackendRuntime("vllm")
 	}
-	if w.Spec.BackendRuntimeConfig.Args == nil {
-		w.Spec.BackendRuntimeConfig.Args = &inferenceapi.BackendRuntimeArg{}
-	}
-	w.Spec.BackendRuntimeConfig.Args.Name = &name
-	w.Spec.BackendRuntimeConfig.Args.Flags = args
+	w.Spec.BackendRuntimeConfig.Args = args
 	return w
 }
 
@@ -161,32 +157,22 @@ func (w *PlaygroundWrapper) BackendRuntimeLimit(r, v string) *PlaygroundWrapper
 }
 
 func (w *PlaygroundWrapper) ElasticConfig(minReplicas, maxReplicas int32) *PlaygroundWrapper {
-	w.Spec.ElasticConfig = &inferenceapi.ElasticConfig{
-		MaxReplicas: ptr.To[int32](maxReplicas),
-		MinReplicas: ptr.To[int32](minReplicas),
-	}
-	return w
-}
-
-func (w *PlaygroundWrapper) HPA(config *inferenceapi.HPATrigger) *PlaygroundWrapper {
 	if w.Spec.ElasticConfig == nil {
 		w.Spec.ElasticConfig = &inferenceapi.ElasticConfig{}
 	}
-	if w.Spec.ElasticConfig.ScaleTrigger == nil {
-		w.Spec.ElasticConfig.ScaleTrigger = &inferenceapi.ScaleTrigger{}
-	}
-	w.Spec.ElasticConfig.ScaleTrigger.HPA = config
+	w.Spec.ElasticConfig.MaxReplicas = ptr.To[int32](maxReplicas)
+	w.Spec.ElasticConfig.MinReplicas = ptr.To[int32](minReplicas)
 	return w
 }
 
-func (w *PlaygroundWrapper) ScaleTriggerRef(name string) *PlaygroundWrapper {
-	if w.Spec.ElasticConfig == nil {
-		w.Spec.ElasticConfig = &inferenceapi.ElasticConfig{}
+func (w *PlaygroundWrapper) HPA(config *inferenceapi.HPATrigger) *PlaygroundWrapper {
+	if w.Spec.BackendRuntimeConfig == nil {
+		w.Spec.BackendRuntimeConfig = &inferenceapi.BackendRuntimeConfig{}
 	}
-	if w.Spec.ElasticConfig.ScaleTriggerRef == nil {
-		w.Spec.ElasticConfig.ScaleTriggerRef = &inferenceapi.ScaleTriggerRef{}
+	if w.Spec.BackendRuntimeConfig.ScaleTrigger == nil {
+		w.Spec.BackendRuntimeConfig.ScaleTrigger = &inferenceapi.ScaleTrigger{}
 	}
-	w.Spec.ElasticConfig.ScaleTriggerRef.Name = name
+	w.Spec.BackendRuntimeConfig.ScaleTrigger.HPA = config
 	return w
 }
 

From 1b4885fb85fda08398f701c09ee093c73315a209 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Tue, 18 Feb 2025 11:51:40 +0800
Subject: [PATCH 2/3] Change E2E timeout to 5 minutes

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 test/util/consts.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/util/consts.go b/test/util/consts.go
index 4d89170c..969de5e2 100644
--- a/test/util/consts.go
+++ b/test/util/consts.go
@@ -20,6 +20,6 @@ import "time"
 const (
 	IntegrationTimeout = 10 * time.Second
 	Interval           = time.Millisecond * 250
-	E2ETimeout         = 3 * time.Minute
+	E2ETimeout         = 5 * time.Minute
 	E2EInterval        = 1 * time.Second
 )

From a719916319e180b4f254651cb1d5582402154e31 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Tue, 18 Feb 2025 14:30:24 +0800
Subject: [PATCH 3/3] Fix e2e test error

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 test/util/wrapper/backend.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/util/wrapper/backend.go b/test/util/wrapper/backend.go
index 7d4148a8..3f73ec58 100644
--- a/test/util/wrapper/backend.go
+++ b/test/util/wrapper/backend.go
@@ -62,7 +62,7 @@ func (w *BackendRuntimeWrapper) Command(commands []string) *BackendRuntimeWrappe
 	return w
 }
 
-func (w *BackendRuntimeWrapper) Arg(name string, flags []string) *BackendRuntimeWrapper {
+func (w *BackendRuntimeWrapper) Arg(name string, args []string) *BackendRuntimeWrapper {
 	if w.Spec.RecommendedConfigs == nil {
 		w.Spec.RecommendedConfigs = []inferenceapi.RecommendedConfig{
 			{
@@ -70,9 +70,9 @@ func (w *BackendRuntimeWrapper) Arg(name string, flags []string) *BackendRuntime
 			},
 		}
 	}
-	for _, recommend := range w.Spec.RecommendedConfigs {
+	for i, recommend := range w.Spec.RecommendedConfigs {
 		if recommend.Name == name {
-			recommend.Args = flags
+			w.Spec.RecommendedConfigs[i].Args = args
 			break
 		}
 	}