diff --git a/README.md b/README.md
index 46c40893..eb7e846e 100644
--- a/README.md
+++ b/README.md
@@ -67,9 +67,9 @@ spec:
     modelHub:
       modelID: facebook/opt-125m
   inferenceFlavors:
-  - name: t4 # GPU type
-    requests:
-      nvidia.com/gpu: 1
+    - name: t4 # GPU type
+      requests:
+        nvidia.com/gpu: 1
 ```
 
 #### Inference Playground
@@ -124,12 +124,11 @@ If you want to learn more about this project, please refer to [develop.md](./doc
 - CLI tool support
 - Model training, fine tuning in the long-term
 
-
 ## Community
 
 Join us for more discussions:
 
-* **Slack Channel**: [#llmaz](https://inftyai.slack.com/archives/C06D0BGEQ1G)
+- **Slack Channel**: [#llmaz](https://inftyai.slack.com/archives/C06D0BGEQ1G)
 
 ## Contributions
 
diff --git a/api/inference/v1alpha1/backendruntime_types.go b/api/inference/v1alpha1/backendruntime_types.go
index 72dc3408..f766f7e6 100644
--- a/api/inference/v1alpha1/backendruntime_types.go
+++ b/api/inference/v1alpha1/backendruntime_types.go
@@ -63,6 +63,22 @@ type BackendRuntimeSpec struct {
 	// accelerators like GPU should not be defined here, but at the model flavors,
 	// or the values here will be overwritten.
 	Resources ResourceRequirements `json:"resources"`
+	// Periodic probe of backend liveness.
+	// Backend will be restarted if the probe fails.
+	// Cannot be updated.
+	// +optional
+	LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"`
+	// Periodic probe of backend readiness.
+	// Backend will be removed from service endpoints if the probe fails.
+	// +optional
+	ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"`
+	// StartupProbe indicates that the Backend has successfully initialized.
+	// If specified, no other probes are executed until this completes successfully.
+	// If this probe fails, the backend will be restarted, just as if the livenessProbe failed.
+	// This can be used to provide different probe parameters at the beginning of a backend's lifecycle,
+	// when it might take a long time to load data or warm a cache, than during steady-state operation.
+	// +optional
+	StartupProbe *corev1.Probe `json:"startupProbe,omitempty"`
 }
 
 // BackendRuntimeStatus defines the observed state of BackendRuntime
diff --git a/chart/templates/backends/llamacpp.yaml b/chart/templates/backends/llamacpp.yaml
index 634c5e8a..2eedfab6 100644
--- a/chart/templates/backends/llamacpp.yaml
+++ b/chart/templates/backends/llamacpp.yaml
@@ -23,6 +23,7 @@ spec:
         - "0.0.0.0"
         - --port
         - "8080"
+    # TODO: not supported yet, see https://github.com/InftyAI/llmaz/issues/240.
     - name: speculative-decoding
       flags:
         - -m
@@ -40,4 +41,24 @@ spec:
     limits:
       cpu: 2
       memory: 4Gi
+  startupProbe:
+    periodSeconds: 10
+    failureThreshold: 30
+    httpGet:
+      path: /health
+      port: 8080
+  livenessProbe:
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
 {{- end }}
diff --git a/chart/templates/backends/sglang.yaml b/chart/templates/backends/sglang.yaml
index f7cc1e8c..86a5b44d 100644
--- a/chart/templates/backends/sglang.yaml
+++ b/chart/templates/backends/sglang.yaml
@@ -34,4 +34,24 @@ spec:
     limits:
       cpu: 4
       memory: 8Gi
+  startupProbe:
+    periodSeconds: 10
+    failureThreshold: 30
+    httpGet:
+      path: /health
+      port: 8080
+  livenessProbe:
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health_generate
+      port: 8080
 {{- end }}
diff --git a/chart/templates/backends/tgi.yaml b/chart/templates/backends/tgi.yaml
index d6a67420..5a2bd87c 100644
--- a/chart/templates/backends/tgi.yaml
+++ b/chart/templates/backends/tgi.yaml
@@ -26,4 +26,24 @@ spec:
     limits:
       cpu: 4
       memory: 8Gi
+  startupProbe:
+    periodSeconds: 10
+    failureThreshold: 30
+    httpGet:
+      path: /health
+      port: 8080
+  livenessProbe:
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
 {{- end }}
diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml
index 1a52874e..2a2888c0 100644
--- a/chart/templates/backends/vllm.yaml
+++ b/chart/templates/backends/vllm.yaml
@@ -107,4 +107,24 @@ spec:
     limits:
       cpu: 4
       memory: 8Gi
+  startupProbe:
+    periodSeconds: 10
+    failureThreshold: 30
+    httpGet:
+      path: /health
+      port: 8080
+  livenessProbe:
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
 {{- end }}
diff --git a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
index 21c8639f..81967e3c 100644
--- a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
+++ b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
@@ -194,6 +194,160 @@ spec:
                   Image represents the default image registry of the backendRuntime.
                   It will work together with version to make up a real image.
                 type: string
+              livenessProbe:
+                description: |-
+                  Periodic probe of backend liveness.
+                  Backend will be restarted if the probe fails.
+                  Cannot be updated.
+                properties:
+                  exec:
+                    description: Exec specifies the action to take.
+                    properties:
+                      command:
+                        description: |-
+                          Command is the command line to execute inside the container, the working directory for the
+                          command  is root ('/') in the container's filesystem. The command is simply exec'd, it is
+                          not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
+                          a shell, you need to explicitly call out to that shell.
+                          Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
+                        items:
+                          type: string
+                        type: array
+                        x-kubernetes-list-type: atomic
+                    type: object
+                  failureThreshold:
+                    description: |-
+                      Minimum consecutive failures for the probe to be considered failed after having succeeded.
+                      Defaults to 3. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  grpc:
+                    description: GRPC specifies an action involving a GRPC port.
+                    properties:
+                      port:
+                        description: Port number of the gRPC service. Number must
+                          be in the range 1 to 65535.
+                        format: int32
+                        type: integer
+                      service:
+                        default: ""
+                        description: |-
+                          Service is the name of the service to place in the gRPC HealthCheckRequest
+                          (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
+
+                          If this is not specified, the default behavior is defined by gRPC.
+                        type: string
+                    required:
+                    - port
+                    type: object
+                  httpGet:
+                    description: HTTPGet specifies the http request to perform.
+                    properties:
+                      host:
+                        description: |-
+                          Host name to connect to, defaults to the pod IP. You probably want to set
+                          "Host" in httpHeaders instead.
+                        type: string
+                      httpHeaders:
+                        description: Custom headers to set in the request. HTTP allows
+                          repeated headers.
+                        items:
+                          description: HTTPHeader describes a custom header to be
+                            used in HTTP probes
+                          properties:
+                            name:
+                              description: |-
+                                The header field name.
+                                This will be canonicalized upon output, so case-variant names will be understood as the same header.
+                              type: string
+                            value:
+                              description: The header field value
+                              type: string
+                          required:
+                          - name
+                          - value
+                          type: object
+                        type: array
+                        x-kubernetes-list-type: atomic
+                      path:
+                        description: Path to access on the HTTP server.
+                        type: string
+                      port:
+                        anyOf:
+                        - type: integer
+                        - type: string
+                        description: |-
+                          Name or number of the port to access on the container.
+                          Number must be in the range 1 to 65535.
+                          Name must be an IANA_SVC_NAME.
+                        x-kubernetes-int-or-string: true
+                      scheme:
+                        description: |-
+                          Scheme to use for connecting to the host.
+                          Defaults to HTTP.
+                        type: string
+                    required:
+                    - port
+                    type: object
+                  initialDelaySeconds:
+                    description: |-
+                      Number of seconds after the container has started before liveness probes are initiated.
+                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+                    format: int32
+                    type: integer
+                  periodSeconds:
+                    description: |-
+                      How often (in seconds) to perform the probe.
+                      Default to 10 seconds. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  successThreshold:
+                    description: |-
+                      Minimum consecutive successes for the probe to be considered successful after having failed.
+                      Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  tcpSocket:
+                    description: TCPSocket specifies an action involving a TCP port.
+                    properties:
+                      host:
+                        description: 'Optional: Host name to connect to, defaults
+                          to the pod IP.'
+                        type: string
+                      port:
+                        anyOf:
+                        - type: integer
+                        - type: string
+                        description: |-
+                          Number or name of the port to access on the container.
+                          Number must be in the range 1 to 65535.
+                          Name must be an IANA_SVC_NAME.
+                        x-kubernetes-int-or-string: true
+                    required:
+                    - port
+                    type: object
+                  terminationGracePeriodSeconds:
+                    description: |-
+                      Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
+                      The grace period is the duration in seconds after the processes running in the pod are sent
+                      a termination signal and the time when the processes are forcibly halted with a kill signal.
+                      Set this value longer than the expected cleanup time for your process.
+                      If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
+                      value overrides the value provided by the pod spec.
+                      Value must be non-negative integer. The value zero indicates stop immediately via
+                      the kill signal (no opportunity to shut down).
+                      This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
+                      Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
+                    format: int64
+                    type: integer
+                  timeoutSeconds:
+                    description: |-
+                      Number of seconds after which the probe times out.
+                      Defaults to 1 second. Minimum value is 1.
+                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+                    format: int32
+                    type: integer
+                type: object
               multiHostCommands:
                 description: |-
                   MultiHostCommands represents leader and worker commands for nodes with
@@ -208,6 +362,159 @@ spec:
                       type: string
                     type: array
                 type: object
+              readinessProbe:
+                description: |-
+                  Periodic probe of backend readiness.
+                  Backend will be removed from service endpoints if the probe fails.
+                properties:
+                  exec:
+                    description: Exec specifies the action to take.
+                    properties:
+                      command:
+                        description: |-
+                          Command is the command line to execute inside the container, the working directory for the
+                          command  is root ('/') in the container's filesystem. The command is simply exec'd, it is
+                          not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
+                          a shell, you need to explicitly call out to that shell.
+                          Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
+                        items:
+                          type: string
+                        type: array
+                        x-kubernetes-list-type: atomic
+                    type: object
+                  failureThreshold:
+                    description: |-
+                      Minimum consecutive failures for the probe to be considered failed after having succeeded.
+                      Defaults to 3. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  grpc:
+                    description: GRPC specifies an action involving a GRPC port.
+                    properties:
+                      port:
+                        description: Port number of the gRPC service. Number must
+                          be in the range 1 to 65535.
+                        format: int32
+                        type: integer
+                      service:
+                        default: ""
+                        description: |-
+                          Service is the name of the service to place in the gRPC HealthCheckRequest
+                          (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
+
+                          If this is not specified, the default behavior is defined by gRPC.
+                        type: string
+                    required:
+                    - port
+                    type: object
+                  httpGet:
+                    description: HTTPGet specifies the http request to perform.
+                    properties:
+                      host:
+                        description: |-
+                          Host name to connect to, defaults to the pod IP. You probably want to set
+                          "Host" in httpHeaders instead.
+                        type: string
+                      httpHeaders:
+                        description: Custom headers to set in the request. HTTP allows
+                          repeated headers.
+                        items:
+                          description: HTTPHeader describes a custom header to be
+                            used in HTTP probes
+                          properties:
+                            name:
+                              description: |-
+                                The header field name.
+                                This will be canonicalized upon output, so case-variant names will be understood as the same header.
+                              type: string
+                            value:
+                              description: The header field value
+                              type: string
+                          required:
+                          - name
+                          - value
+                          type: object
+                        type: array
+                        x-kubernetes-list-type: atomic
+                      path:
+                        description: Path to access on the HTTP server.
+                        type: string
+                      port:
+                        anyOf:
+                        - type: integer
+                        - type: string
+                        description: |-
+                          Name or number of the port to access on the container.
+                          Number must be in the range 1 to 65535.
+                          Name must be an IANA_SVC_NAME.
+                        x-kubernetes-int-or-string: true
+                      scheme:
+                        description: |-
+                          Scheme to use for connecting to the host.
+                          Defaults to HTTP.
+                        type: string
+                    required:
+                    - port
+                    type: object
+                  initialDelaySeconds:
+                    description: |-
+                      Number of seconds after the container has started before liveness probes are initiated.
+                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+                    format: int32
+                    type: integer
+                  periodSeconds:
+                    description: |-
+                      How often (in seconds) to perform the probe.
+                      Default to 10 seconds. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  successThreshold:
+                    description: |-
+                      Minimum consecutive successes for the probe to be considered successful after having failed.
+                      Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  tcpSocket:
+                    description: TCPSocket specifies an action involving a TCP port.
+                    properties:
+                      host:
+                        description: 'Optional: Host name to connect to, defaults
+                          to the pod IP.'
+                        type: string
+                      port:
+                        anyOf:
+                        - type: integer
+                        - type: string
+                        description: |-
+                          Number or name of the port to access on the container.
+                          Number must be in the range 1 to 65535.
+                          Name must be an IANA_SVC_NAME.
+                        x-kubernetes-int-or-string: true
+                    required:
+                    - port
+                    type: object
+                  terminationGracePeriodSeconds:
+                    description: |-
+                      Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
+                      The grace period is the duration in seconds after the processes running in the pod are sent
+                      a termination signal and the time when the processes are forcibly halted with a kill signal.
+                      Set this value longer than the expected cleanup time for your process.
+                      If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
+                      value overrides the value provided by the pod spec.
+                      Value must be non-negative integer. The value zero indicates stop immediately via
+                      the kill signal (no opportunity to shut down).
+                      This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
+                      Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
+                    format: int64
+                    type: integer
+                  timeoutSeconds:
+                    description: |-
+                      Number of seconds after which the probe times out.
+                      Defaults to 1 second. Minimum value is 1.
+                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+                    format: int32
+                    type: integer
+                type: object
               resources:
                 description: |-
                   Resources represents the resource requirements for backendRuntime, like cpu/mem,
@@ -239,6 +546,162 @@ spec:
                       More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
                     type: object
                 type: object
+              startupProbe:
+                description: |-
+                  StartupProbe indicates that the Backend has successfully initialized.
+                  If specified, no other probes are executed until this completes successfully.
+                  If this probe fails, the backend will be restarted, just as if the livenessProbe failed.
+                  This can be used to provide different probe parameters at the beginning of a backend's lifecycle,
+                  when it might take a long time to load data or warm a cache, than during steady-state operation.
+                properties:
+                  exec:
+                    description: Exec specifies the action to take.
+                    properties:
+                      command:
+                        description: |-
+                          Command is the command line to execute inside the container, the working directory for the
+                          command  is root ('/') in the container's filesystem. The command is simply exec'd, it is
+                          not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
+                          a shell, you need to explicitly call out to that shell.
+                          Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
+                        items:
+                          type: string
+                        type: array
+                        x-kubernetes-list-type: atomic
+                    type: object
+                  failureThreshold:
+                    description: |-
+                      Minimum consecutive failures for the probe to be considered failed after having succeeded.
+                      Defaults to 3. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  grpc:
+                    description: GRPC specifies an action involving a GRPC port.
+                    properties:
+                      port:
+                        description: Port number of the gRPC service. Number must
+                          be in the range 1 to 65535.
+                        format: int32
+                        type: integer
+                      service:
+                        default: ""
+                        description: |-
+                          Service is the name of the service to place in the gRPC HealthCheckRequest
+                          (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md).
+
+                          If this is not specified, the default behavior is defined by gRPC.
+                        type: string
+                    required:
+                    - port
+                    type: object
+                  httpGet:
+                    description: HTTPGet specifies the http request to perform.
+                    properties:
+                      host:
+                        description: |-
+                          Host name to connect to, defaults to the pod IP. You probably want to set
+                          "Host" in httpHeaders instead.
+                        type: string
+                      httpHeaders:
+                        description: Custom headers to set in the request. HTTP allows
+                          repeated headers.
+                        items:
+                          description: HTTPHeader describes a custom header to be
+                            used in HTTP probes
+                          properties:
+                            name:
+                              description: |-
+                                The header field name.
+                                This will be canonicalized upon output, so case-variant names will be understood as the same header.
+                              type: string
+                            value:
+                              description: The header field value
+                              type: string
+                          required:
+                          - name
+                          - value
+                          type: object
+                        type: array
+                        x-kubernetes-list-type: atomic
+                      path:
+                        description: Path to access on the HTTP server.
+                        type: string
+                      port:
+                        anyOf:
+                        - type: integer
+                        - type: string
+                        description: |-
+                          Name or number of the port to access on the container.
+                          Number must be in the range 1 to 65535.
+                          Name must be an IANA_SVC_NAME.
+                        x-kubernetes-int-or-string: true
+                      scheme:
+                        description: |-
+                          Scheme to use for connecting to the host.
+                          Defaults to HTTP.
+                        type: string
+                    required:
+                    - port
+                    type: object
+                  initialDelaySeconds:
+                    description: |-
+                      Number of seconds after the container has started before liveness probes are initiated.
+                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+                    format: int32
+                    type: integer
+                  periodSeconds:
+                    description: |-
+                      How often (in seconds) to perform the probe.
+                      Default to 10 seconds. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  successThreshold:
+                    description: |-
+                      Minimum consecutive successes for the probe to be considered successful after having failed.
+                      Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+                    format: int32
+                    type: integer
+                  tcpSocket:
+                    description: TCPSocket specifies an action involving a TCP port.
+                    properties:
+                      host:
+                        description: 'Optional: Host name to connect to, defaults
+                          to the pod IP.'
+                        type: string
+                      port:
+                        anyOf:
+                        - type: integer
+                        - type: string
+                        description: |-
+                          Number or name of the port to access on the container.
+                          Number must be in the range 1 to 65535.
+                          Name must be an IANA_SVC_NAME.
+                        x-kubernetes-int-or-string: true
+                    required:
+                    - port
+                    type: object
+                  terminationGracePeriodSeconds:
+                    description: |-
+                      Optional duration in seconds the pod needs to terminate gracefully upon probe failure.
+                      The grace period is the duration in seconds after the processes running in the pod are sent
+                      a termination signal and the time when the processes are forcibly halted with a kill signal.
+                      Set this value longer than the expected cleanup time for your process.
+                      If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this
+                      value overrides the value provided by the pod spec.
+                      Value must be non-negative integer. The value zero indicates stop immediately via
+                      the kill signal (no opportunity to shut down).
+                      This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate.
+                      Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.
+                    format: int64
+                    type: integer
+                  timeoutSeconds:
+                    description: |-
+                      Number of seconds after which the probe times out.
+                      Defaults to 1 second. Minimum value is 1.
+                      More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+                    format: int32
+                    type: integer
+                type: object
               version:
                 description: |-
                   Version represents the default version of the backendRuntime.
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
index 045cb509..aab3a173 100644
--- a/config/manager/kustomization.yaml
+++ b/config/manager/kustomization.yaml
@@ -4,5 +4,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 images:
 - name: controller
-  newName: inftyai/llmaz
-  newTag: v0.0.9
+  newName: inftyai/test
+  newTag: llmaz-011701
diff --git a/docs/examples/multi-nodes/model.yaml b/docs/examples/multi-nodes/model.yaml
index 11bd2a30..513939fb 100644
--- a/docs/examples/multi-nodes/model.yaml
+++ b/docs/examples/multi-nodes/model.yaml
@@ -7,7 +7,7 @@ spec:
   source:
     modelHub:
       # TODO:
-      modelID: Qwen/Qwen2-0.5B
+      modelID: meta-llama/Llama-3.1-405B
   inferenceFlavors:
     - name: a100-80gb
       requests:
diff --git a/pkg/controller/inference/playground_controller.go b/pkg/controller/inference/playground_controller.go
index ea9f3780..c6174747 100644
--- a/pkg/controller/inference/playground_controller.go
+++ b/pkg/controller/inference/playground_controller.go
@@ -313,6 +313,17 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
 		args = nil
 	}
 
+	// Probes are optional on the BackendRuntime; a nil pointer is propagated
+	// as-is, which leaves the matching container probe unset.
+	var (
+		startupProbe   *corev1.Probe
+		livenessProbe  *corev1.Probe
+		readinessProbe *corev1.Probe
+	)
+	startupProbe = backendRuntime.Spec.StartupProbe
+	livenessProbe = backendRuntime.Spec.LivenessProbe
+	readinessProbe = backendRuntime.Spec.ReadinessProbe
+
 	template := corev1.PodTemplateSpec{
 		Spec: corev1.PodSpec{
 			// TODO: should we support image pull secret here?
@@ -332,6 +343,9 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
 							ContainerPort: modelSource.DEFAULT_BACKEND_PORT,
 						},
 					},
+					StartupProbe:   startupProbe,
+					LivenessProbe:  livenessProbe,
+					ReadinessProbe: readinessProbe,
 				},
 			},
 		},
diff --git a/test/config/backends/llamacpp.yaml b/test/config/backends/llamacpp.yaml
index da57e3d3..df1fe360 100644
--- a/test/config/backends/llamacpp.yaml
+++ b/test/config/backends/llamacpp.yaml
@@ -20,6 +20,7 @@ spec:
         - "0.0.0.0"
         - --port
         - "8080"
+    # TODO: not supported yet, see https://github.com/InftyAI/llmaz/issues/240.
     - name: speculative-decoding
       flags:
         - -m
@@ -30,6 +31,10 @@ spec:
         - "0.0.0.0"
         - --port
         - "8080"
+        - --draft-max
+        - "16"
+        - --draft-min
+        - "5"
   resources:
     requests:
       cpu: 2
@@ -37,3 +42,23 @@ spec:
     limits:
       cpu: 2
       memory: 4Gi
+  startupProbe:
+    periodSeconds: 10
+    failureThreshold: 30
+    httpGet:
+      path: /health
+      port: 8080
+  livenessProbe:
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
diff --git a/test/config/backends/ollama.yaml b/test/config/backends/ollama.yaml
index 23ed462e..d5e347b0 100644
--- a/test/config/backends/ollama.yaml
+++ b/test/config/backends/ollama.yaml
@@ -16,14 +16,14 @@ spec:
     - name: default
       flags:
         - "ollama serve &
-           while true; do output=$(ollama list 2>&1);
-           if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;
-           ollama run {{`{{ .ModelName }}`}};
-           while true;do sleep 60;done"
+          while true; do output=$(ollama list 2>&1);
+          if ! echo $output | grep -q 'could not connect to ollama app' && echo $output | grep -q 'NAME';then echo 'ollama is running';break; else echo 'Waiting for the ollama to be running...';sleep 1;fi;done;
+          ollama run {{`{{ .ModelName }}`}};
+          while true;do sleep 60;done"
   resources:
     requests:
       cpu: 2
       memory: 4Gi
     limits:
       cpu: 2
-      memory: 4Gi
\ No newline at end of file
+      memory: 4Gi
diff --git a/test/config/backends/sglang.yaml b/test/config/backends/sglang.yaml
index 8d5b4eaf..3eb4fab7 100644
--- a/test/config/backends/sglang.yaml
+++ b/test/config/backends/sglang.yaml
@@ -31,3 +31,23 @@ spec:
     limits:
       cpu: 4
       memory: 8Gi
+  startupProbe:
+    periodSeconds: 10
+    failureThreshold: 30
+    httpGet:
+      path: /health
+      port: 8080
+  livenessProbe:
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health_generate
+      port: 8080
diff --git a/test/config/backends/tgi.yaml b/test/config/backends/tgi.yaml
index 13eeed7a..75235192 100644
--- a/test/config/backends/tgi.yaml
+++ b/test/config/backends/tgi.yaml
@@ -25,3 +25,23 @@ spec:
     limits:
       cpu: 4
       memory: 8Gi
+  startupProbe:
+    periodSeconds: 10
+    failureThreshold: 30
+    httpGet:
+      path: /health
+      port: 8080
+  livenessProbe:
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
diff --git a/test/config/backends/vllm.yaml b/test/config/backends/vllm.yaml
index 384d9a97..7ecbd873 100644
--- a/test/config/backends/vllm.yaml
+++ b/test/config/backends/vllm.yaml
@@ -106,3 +106,23 @@ spec:
     limits:
       cpu: 4
       memory: 8Gi
+  startupProbe:
+    periodSeconds: 10
+    failureThreshold: 30
+    httpGet:
+      path: /health
+      port: 8080
+  livenessProbe:
+    initialDelaySeconds: 15
+    periodSeconds: 10
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health
+      port: 8080
diff --git a/test/util/validation/validate_playground.go b/test/util/validation/validate_playground.go
index 10181b4d..0ef25a89 100644
--- a/test/util/validation/validate_playground.go
+++ b/test/util/validation/validate_playground.go
@@ -112,6 +112,8 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
 
 		// compare the same part of leader and worker template, image, version, env, resources.
 		if playground.Spec.BackendRuntimeConfig != nil {
+
+			// compare image & version
 			if playground.Spec.BackendRuntimeConfig.Version != nil {
 				if parser.Image(*playground.Spec.BackendRuntimeConfig.Version) != service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Image {
 					return fmt.Errorf("expected container image %s, got %s", parser.Image(*playground.Spec.BackendRuntimeConfig.Version), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].Image)
@@ -187,6 +189,41 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
 					}
 				}
 			}
+
+			// compare probes; cmp.Diff receives the pointers so a nil probe in the template yields a diff, not a panic.
+			if backendRuntime.Spec.StartupProbe != nil {
+				if multiHost {
+					if diff := cmp.Diff(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].StartupProbe, backendRuntime.Spec.StartupProbe); diff != "" {
+						return fmt.Errorf("unexpected startupProbe, diff (-got +want): %s", diff)
+					}
+				} else {
+					if diff := cmp.Diff(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].StartupProbe, backendRuntime.Spec.StartupProbe); diff != "" {
+						return fmt.Errorf("unexpected startupProbe, diff (-got +want): %s", diff)
+					}
+				}
+			}
+			if backendRuntime.Spec.LivenessProbe != nil {
+				if multiHost {
+					if diff := cmp.Diff(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].LivenessProbe, backendRuntime.Spec.LivenessProbe); diff != "" {
+						return fmt.Errorf("unexpected livenessProbe, diff (-got +want): %s", diff)
+					}
+				} else {
+					if diff := cmp.Diff(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].LivenessProbe, backendRuntime.Spec.LivenessProbe); diff != "" {
+						return fmt.Errorf("unexpected livenessProbe, diff (-got +want): %s", diff)
+					}
+				}
+			}
+			if backendRuntime.Spec.ReadinessProbe != nil {
+				if multiHost {
+					if diff := cmp.Diff(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Containers[0].ReadinessProbe, backendRuntime.Spec.ReadinessProbe); diff != "" {
+						return fmt.Errorf("unexpected readinessProbe, diff (-got +want): %s", diff)
+					}
+				} else {
+					if diff := cmp.Diff(service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Containers[0].ReadinessProbe, backendRuntime.Spec.ReadinessProbe); diff != "" {
+						return fmt.Errorf("unexpected readinessProbe, diff (-got +want): %s", diff)
+					}
+				}
+			}
 		}
 
 		// compare the different parts.
diff --git a/test/util/wrapper/backend.go b/test/util/wrapper/backend.go
index 4deb1b52..c82a7887 100644
--- a/test/util/wrapper/backend.go
+++ b/test/util/wrapper/backend.go
@@ -17,7 +17,7 @@ limitations under the License.
 package wrapper
 
 import (
-	v1 "k8s.io/api/core/v1"
+	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
@@ -72,16 +72,29 @@ func (w *BackendRuntimeWrapper) Arg(name string, flags []string) *BackendRuntime
 
 func (w *BackendRuntimeWrapper) Request(r, v string) *BackendRuntimeWrapper {
 	if w.Spec.Resources.Requests == nil {
-		w.Spec.Resources.Requests = v1.ResourceList{}
+		w.Spec.Resources.Requests = corev1.ResourceList{}
 	}
-	w.Spec.Resources.Requests[v1.ResourceName(r)] = resource.MustParse(v)
+	w.Spec.Resources.Requests[corev1.ResourceName(r)] = resource.MustParse(v)
 	return w
 }
 
 func (w *BackendRuntimeWrapper) Limit(r, v string) *BackendRuntimeWrapper {
 	if w.Spec.Resources.Limits == nil {
-		w.Spec.Resources.Limits = v1.ResourceList{}
+		w.Spec.Resources.Limits = corev1.ResourceList{}
+	}
+	w.Spec.Resources.Limits[corev1.ResourceName(r)] = resource.MustParse(v)
+	return w
+}
+
+// Probe sets the probe named "liveness", "readiness" or "startup"; any other name is ignored.
+func (w *BackendRuntimeWrapper) Probe(name string, probe *corev1.Probe) *BackendRuntimeWrapper {
+	switch name {
+	case "liveness":
+		w.Spec.LivenessProbe = probe
+	case "readiness":
+		w.Spec.ReadinessProbe = probe
+	case "startup":
+		w.Spec.StartupProbe = probe
 	}
-	w.Spec.Resources.Limits[v1.ResourceName(r)] = resource.MustParse(v)
 	return w
 }