Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions api/inference/v1alpha1/backendruntime_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ type RecommendedConfig struct {

// BackendRuntimeSpec defines the desired state of BackendRuntime
type BackendRuntimeSpec struct {
// Commands represents the default commands for the backendRuntime.
// Command represents the default command for the backendRuntime.
// +optional
Commands []string `json:"commands,omitempty"`
Command []string `json:"command,omitempty"`
// Image represents the default image registry of the backendRuntime.
// It will work together with version to make up a real image.
Image string `json:"image"`
Expand Down
4 changes: 2 additions & 2 deletions api/inference/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

224 changes: 222 additions & 2 deletions chart/crds/backendruntime-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ spec:
spec:
description: BackendRuntimeSpec defines the desired state of BackendRuntime
properties:
commands:
description: Commands represents the default commands for the backendRuntime.
command:
description: Command represents the default command for the backendRuntime.
items:
type: string
type: array
Expand Down Expand Up @@ -168,6 +168,226 @@ spec:
Image represents the default image registry of the backendRuntime.
It will work together with version to make up a real image.
type: string
lifecycle:
description: Lifecycle represents hooks executed during the lifecycle
of the container.
properties:
postStart:
description: |-
PostStart is called immediately after a container is created. If the handler fails,
the container is terminated and restarted according to its restart policy.
Other management of the container blocks until the hook completes.
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks
properties:
exec:
description: Exec specifies a command to execute in the container.
properties:
command:
description: |-
Command is the command line to execute inside the container, the working directory for the
command is root ('/') in the container's filesystem. The command is simply exec'd, it is
not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
a shell, you need to explicitly call out to that shell.
Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
httpGet:
description: HTTPGet specifies an HTTP GET request to perform.
properties:
host:
description: |-
Host name to connect to, defaults to the pod IP. You probably want to set
"Host" in httpHeaders instead.
type: string
httpHeaders:
description: Custom headers to set in the request. HTTP
allows repeated headers.
items:
description: HTTPHeader describes a custom header to
be used in HTTP probes
properties:
name:
description: |-
The header field name.
This will be canonicalized upon output, so case-variant names will be understood as the same header.
type: string
value:
description: The header field value
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
description: Path to access on the HTTP server.
type: string
port:
anyOf:
- type: integer
- type: string
description: |-
Name or number of the port to access on the container.
Number must be in the range 1 to 65535.
Name must be an IANA_SVC_NAME.
x-kubernetes-int-or-string: true
scheme:
description: |-
Scheme to use for connecting to the host.
Defaults to HTTP.
type: string
required:
- port
type: object
sleep:
description: Sleep represents a duration that the container
should sleep.
properties:
seconds:
description: Seconds is the number of seconds to sleep.
format: int64
type: integer
required:
- seconds
type: object
tcpSocket:
description: |-
Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept
for backward compatibility. There is no validation of this field and
lifecycle hooks will fail at runtime when it is specified.
properties:
host:
description: 'Optional: Host name to connect to, defaults
to the pod IP.'
type: string
port:
anyOf:
- type: integer
- type: string
description: |-
Number or name of the port to access on the container.
Number must be in the range 1 to 65535.
Name must be an IANA_SVC_NAME.
x-kubernetes-int-or-string: true
required:
- port
type: object
type: object
preStop:
description: |-
PreStop is called immediately before a container is terminated due to an
API request or management event such as liveness/startup probe failure,
preemption, resource contention, etc. The handler is not called if the
container crashes or exits. The Pod's termination grace period countdown begins before the
PreStop hook is executed. Regardless of the outcome of the handler, the
container will eventually terminate within the Pod's termination grace
period (unless delayed by finalizers). Other management of the container blocks until the hook completes
or until the termination grace period is reached.
More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks
properties:
exec:
description: Exec specifies a command to execute in the container.
properties:
command:
description: |-
Command is the command line to execute inside the container, the working directory for the
command is root ('/') in the container's filesystem. The command is simply exec'd, it is
not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use
a shell, you need to explicitly call out to that shell.
Exit status of 0 is treated as live/healthy and non-zero is unhealthy.
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
httpGet:
description: HTTPGet specifies an HTTP GET request to perform.
properties:
host:
description: |-
Host name to connect to, defaults to the pod IP. You probably want to set
"Host" in httpHeaders instead.
type: string
httpHeaders:
description: Custom headers to set in the request. HTTP
allows repeated headers.
items:
description: HTTPHeader describes a custom header to
be used in HTTP probes
properties:
name:
description: |-
The header field name.
This will be canonicalized upon output, so case-variant names will be understood as the same header.
type: string
value:
description: The header field value
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
description: Path to access on the HTTP server.
type: string
port:
anyOf:
- type: integer
- type: string
description: |-
Name or number of the port to access on the container.
Number must be in the range 1 to 65535.
Name must be an IANA_SVC_NAME.
x-kubernetes-int-or-string: true
scheme:
description: |-
Scheme to use for connecting to the host.
Defaults to HTTP.
type: string
required:
- port
type: object
sleep:
description: Sleep represents a duration that the container
should sleep.
properties:
seconds:
description: Seconds is the number of seconds to sleep.
format: int64
type: integer
required:
- seconds
type: object
tcpSocket:
description: |-
Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept
for backward compatibility. There is no validation of this field and
lifecycle hooks will fail at runtime when it is specified.
properties:
host:
description: 'Optional: Host name to connect to, defaults
to the pod IP.'
type: string
port:
anyOf:
- type: integer
- type: string
description: |-
Number or name of the port to access on the container.
Number must be in the range 1 to 65535.
Name must be an IANA_SVC_NAME.
x-kubernetes-int-or-string: true
required:
- port
type: object
type: object
type: object
livenessProbe:
description: |-
Periodic probe of backend liveness.
Expand Down
5 changes: 5 additions & 0 deletions chart/crds/service-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ spec:
format: int32
type: integer
rolloutStrategy:
default:
rollingUpdateConfiguration:
maxSurge: 0
maxUnavailable: 1
type: RollingUpdate
description: |-
RolloutStrategy defines the strategy that will be applied to update replicas
when a revision is made to the leaderWorkerTemplate.
Expand Down
2 changes: 1 addition & 1 deletion chart/templates/backends/llamacpp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
app.kubernetes.io/created-by: llmaz
name: llamacpp
spec:
commands:
command:
- ./llama-server
image: ghcr.io/ggerganov/llama.cpp
version: server
Expand Down
2 changes: 1 addition & 1 deletion chart/templates/backends/ollama.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
app.kubernetes.io/created-by: llmaz
name: ollama
spec:
commands:
command:
- sh
- -c
image: ollama/ollama
Expand Down
2 changes: 1 addition & 1 deletion chart/templates/backends/sglang.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
app.kubernetes.io/created-by: llmaz
name: sglang
spec:
commands:
command:
- python3
- -m
- sglang.launch_server
Expand Down
2 changes: 1 addition & 1 deletion chart/templates/backends/vllm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
app.kubernetes.io/created-by: llmaz
name: vllm
spec:
commands:
command:
- python3
- -m
- vllm.entrypoints.openai.api_server
Expand Down
4 changes: 2 additions & 2 deletions config/crd/bases/inference.llmaz.io_backendruntimes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ spec:
spec:
description: BackendRuntimeSpec defines the desired state of BackendRuntime
properties:
commands:
description: Commands represents the default commands for the backendRuntime.
command:
description: Command represents the default command for the backendRuntime.
items:
type: string
type: array
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/inference.v1alpha1.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,11 @@ SharedMemorySize defined here will "overwrite" the sharedMemorySize in
<tbody>


<tr><td><code>commands</code><br/>
<tr><td><code>command</code><br/>
<code>[]string</code>
</td>
<td>
<p>Commands represents the default commands for the backendRuntime.</p>
<p>Command represents the default command for the backendRuntime.</p>
</td>
</tr>
<tr><td><code>image</code> <B>[Required]</B><br/>
Expand Down
6 changes: 3 additions & 3 deletions pkg/controller/inference/playground_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,8 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
version = *playground.Spec.BackendRuntimeConfig.Version
}

// commands
commands := parser.Commands()
// command
command := parser.Command()

// lifecycle
lifecycle := parser.Lifecycle()
Expand All @@ -337,7 +337,7 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
Name: modelSource.MODEL_RUNNER_CONTAINER_NAME,
Image: parser.Image(version),
Resources: resources,
Command: commands,
Command: command,
Args: args,
Env: envs,
Lifecycle: lifecycle,
Expand Down
4 changes: 2 additions & 2 deletions pkg/controller_helper/backendruntime/backendruntime.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ func NewBackendRuntimeParser(backendRuntime *inferenceapi.BackendRuntime, models
}
}

func (p *BackendRuntimeParser) Commands() []string {
return p.backendRuntime.Spec.Commands
func (p *BackendRuntimeParser) Command() []string {
return p.backendRuntime.Spec.Command
}

func (p *BackendRuntimeParser) Envs() []corev1.EnvVar {
Expand Down
8 changes: 4 additions & 4 deletions pkg/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ func In(strings []string, s string) bool {
return false
}

// MergeArgsWithCommands will merge the arguments with the commands, the arguments will be append
// MergeArgsWithCommand will merge the arguments with the command: the arguments are joined with
// single spaces and appended to the last element of the command. If that last element ends with
// "\n", the trailing newline is trimmed first. The command slice is modified in place and must
// not be empty.
func MergeArgsWithCommands(commands []string, args []string) []string {
func MergeArgsWithCommand(command []string, args []string) []string {
fullArgs := strings.Join(args, " ")
commands[len(commands)-1] = fmt.Sprintf("%s %s", strings.TrimSuffix(commands[len(commands)-1], "\n"), fullArgs)
return commands
command[len(command)-1] = fmt.Sprintf("%s %s", strings.TrimSuffix(command[len(command)-1], "\n"), fullArgs)
return command
}
Loading