From a750b49d2c11b0b2eb233cac4dd228e53803955f Mon Sep 17 00:00:00 2001 From: googs1025 Date: Thu, 13 Mar 2025 19:43:01 +0800 Subject: [PATCH 1/3] chore: fix BackendRuntime crds field Commands -> Command --- .../v1alpha1/backendruntime_types.go | 4 +-- .../v1alpha1/zz_generated.deepcopy.go | 4 +-- .../inference.llmaz.io_backendruntimes.yaml | 4 +-- docs/reference/inference.v1alpha1.md | 4 +-- .../inference/playground_controller.go | 6 ++-- .../backendruntime/backendruntime.go | 4 +-- pkg/util/util.go | 8 ++--- pkg/util/util_test.go | 32 +++++++++---------- test/config/backends/fake_backend.yaml | 2 +- test/config/backends/llamacpp.yaml | 2 +- test/config/backends/ollama.yaml | 2 +- test/config/backends/sglang.yaml | 2 +- test/config/backends/vllm.yaml | 2 +- test/util/validation/validate_playground.go | 4 +-- test/util/wrapper/backend.go | 4 +-- 15 files changed, 42 insertions(+), 42 deletions(-) diff --git a/api/inference/v1alpha1/backendruntime_types.go b/api/inference/v1alpha1/backendruntime_types.go index dc313d8c..c790402c 100644 --- a/api/inference/v1alpha1/backendruntime_types.go +++ b/api/inference/v1alpha1/backendruntime_types.go @@ -76,9 +76,9 @@ type RecommendedConfig struct { // BackendRuntimeSpec defines the desired state of BackendRuntime type BackendRuntimeSpec struct { - // Commands represents the default commands for the backendRuntime. + // Command represents the default command for the backendRuntime. // +optional - Commands []string `json:"commands,omitempty"` + Command []string `json:"command,omitempty"` // Image represents the default image registry of the backendRuntime. // It will work together with version to make up a real image. Image string `json:"image"` diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go index 6735dba5..b5bea51c 100644 --- a/api/inference/v1alpha1/zz_generated.deepcopy.go +++ b/api/inference/v1alpha1/zz_generated.deepcopy.go @@ -143,8 +143,8 @@ func (in *BackendRuntimeList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BackendRuntimeSpec) DeepCopyInto(out *BackendRuntimeSpec) { *out = *in - if in.Commands != nil { - in, out := &in.Commands, &out.Commands + if in.Command != nil { + in, out := &in.Command, &out.Command *out = make([]string, len(*in)) copy(*out, *in) } diff --git a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml index d6458991..a626c0af 100644 --- a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml +++ b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml @@ -41,8 +41,8 @@ spec: spec: description: BackendRuntimeSpec defines the desired state of BackendRuntime properties: - commands: - description: Commands represents the default commands for the backendRuntime. + command: + description: Command represents the default command for the backendRuntime. items: type: string type: array diff --git a/docs/reference/inference.v1alpha1.md b/docs/reference/inference.v1alpha1.md index 9a25b9ea..e9c775ac 100644 --- a/docs/reference/inference.v1alpha1.md +++ b/docs/reference/inference.v1alpha1.md @@ -213,11 +213,11 @@ SharedMemorySize defined here will "overwrite" the sharedMemorySize in -commands
+command
[]string -

Commands represents the default commands for the backendRuntime.

+

Command represents the default command for the backendRuntime.

image [Required]
diff --git a/pkg/controller/inference/playground_controller.go b/pkg/controller/inference/playground_controller.go index 19649eb5..fe45f7b8 100644 --- a/pkg/controller/inference/playground_controller.go +++ b/pkg/controller/inference/playground_controller.go @@ -310,8 +310,8 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro version = *playground.Spec.BackendRuntimeConfig.Version } - // commands - commands := parser.Commands() + // command + command := parser.Command() // lifecycle lifecycle := parser.Lifecycle() @@ -337,7 +337,7 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro Name: modelSource.MODEL_RUNNER_CONTAINER_NAME, Image: parser.Image(version), Resources: resources, - Command: commands, + Command: command, Args: args, Env: envs, Lifecycle: lifecycle, diff --git a/pkg/controller_helper/backendruntime/backendruntime.go b/pkg/controller_helper/backendruntime/backendruntime.go index 9a36f6ad..8358d3f6 100644 --- a/pkg/controller_helper/backendruntime/backendruntime.go +++ b/pkg/controller_helper/backendruntime/backendruntime.go @@ -48,8 +48,8 @@ func NewBackendRuntimeParser(backendRuntime *inferenceapi.BackendRuntime, models } } -func (p *BackendRuntimeParser) Commands() []string { - return p.backendRuntime.Spec.Commands +func (p *BackendRuntimeParser) Command() []string { + return p.backendRuntime.Spec.Command } func (p *BackendRuntimeParser) Envs() []corev1.EnvVar { diff --git a/pkg/util/util.go b/pkg/util/util.go index 23fe9b74..bf651a59 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -60,11 +60,11 @@ func In(strings []string, s string) bool { return false } -// MergeArgsWithCommands will merge the arguments with the commands, the arguments will be append +// MergeArgsWithCommand will merge the arguments with the command, the arguments will be append // sequentially and separated with spaces. If the last command has "/n", it will be trimmed. // has "/n", it will be trimmed. -func MergeArgsWithCommands(commands []string, args []string) []string { +func MergeArgsWithCommand(command []string, args []string) []string { fullArgs := strings.Join(args, " ") - commands[len(commands)-1] = fmt.Sprintf("%s %s", strings.TrimSuffix(commands[len(commands)-1], "\n"), fullArgs) - return commands + command[len(command)-1] = fmt.Sprintf("%s %s", strings.TrimSuffix(command[len(command)-1], "\n"), fullArgs) + return command } diff --git a/pkg/util/util_test.go b/pkg/util/util_test.go index 5c8f5728..9db27ea2 100644 --- a/pkg/util/util_test.go +++ b/pkg/util/util_test.go @@ -143,32 +143,32 @@ func TestIn(t *testing.T) { } } -func TestMergeArgsWithCommands(t *testing.T) { +func TestMergeArgsWithCommand(t *testing.T) { testCases := []struct { - name string - commands []string - args []string - wantCommands []string + name string + command []string + args []string + wantCommand []string }{ { - name: "commands with no line break", - commands: []string{"run server"}, - args: []string{"--host", "localhost"}, - wantCommands: []string{"run server --host localhost"}, + name: "command with no line break", + command: []string{"run server"}, + args: []string{"--host", "localhost"}, + wantCommand: []string{"run server --host localhost"}, }, { - name: "commands with line break", - commands: []string{"go", "run server\n"}, - args: []string{"--port", "8080"}, - wantCommands: []string{"go", "run server --port 8080"}, + name: "command with line break", + command: []string{"go", "run server\n"}, + args: []string{"--port", "8080"}, + wantCommand: []string{"go", "run server --port 8080"}, }, } for _, test := range testCases { t.Run(test.name, func(t *testing.T) { - got := MergeArgsWithCommands(test.commands, test.args) - if diff := cmp.Diff(got, test.wantCommands); diff != "" { - t.Fatalf("unexpected commands: %s", diff) + got := MergeArgsWithCommand(test.command, test.args) + if diff := cmp.Diff(got, test.wantCommand); diff != "" { + t.Fatalf("unexpected command: %s", diff) } }) } diff --git a/test/config/backends/fake_backend.yaml b/test/config/backends/fake_backend.yaml index e1d4657f..6b00216c 100644 --- a/test/config/backends/fake_backend.yaml +++ b/test/config/backends/fake_backend.yaml @@ -7,7 +7,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: fake-backend spec: - commands: + command: - sh - -c - echo "hello" diff --git a/test/config/backends/llamacpp.yaml b/test/config/backends/llamacpp.yaml index 757cd0c3..dfd81d8e 100644 --- a/test/config/backends/llamacpp.yaml +++ b/test/config/backends/llamacpp.yaml @@ -7,7 +7,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: llamacpp spec: - commands: + command: - ./llama-server image: ghcr.io/ggerganov/llama.cpp version: server diff --git a/test/config/backends/ollama.yaml b/test/config/backends/ollama.yaml index 35f93e79..c6649e08 100644 --- a/test/config/backends/ollama.yaml +++ b/test/config/backends/ollama.yaml @@ -7,7 +7,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: ollama spec: - commands: + command: - sh - -c image: ollama/ollama diff --git a/test/config/backends/sglang.yaml b/test/config/backends/sglang.yaml index 1550e745..e07001c9 100644 --- a/test/config/backends/sglang.yaml +++ b/test/config/backends/sglang.yaml @@ -7,7 +7,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: sglang spec: - commands: + command: - python3 - -m - sglang.launch_server diff --git a/test/config/backends/vllm.yaml b/test/config/backends/vllm.yaml index 8830c69a..b60b5fb1 100644 --- a/test/config/backends/vllm.yaml +++ b/test/config/backends/vllm.yaml @@ -7,7 +7,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: vllm spec: - commands: + command: - python3 - -m - vllm.entrypoints.openai.api_server diff --git a/test/util/validation/validate_playground.go b/test/util/validation/validate_playground.go index 5336c008..749affdb 100644 --- a/test/util/validation/validate_playground.go +++ b/test/util/validation/validate_playground.go @@ -159,8 +159,8 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground } } - // compare commands - if diff := cmp.Diff(parser.Commands(), service.Spec.WorkloadTemplate.WorkerTemplate.Spec.Containers[0].Command); diff != "" { + // compare command + if diff := cmp.Diff(parser.Command(), service.Spec.WorkloadTemplate.WorkerTemplate.Spec.Containers[0].Command); diff != "" { return errors.New("command not right") } diff --git a/test/util/wrapper/backend.go b/test/util/wrapper/backend.go index 57573df5..55f78f17 100644 --- a/test/util/wrapper/backend.go +++ b/test/util/wrapper/backend.go @@ -57,8 +57,8 @@ func (w *BackendRuntimeWrapper) Version(version string) *BackendRuntimeWrapper { return w } -func (w *BackendRuntimeWrapper) Command(commands []string) *BackendRuntimeWrapper { - w.Spec.Commands = commands +func (w *BackendRuntimeWrapper) Command(command []string) *BackendRuntimeWrapper { + w.Spec.Command = command return w } From 964c54ea086776c18af2d1fcf0a5de125f3c322e Mon Sep 17 00:00:00 2001 From: googs1025 Date: Fri, 14 Mar 2025 11:19:07 +0800 Subject: [PATCH 2/3] add field in chart/ --- chart/crds/backendruntime-crd.yaml | 224 ++++++++++++++++++++++++- chart/crds/service-crd.yaml | 5 + chart/templates/backends/llamacpp.yaml | 2 +- chart/templates/backends/ollama.yaml | 2 +- chart/templates/backends/sglang.yaml | 2 +- chart/templates/backends/vllm.yaml | 2 +- chart/values.yaml | 2 +- index.yaml | 6 +- 8 files changed, 235 insertions(+), 10 deletions(-) diff --git a/chart/crds/backendruntime-crd.yaml b/chart/crds/backendruntime-crd.yaml index 045e0c6b..29222c6c 100644 --- a/chart/crds/backendruntime-crd.yaml +++ b/chart/crds/backendruntime-crd.yaml @@ -40,8 +40,8 @@ spec: spec: description: BackendRuntimeSpec defines the desired state of BackendRuntime properties: - commands: - description: Commands represents the default commands for the backendRuntime. + command: + description: Command represents the default command for the backendRuntime. items: type: string type: array @@ -168,6 +168,226 @@ spec: Image represents the default image registry of the backendRuntime. It will work together with version to make up a real image. type: string + lifecycle: + description: Lifecycle represents hooks executed during the lifecycle + of the container. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies a command to execute in the container. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies an HTTP GET request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents a duration that the container + should sleep. + properties: + seconds: + description: Seconds is the number of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for backward compatibility. There is no validation of this field and + lifecycle hooks will fail at runtime when it is specified. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies a command to execute in the container. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies an HTTP GET request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents a duration that the container + should sleep. + properties: + seconds: + description: Seconds is the number of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for backward compatibility. There is no validation of this field and + lifecycle hooks will fail at runtime when it is specified. + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object livenessProbe: description: |- Periodic probe of backend liveness. diff --git a/chart/crds/service-crd.yaml b/chart/crds/service-crd.yaml index e43f9890..5974a1cd 100644 --- a/chart/crds/service-crd.yaml +++ b/chart/crds/service-crd.yaml @@ -91,6 +91,11 @@ spec: format: int32 type: integer rolloutStrategy: + default: + rollingUpdateConfiguration: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate description: |- RolloutStrategy defines the strategy that will be applied to update replicas when a revision is made to the leaderWorkerTemplate. diff --git a/chart/templates/backends/llamacpp.yaml b/chart/templates/backends/llamacpp.yaml index 2923f07d..cb6483d2 100644 --- a/chart/templates/backends/llamacpp.yaml +++ b/chart/templates/backends/llamacpp.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: llamacpp spec: - commands: + command: - ./llama-server image: ghcr.io/ggerganov/llama.cpp version: server diff --git a/chart/templates/backends/ollama.yaml b/chart/templates/backends/ollama.yaml index 83efb1d7..8de715ba 100644 --- a/chart/templates/backends/ollama.yaml +++ b/chart/templates/backends/ollama.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: ollama spec: - commands: + command: - sh - -c image: ollama/ollama diff --git a/chart/templates/backends/sglang.yaml b/chart/templates/backends/sglang.yaml index 0046db12..c2fa1e80 100644 --- a/chart/templates/backends/sglang.yaml +++ b/chart/templates/backends/sglang.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: sglang spec: - commands: + command: - python3 - -m - sglang.launch_server diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml index ca99bc1d..b0691225 100644 --- a/chart/templates/backends/vllm.yaml +++ b/chart/templates/backends/vllm.yaml @@ -8,7 +8,7 @@ metadata: app.kubernetes.io/created-by: llmaz name: vllm spec: - commands: + command: - python3 - -m - vllm.entrypoints.openai.api_server diff --git a/chart/values.yaml b/chart/values.yaml index 6a3649dd..596baa20 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -33,7 +33,7 @@ controllerManager: - ALL image: repository: inftyai/llmaz - tag: v0.1.1 + tag: main resources: limits: cpu: 500m diff --git a/index.yaml b/index.yaml index 7c347581..fd2587f1 100644 --- a/index.yaml +++ b/index.yaml @@ -3,9 +3,9 @@ entries: llmaz: - apiVersion: v2 appVersion: 0.1.1 - created: "2025-02-18T14:46:30.474789+08:00" + created: "2025-03-14T11:13:11.799864+08:00" description: A Helm chart for llmaz - digest: b30ba8a78986cba95256d4869f4f5bd0bd79c5d25867497021b80ae5f1ee04f0 + digest: 2f50fea6de4c1c0dfd03199c11e602f3119947860eb318c6e237bb8acf42e4fd name: llmaz type: application urls: @@ -71,4 +71,4 @@ entries: urls: - https://inftyai.github.io/llmaz/llmaz-0.0.1.tgz version: 0.0.1 -generated: "2025-02-18T14:46:30.460221+08:00" +generated: "2025-03-14T11:13:11.792282+08:00" From 70c3dd09e67bd228b4f11c50e6c3997d909c86cf Mon Sep 17 00:00:00 2001 From: googs1025 Date: Fri, 14 Mar 2025 11:41:39 +0800 Subject: [PATCH 3/3] revert yaml file Signed-off-by: googs1025 --- chart/values.yaml | 2 +- index.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/chart/values.yaml b/chart/values.yaml index 596baa20..6a3649dd 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -33,7 +33,7 @@ controllerManager: - ALL image: repository: inftyai/llmaz - tag: main + tag: v0.1.1 resources: limits: cpu: 500m diff --git a/index.yaml b/index.yaml index fd2587f1..7c347581 100644 --- a/index.yaml +++ b/index.yaml @@ -3,9 +3,9 @@ entries: llmaz: - apiVersion: v2 appVersion: 0.1.1 - created: "2025-03-14T11:13:11.799864+08:00" + created: "2025-02-18T14:46:30.474789+08:00" description: A Helm chart for llmaz - digest: 2f50fea6de4c1c0dfd03199c11e602f3119947860eb318c6e237bb8acf42e4fd + digest: b30ba8a78986cba95256d4869f4f5bd0bd79c5d25867497021b80ae5f1ee04f0 name: llmaz type: application urls: @@ -71,4 +71,4 @@ entries: urls: - https://inftyai.github.io/llmaz/llmaz-0.0.1.tgz version: 0.0.1 -generated: "2025-03-14T11:13:11.792282+08:00" +generated: "2025-02-18T14:46:30.460221+08:00"