diff --git a/api/inference/v1alpha1/backendruntime_types.go b/api/inference/v1alpha1/backendruntime_types.go index 41672842..7246fd12 100644 --- a/api/inference/v1alpha1/backendruntime_types.go +++ b/api/inference/v1alpha1/backendruntime_types.go @@ -35,7 +35,8 @@ type BackendRuntimeArg struct { // BackendRuntimeSpec defines the desired state of BackendRuntime type BackendRuntimeSpec struct { // Commands represents the default command of the backendRuntime. - Commands []string `json:"commands"` + // +optional + Commands []string `json:"commands,omitempty"` // Image represents the default image registry of the backendRuntime. // It will work together with version to make up a real image. Image string `json:"image"` diff --git a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml index 77d6d580..50cb9778 100644 --- a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml +++ b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml @@ -231,7 +231,6 @@ spec: It will be appended to the image as a tag. 
type: string required: - - commands - image - resources - version diff --git a/docs/examples/sglang/playground.yaml b/docs/examples/sglang/playground.yaml index 8bb8601c..a94a55f8 100644 --- a/docs/examples/sglang/playground.yaml +++ b/docs/examples/sglang/playground.yaml @@ -1,10 +1,10 @@ apiVersion: inference.llmaz.io/v1alpha1 kind: Playground metadata: - name: qwen2-05b + name: qwen2-0--5b spec: replicas: 1 modelClaim: - modelName: qwen2-05b + modelName: qwen2-0--5b backendRuntimeConfig: name: sglang diff --git a/docs/support-backends.md b/docs/support-backends.md index 8aead1e3..49a57484 100644 --- a/docs/support-backends.md +++ b/docs/support-backends.md @@ -1,13 +1,13 @@ # All Kinds of Supported Inference Backends -## vLLM +## llama.cpp -[vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs +[llama.cpp](https://github.com/ggerganov/llama.cpp) enables LLM inference with minimal setup and state-of-the-art performance on a wide variety of hardware - locally and in the cloud. ## SGLang [SGLang](https://github.com/sgl-project/sglang) is yet another fast serving framework for large language models and vision language models. -## llama.cpp +## vLLM -[llama.cpp](https://github.com/ggerganov/llama.cpp) is to enable LLM inference with minimal setup and state-of-the-art performance on a wide variety of hardware - locally and in the cloud. +[vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs.