From 990dd7448ec1d3ede9444de1cb08f8fa4ac6a429 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 21 Apr 2025 17:05:49 +0800 Subject: [PATCH 1/4] Add open-webui dependence Signed-off-by: kerthcet --- Makefile | 2 +- chart/Chart.lock | 6 ++++++ chart/Chart.yaml | 6 ++++++ chart/values.global.yaml | 18 ++++++++++++++++++ docs/examples/llamacpp/playground.yaml | 1 - 5 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 chart/Chart.lock diff --git a/Makefile b/Makefile index 41387839..651bdda7 100644 --- a/Makefile +++ b/Makefile @@ -316,7 +316,7 @@ helm: manifests kustomize helmify .PHONY: helm-install helm-install: helm - helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml + helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml --dependency-update .PHONY: helm-upgrade helm-upgrade: image-push artifacts helm-install diff --git a/chart/Chart.lock b/chart/Chart.lock new file mode 100644 index 00000000..a0da65ee --- /dev/null +++ b/chart/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: open-webui + repository: https://helm.openwebui.com/ + version: 6.4.0 +digest: sha256:2520f6e26f2e6fd3e51c5f7f940eef94217c125a9828b0f59decedbecddcdb29 +generated: "2025-04-21T00:50:06.532039+08:00" diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 2aec1a89..f9725101 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -19,3 +19,9 @@ version: 0.0.8 # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. appVersion: 0.1.2 + +dependencies: + - name: open-webui + version: "6.4.0" + repository: "https://helm.openwebui.com/" + condition: open-webui.enable diff --git a/chart/values.global.yaml b/chart/values.global.yaml index 58722e2c..32835024 100644 --- a/chart/values.global.yaml +++ b/chart/values.global.yaml @@ -29,3 +29,21 @@ leaderWorkerSet: prometheus: # Prometheus is required to enable smart routing. 
enable: true + +open-webui: + enable: true + persistence: + enabled: false + enableOpenaiApi: true + openaiBaseApiUrl: "https://api.openai.com/v1" + extraEnvVars: + - name: OPENAI_API_KEY + value: "ChangeMe" + ollama: + enabled: false + pipelines: + enabled: false + tika: + enabled: false + redis-cluster: + enabled: false diff --git a/docs/examples/llamacpp/playground.yaml b/docs/examples/llamacpp/playground.yaml index 95e6524f..62cd3dc2 100644 --- a/docs/examples/llamacpp/playground.yaml +++ b/docs/examples/llamacpp/playground.yaml @@ -8,6 +8,5 @@ spec: modelName: qwen2-0--5b-gguf backendRuntimeConfig: backendName: llamacpp - configName: default args: - -fa # use flash attention From fe2289d2afed53a191a69400cfa8fb005a153354 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 21 Apr 2025 17:46:06 +0800 Subject: [PATCH 2/4] Add open-webui as the default chatbot Signed-off-by: kerthcet --- Makefile | 3 ++ chart/Chart.yaml | 2 +- chart/templates/backends/llamacpp.yaml | 2 +- chart/templates/backends/ollama.yaml | 2 +- chart/templates/backends/sglang.yaml | 2 +- chart/templates/backends/tgi.yaml | 2 +- chart/templates/backends/vllm.yaml | 2 +- chart/templates/lws/leaderworkerset.yaml | 2 +- chart/templates/prometheus/prometheus.yaml | 2 +- .../templates/prometheus/service-monitor.yaml | 2 +- .../templates/prometheus/serviceaccount.yaml | 2 +- chart/values.global.yaml | 8 ++-- docs/open-webui.md | 47 +++++++++++++++++++ 13 files changed, 64 insertions(+), 14 deletions(-) create mode 100644 docs/open-webui.md diff --git a/Makefile b/Makefile index 651bdda7..65f28516 100644 --- a/Makefile +++ b/Makefile @@ -321,6 +321,9 @@ helm-install: helm .PHONY: helm-upgrade helm-upgrade: image-push artifacts helm-install +.PHONY: install-chatbot +install-chatbot: helm-install + .PHONY: helm-package helm-package: helm # Make sure will alwasy start with a new line. 
diff --git a/chart/Chart.yaml b/chart/Chart.yaml index f9725101..02128132 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -24,4 +24,4 @@ dependencies: - name: open-webui version: "6.4.0" repository: "https://helm.openwebui.com/" - condition: open-webui.enable + condition: open-webui.enabled diff --git a/chart/templates/backends/llamacpp.yaml b/chart/templates/backends/llamacpp.yaml index eaeecd8e..150e2378 100644 --- a/chart/templates/backends/llamacpp.yaml +++ b/chart/templates/backends/llamacpp.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/backends/ollama.yaml b/chart/templates/backends/ollama.yaml index 70b68fda..097e7ba4 100644 --- a/chart/templates/backends/ollama.yaml +++ b/chart/templates/backends/ollama.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/backends/sglang.yaml b/chart/templates/backends/sglang.yaml index 710382c5..2c5a9238 100644 --- a/chart/templates/backends/sglang.yaml +++ b/chart/templates/backends/sglang.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/backends/tgi.yaml b/chart/templates/backends/tgi.yaml index dd9af4a1..693964ee 100644 --- a/chart/templates/backends/tgi.yaml +++ b/chart/templates/backends/tgi.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml index 35d257ab..a65f6a5c 100644 --- a/chart/templates/backends/vllm.yaml +++ 
b/chart/templates/backends/vllm.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/lws/leaderworkerset.yaml b/chart/templates/lws/leaderworkerset.yaml index 37c11474..ff5555b9 100644 --- a/chart/templates/lws/leaderworkerset.yaml +++ b/chart/templates/lws/leaderworkerset.yaml @@ -1,4 +1,4 @@ -{{- if .Values.leaderWorkerSet.enable -}} +{{- if .Values.leaderWorkerSet.enabled -}} apiVersion: v1 kind: Namespace metadata: diff --git a/chart/templates/prometheus/prometheus.yaml b/chart/templates/prometheus/prometheus.yaml index a82bda4a..cbb44f6a 100644 --- a/chart/templates/prometheus/prometheus.yaml +++ b/chart/templates/prometheus/prometheus.yaml @@ -1,4 +1,4 @@ -{{- if .Values.prometheus.enable }} +{{- if .Values.prometheus.enabled }} {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }} {{- end }} diff --git a/chart/templates/prometheus/service-monitor.yaml b/chart/templates/prometheus/service-monitor.yaml index 37fd07c0..77b9c387 100644 --- a/chart/templates/prometheus/service-monitor.yaml +++ b/chart/templates/prometheus/service-monitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.prometheus.enable }} +{{- if .Values.prometheus.enabled }} {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." 
}} {{- end }} diff --git a/chart/templates/prometheus/serviceaccount.yaml b/chart/templates/prometheus/serviceaccount.yaml index 1d200445..0849c20f 100644 --- a/chart/templates/prometheus/serviceaccount.yaml +++ b/chart/templates/prometheus/serviceaccount.yaml @@ -1,4 +1,4 @@ -{{- if .Values.prometheus.enable }} +{{- if .Values.prometheus.enabled }} {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }} {{- end }} diff --git a/chart/values.global.yaml b/chart/values.global.yaml index 32835024..8d6ed9a3 100644 --- a/chart/values.global.yaml +++ b/chart/values.global.yaml @@ -1,7 +1,7 @@ fullnameOverride: "llmaz" backendRuntime: - enable: true + enabled: true llamacpp: image: repository: ghcr.io/ggerganov/llama.cpp @@ -24,14 +24,14 @@ backendRuntime: tag: v0.7.3 leaderWorkerSet: - enable: true + enabled: true prometheus: # Prometheus is required to enable smart routing. - enable: true + enabled: true open-webui: - enable: true + enabled: false persistence: enabled: false enableOpenaiApi: true diff --git a/docs/open-webui.md b/docs/open-webui.md new file mode 100644 index 00000000..638a2310 --- /dev/null +++ b/docs/open-webui.md @@ -0,0 +1,47 @@ +# Open-WebUI + +[Open WebUI](https://github.com/open-webui/open-webui) is a user-friendly AI interface with OpenAI-compatible APIs, serving as the default chatbot for llmaz. + +## Prerequisites + +- Make sure you're located in **llmaz-system** namespace, haven't tested with other namespaces. +- Make sure [EnvoyGateway](https://github.com/envoyproxy/gateway) and [Envoy AI Gateway](https://github.com/envoyproxy/ai-gateway) are installed, both of them are installed by default in llmaz. + +## How to use + +1. Enable Open WebUI in the `values.global.yaml` file, open-webui is disabled by default. 
+ + ```yaml + open-webui: + enabled: true + ``` + + > Optionally set `persistence.enabled` to `true` to persist the data; this is recommended for production. + +2. Run `kubectl get svc -n envoy-gateway-system` to list out the services, the output looks like: + + ```cmd + envoy-default-default-envoy-ai-gateway-dbec795a LoadBalancer 10.96.145.150 80:30548/TCP 132m + envoy-gateway ClusterIP 10.96.52.76 18000/TCP,18001/TCP,18002/TCP,19001/TCP 172m + ``` + +3. Set `openaiBaseApiUrl` in the `values.global.yaml` like: + + ```yaml + open-webui: + enabled: true + openaiBaseApiUrl: http://envoy-default-default-envoy-ai-gateway-dbec795a.envoy-gateway-system.svc.cluster.local/v1 + ``` + +4. Run `make install-chatbot` to install the chatbot. + +5. Set up port forwarding: + ``` + kubectl port-forward svc/open-webui 8080:80 + ``` + +6. Visit [http://localhost:8080](http://localhost:8080) to access the Open WebUI. + +7. Configure the administrator for the first time. + +**That's it! You can now chat with llmaz models with Open WebUI.** From 7c3bbd30be17d0ef81a11e349db4b1647ed9b646 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 21 Apr 2025 17:54:17 +0800 Subject: [PATCH 3/4] Update readme.md Signed-off-by: kerthcet --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1645ae75..c39ba816 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Easy, advanced inference platform for large language models on Kubernetes - **Various Model Providers**: llmaz supports a wide range of model providers, such as [HuggingFace](https://huggingface.co/), [ModelScope](https://www.modelscope.cn), ObjectStores. llmaz will automatically handle the model loading, requiring no effort from users. - **Multi-Host Support**: llmaz supports both single-host and multi-host scenarios with [LWS](https://github.com/kubernetes-sigs/lws) from day 0. 
- **Scaling Efficiency**: llmaz supports horizontal scaling with [HPA](./docs/examples/hpa/README.md) by default and will integrate with autoscaling components like [Cluster-Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) or [Karpenter](https://github.com/kubernetes-sigs/karpenter) for smart scaling across different clouds. +- **Build-in ChatUI**: Out-of-the-box chatbot support with the integration of [Open WebUI](https://github.com/open-webui/open-webui), see configuration [here](./docs/open-webui.md). ## Quick Start From 3ea8f5c05f6a118108dafdebb98f69c82aa5075f Mon Sep 17 00:00:00 2001 From: kerthcet Date: Mon, 21 Apr 2025 18:01:40 +0800 Subject: [PATCH 4/4] Update readme.md Signed-off-by: kerthcet --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c39ba816..e2f8e439 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Easy, advanced inference platform for large language models on Kubernetes - **Various Model Providers**: llmaz supports a wide range of model providers, such as [HuggingFace](https://huggingface.co/), [ModelScope](https://www.modelscope.cn), ObjectStores. llmaz will automatically handle the model loading, requiring no effort from users. - **Multi-Host Support**: llmaz supports both single-host and multi-host scenarios with [LWS](https://github.com/kubernetes-sigs/lws) from day 0. - **Scaling Efficiency**: llmaz supports horizontal scaling with [HPA](./docs/examples/hpa/README.md) by default and will integrate with autoscaling components like [Cluster-Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) or [Karpenter](https://github.com/kubernetes-sigs/karpenter) for smart scaling across different clouds. -- **Build-in ChatUI**: Out-of-the-box chatbot support with the integration of [Open WebUI](https://github.com/open-webui/open-webui), see configuration [here](./docs/open-webui.md). 
+- **Built-in ChatUI**: Out-of-the-box chatbot support with the integration of [Open WebUI](https://github.com/open-webui/open-webui), see configurations [here](./docs/open-webui.md). ## Quick Start