From 990dd7448ec1d3ede9444de1cb08f8fa4ac6a429 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Mon, 21 Apr 2025 17:05:49 +0800
Subject: [PATCH 1/4] Add open-webui dependency

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 Makefile                               |  2 +-
 chart/Chart.lock                       |  6 ++++++
 chart/Chart.yaml                       |  6 ++++++
 chart/values.global.yaml               | 18 ++++++++++++++++++
 docs/examples/llamacpp/playground.yaml |  1 -
 5 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 chart/Chart.lock

diff --git a/Makefile b/Makefile
index 41387839..651bdda7 100644
--- a/Makefile
+++ b/Makefile
@@ -316,7 +316,7 @@ helm: manifests kustomize helmify
 
 .PHONY: helm-install
 helm-install: helm
-	helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml
+	helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml --dependency-update
 
 .PHONY: helm-upgrade
 helm-upgrade: image-push artifacts helm-install
diff --git a/chart/Chart.lock b/chart/Chart.lock
new file mode 100644
index 00000000..a0da65ee
--- /dev/null
+++ b/chart/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: open-webui
+  repository: https://helm.openwebui.com/
+  version: 6.4.0
+digest: sha256:2520f6e26f2e6fd3e51c5f7f940eef94217c125a9828b0f59decedbecddcdb29
+generated: "2025-04-21T00:50:06.532039+08:00"
diff --git a/chart/Chart.yaml b/chart/Chart.yaml
index 2aec1a89..f9725101 100644
--- a/chart/Chart.yaml
+++ b/chart/Chart.yaml
@@ -19,3 +19,9 @@ version: 0.0.8
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
 appVersion: 0.1.2
+
+dependencies:
+  - name: open-webui
+    version: "6.4.0"
+    repository: "https://helm.openwebui.com/"
+    condition: open-webui.enable
diff --git a/chart/values.global.yaml b/chart/values.global.yaml
index 58722e2c..32835024 100644
--- a/chart/values.global.yaml
+++ b/chart/values.global.yaml
@@ -29,3 +29,21 @@ leaderWorkerSet:
 prometheus:
   # Prometheus is required to enable smart routing.
   enable: true
+
+open-webui:
+  enable: true
+  persistence:
+    enabled: false
+  enableOpenaiApi: true
+  openaiBaseApiUrl: "https://api.openai.com/v1"
+  extraEnvVars:
+  - name: OPENAI_API_KEY
+    value: "ChangeMe"
+  ollama:
+    enabled: false
+  pipelines:
+    enabled: false
+  tika:
+    enabled: false
+  redis-cluster:
+    enabled: false
diff --git a/docs/examples/llamacpp/playground.yaml b/docs/examples/llamacpp/playground.yaml
index 95e6524f..62cd3dc2 100644
--- a/docs/examples/llamacpp/playground.yaml
+++ b/docs/examples/llamacpp/playground.yaml
@@ -8,6 +8,5 @@ spec:
     modelName: qwen2-0--5b-gguf
   backendRuntimeConfig:
     backendName: llamacpp
-    configName: default
     args:
       - -fa # use flash attention

From fe2289d2afed53a191a69400cfa8fb005a153354 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Mon, 21 Apr 2025 17:46:06 +0800
Subject: [PATCH 2/4] Add open-webui as the default chatbot

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 Makefile                                      |  3 ++
 chart/Chart.yaml                              |  2 +-
 chart/templates/backends/llamacpp.yaml        |  2 +-
 chart/templates/backends/ollama.yaml          |  2 +-
 chart/templates/backends/sglang.yaml          |  2 +-
 chart/templates/backends/tgi.yaml             |  2 +-
 chart/templates/backends/vllm.yaml            |  2 +-
 chart/templates/lws/leaderworkerset.yaml      |  2 +-
 chart/templates/prometheus/prometheus.yaml    |  2 +-
 .../templates/prometheus/service-monitor.yaml |  2 +-
 .../templates/prometheus/serviceaccount.yaml  |  2 +-
 chart/values.global.yaml                      |  8 ++--
 docs/open-webui.md                            | 47 +++++++++++++++++++
 13 files changed, 64 insertions(+), 14 deletions(-)
 create mode 100644 docs/open-webui.md

diff --git a/Makefile b/Makefile
index 651bdda7..65f28516 100644
--- a/Makefile
+++ b/Makefile
@@ -321,6 +321,9 @@ helm-install: helm
 .PHONY: helm-upgrade
 helm-upgrade: image-push artifacts helm-install
 
+.PHONY: install-chatbot
+install-chatbot: helm-install
+
 .PHONY: helm-package
 helm-package: helm
 	# Make sure will alwasy start with a new line.
diff --git a/chart/Chart.yaml b/chart/Chart.yaml
index f9725101..02128132 100644
--- a/chart/Chart.yaml
+++ b/chart/Chart.yaml
@@ -24,4 +24,4 @@ dependencies:
   - name: open-webui
     version: "6.4.0"
     repository: "https://helm.openwebui.com/"
-    condition: open-webui.enable
+    condition: open-webui.enabled
diff --git a/chart/templates/backends/llamacpp.yaml b/chart/templates/backends/llamacpp.yaml
index eaeecd8e..150e2378 100644
--- a/chart/templates/backends/llamacpp.yaml
+++ b/chart/templates/backends/llamacpp.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.backendRuntime.enable -}}
+{{- if .Values.backendRuntime.enabled -}}
 apiVersion: inference.llmaz.io/v1alpha1
 kind: BackendRuntime
 metadata:
diff --git a/chart/templates/backends/ollama.yaml b/chart/templates/backends/ollama.yaml
index 70b68fda..097e7ba4 100644
--- a/chart/templates/backends/ollama.yaml
+++ b/chart/templates/backends/ollama.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.backendRuntime.enable -}}
+{{- if .Values.backendRuntime.enabled -}}
 apiVersion: inference.llmaz.io/v1alpha1
 kind: BackendRuntime
 metadata:
diff --git a/chart/templates/backends/sglang.yaml b/chart/templates/backends/sglang.yaml
index 710382c5..2c5a9238 100644
--- a/chart/templates/backends/sglang.yaml
+++ b/chart/templates/backends/sglang.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.backendRuntime.enable -}}
+{{- if .Values.backendRuntime.enabled -}}
 apiVersion: inference.llmaz.io/v1alpha1
 kind: BackendRuntime
 metadata:
diff --git a/chart/templates/backends/tgi.yaml b/chart/templates/backends/tgi.yaml
index dd9af4a1..693964ee 100644
--- a/chart/templates/backends/tgi.yaml
+++ b/chart/templates/backends/tgi.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.backendRuntime.enable -}}
+{{- if .Values.backendRuntime.enabled -}}
 apiVersion: inference.llmaz.io/v1alpha1
 kind: BackendRuntime
 metadata:
diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml
index 35d257ab..a65f6a5c 100644
--- a/chart/templates/backends/vllm.yaml
+++ b/chart/templates/backends/vllm.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.backendRuntime.enable -}}
+{{- if .Values.backendRuntime.enabled -}}
 apiVersion: inference.llmaz.io/v1alpha1
 kind: BackendRuntime
 metadata:
diff --git a/chart/templates/lws/leaderworkerset.yaml b/chart/templates/lws/leaderworkerset.yaml
index 37c11474..ff5555b9 100644
--- a/chart/templates/lws/leaderworkerset.yaml
+++ b/chart/templates/lws/leaderworkerset.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.leaderWorkerSet.enable -}}
+{{- if .Values.leaderWorkerSet.enabled -}}
 apiVersion: v1
 kind: Namespace
 metadata:
diff --git a/chart/templates/prometheus/prometheus.yaml b/chart/templates/prometheus/prometheus.yaml
index a82bda4a..cbb44f6a 100644
--- a/chart/templates/prometheus/prometheus.yaml
+++ b/chart/templates/prometheus/prometheus.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.prometheus.enable }}
+{{- if .Values.prometheus.enabled }}
 {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
 {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }}
 {{- end }}
diff --git a/chart/templates/prometheus/service-monitor.yaml b/chart/templates/prometheus/service-monitor.yaml
index 37fd07c0..77b9c387 100644
--- a/chart/templates/prometheus/service-monitor.yaml
+++ b/chart/templates/prometheus/service-monitor.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.prometheus.enable }}
+{{- if .Values.prometheus.enabled }}
 {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
 {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }}
 {{- end }}
diff --git a/chart/templates/prometheus/serviceaccount.yaml b/chart/templates/prometheus/serviceaccount.yaml
index 1d200445..0849c20f 100644
--- a/chart/templates/prometheus/serviceaccount.yaml
+++ b/chart/templates/prometheus/serviceaccount.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.prometheus.enable }}
+{{- if .Values.prometheus.enabled }}
 {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
 {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }}
 {{- end }}
diff --git a/chart/values.global.yaml b/chart/values.global.yaml
index 32835024..8d6ed9a3 100644
--- a/chart/values.global.yaml
+++ b/chart/values.global.yaml
@@ -1,7 +1,7 @@
 fullnameOverride: "llmaz"
 
 backendRuntime:
-  enable: true
+  enabled: true
   llamacpp:
     image:
       repository: ghcr.io/ggerganov/llama.cpp
@@ -24,14 +24,14 @@ backendRuntime:
       tag: v0.7.3
 
 leaderWorkerSet:
-  enable: true
+  enabled: true
 
 prometheus:
   # Prometheus is required to enable smart routing.
-  enable: true
+  enabled: true
 
 open-webui:
-  enable: true
+  enabled: false
   persistence:
     enabled: false
   enableOpenaiApi: true
diff --git a/docs/open-webui.md b/docs/open-webui.md
new file mode 100644
index 00000000..638a2310
--- /dev/null
+++ b/docs/open-webui.md
@@ -0,0 +1,47 @@
+# Open-WebUI
+
+[Open WebUI](https://github.com/open-webui/open-webui) is a user-friendly AI interface with OpenAI-compatible APIs, serving as the default chatbot for llmaz.
+
+## Prerequisites
+
+- Make sure you are working in the **llmaz-system** namespace; other namespaces have not been tested.
+- Make sure [EnvoyGateway](https://github.com/envoyproxy/gateway) and [Envoy AI Gateway](https://github.com/envoyproxy/ai-gateway) are installed; both are installed by default in llmaz.
+
+## How to use
+
+1. Enable Open WebUI in the `values.global.yaml` file; open-webui is disabled by default.
+
+    ```yaml
+    open-webui:
+      enabled: true
+    ```
+
+    > Optionally, set `persistence.enabled=true` to persist the data; this is recommended for production.
+
+2. Run `kubectl get svc -n envoy-gateway-system` to list the services. The output looks like:
+
+    ```cmd
+    envoy-default-default-envoy-ai-gateway-dbec795a   LoadBalancer   10.96.145.150   <pending>     80:30548/TCP                              132m
+    envoy-gateway                                     ClusterIP      10.96.52.76     <none>        18000/TCP,18001/TCP,18002/TCP,19001/TCP   172m
+    ```
+
+3. Set `openaiBaseApiUrl` in the `values.global.yaml` file, using the AI gateway service name from the previous step, for example:
+
+    ```yaml
+    open-webui:
+      enabled: true
+      openaiBaseApiUrl: http://envoy-default-default-envoy-ai-gateway-dbec795a.envoy-gateway-system.svc.cluster.local/v1
+    ```
+
+4. Run `make install-chatbot` to install the chatbot.
+
+5. Set up port forwarding:
+    ```
+    kubectl port-forward svc/open-webui 8080:80
+    ```
+
+6. Visit [http://localhost:8080](http://localhost:8080) to access the Open WebUI.
+
+7. Configure the administrator account on first use.
+
+**That's it! You can now chat with llmaz models with Open WebUI.**

From 7c3bbd30be17d0ef81a11e349db4b1647ed9b646 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Mon, 21 Apr 2025 17:54:17 +0800
Subject: [PATCH 3/4] Update readme.md

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 1645ae75..c39ba816 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,7 @@ Easy, advanced inference platform for large language models on Kubernetes
 - **Various Model Providers**: llmaz supports a wide range of model providers, such as [HuggingFace](https://huggingface.co/), [ModelScope](https://www.modelscope.cn), ObjectStores. llmaz will automatically handle the model loading, requiring no effort from users.
 - **Multi-Host Support**: llmaz supports both single-host and multi-host scenarios with [LWS](https://github.com/kubernetes-sigs/lws) from day 0.
 - **Scaling Efficiency**: llmaz supports horizontal scaling with [HPA](./docs/examples/hpa/README.md) by default and will integrate with autoscaling components like [Cluster-Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) or [Karpenter](https://github.com/kubernetes-sigs/karpenter) for smart scaling across different clouds.
+- **Build-in ChatUI**: Out-of-the-box chatbot support with the integration of [Open WebUI](https://github.com/open-webui/open-webui), see configuration [here](./docs/open-webui.md).
 
 ## Quick Start
 

From 3ea8f5c05f6a118108dafdebb98f69c82aa5075f Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Mon, 21 Apr 2025 18:01:40 +0800
Subject: [PATCH 4/4] Update readme.md

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c39ba816..e2f8e439 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ Easy, advanced inference platform for large language models on Kubernetes
 - **Various Model Providers**: llmaz supports a wide range of model providers, such as [HuggingFace](https://huggingface.co/), [ModelScope](https://www.modelscope.cn), ObjectStores. llmaz will automatically handle the model loading, requiring no effort from users.
 - **Multi-Host Support**: llmaz supports both single-host and multi-host scenarios with [LWS](https://github.com/kubernetes-sigs/lws) from day 0.
 - **Scaling Efficiency**: llmaz supports horizontal scaling with [HPA](./docs/examples/hpa/README.md) by default and will integrate with autoscaling components like [Cluster-Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) or [Karpenter](https://github.com/kubernetes-sigs/karpenter) for smart scaling across different clouds.
-- **Build-in ChatUI**: Out-of-the-box chatbot support with the integration of [Open WebUI](https://github.com/open-webui/open-webui), see configuration [here](./docs/open-webui.md).
+- **Built-in ChatUI**: Out-of-the-box chatbot support with the integration of [Open WebUI](https://github.com/open-webui/open-webui), see configurations [here](./docs/open-webui.md).
 
 ## Quick Start