From 235dd0289a9b88776103150ee07df29e2674e726 Mon Sep 17 00:00:00 2001 From: Seki Xu Date: Mon, 29 Jun 2026 17:21:04 +0800 Subject: [PATCH 1/2] feat(grafana): add gpu workload history deep-link endpoint (#824) Add GET /grafana/gpuWorkloadHistory/:hostname returning the device dashboard deep-links for a physical node's GPU Utilization (panel 50) and VRAM (panel 51), filtered by var-GPU_HOST (not var-HOST). enabled reflects node existence via nodes.IsExist; GetNodeGpusMap is avoided because it is local-only and reports the wrong node for remote ones. Co-Authored-By: Claude Opus 4.8 Signed-off-by: Seki Xu --- internal/apis/v1/handlers/grafana/handlers.go | 24 +++++++++++++++++++ internal/apis/v1/handlers/grafana/links.go | 19 +++++++++++++++ internal/definition/v1/grafana/grafana.go | 6 +++++ 3 files changed, 49 insertions(+) diff --git a/internal/apis/v1/handlers/grafana/handlers.go b/internal/apis/v1/handlers/grafana/handlers.go index 97a2f7de..89209ae0 100644 --- a/internal/apis/v1/handlers/grafana/handlers.go +++ b/internal/apis/v1/handlers/grafana/handlers.go @@ -7,6 +7,7 @@ import ( "github.com/bigstack-oss/cube-cos-api/internal/apis/v1/bodies" "github.com/bigstack-oss/cube-cos-api/internal/cubecos" "github.com/bigstack-oss/cube-cos-api/internal/definition/v1/grafana" + "github.com/bigstack-oss/cube-cos-api/internal/definition/v1/nodes" "github.com/gin-gonic/gin" ) @@ -54,6 +55,12 @@ var ( Path: "/grafana/storages", Func: forwardStoragesLink, }, + { + Version: apis.V1, + Method: http.MethodGet, + Path: "/grafana/gpuWorkloadHistory/:hostname", + Func: forwardGpuWorkloadHistoryLinks, + }, } ) @@ -133,3 +140,20 @@ func forwardStoragesLink(c *gin.Context) { }, ) } + +// Returns the device dashboard deep-links for a physical node's GPU workload +// history (panel 50 = GPU Util, 51 = VRAM). Both filter by var-GPU_HOST, whose +// value must equal the gpu.host `host` tag (verified equal to Node.Hostname). +// Enabled is gated on node existence (cluster-wide); GetNodeGpusMap is NOT used +// here because it is local-only and would report the wrong node for remote ones. +func forwardGpuWorkloadHistoryLinks(c *gin.Context) { + bodies.SetOk( + c, + "fetch gpu workload history links successfully", + grafana.GpuWorkloadHistory{ + GpuUtilizationUrl: genGpuUtilizationHistoryLink(c), + VramUrl: genGpuVramHistoryLink(c), + Enabled: nodes.IsExist(c.Param("hostname")), + }, + ) +} diff --git a/internal/apis/v1/handlers/grafana/links.go b/internal/apis/v1/handlers/grafana/links.go index ffb43af9..6c799e51 100644 --- a/internal/apis/v1/handlers/grafana/links.go +++ b/internal/apis/v1/handlers/grafana/links.go @@ -57,3 +57,22 @@ func genStoragesLink() string { base.DataCenterVip, ) } + +// panel 50 = GPU Utilization on the device dashboard (UID i-device). +// Filtered by the hidden $GPU_HOST variable (gpu.host's `host` tag), NOT $HOST. +func genGpuUtilizationHistoryLink(c *gin.Context) string { + return fmt.Sprintf( + "https://%s/grafana/d/i-device/device?orgId=1&var-GPU_HOST=%s&from=now-3h&to=now&viewPanel=50", + base.DataCenterVip, + c.Param("hostname"), + ) +} + +// panel 51 = GPU VRAM Usage on the device dashboard (UID i-device). +func genGpuVramHistoryLink(c *gin.Context) string { + return fmt.Sprintf( + "https://%s/grafana/d/i-device/device?orgId=1&var-GPU_HOST=%s&from=now-3h&to=now&viewPanel=51", + base.DataCenterVip, + c.Param("hostname"), + ) +} diff --git a/internal/definition/v1/grafana/grafana.go b/internal/definition/v1/grafana/grafana.go index 4e39b824..ae250f91 100644 --- a/internal/definition/v1/grafana/grafana.go +++ b/internal/definition/v1/grafana/grafana.go @@ -8,3 +8,9 @@ type Dashboard struct { Link string `json:"link"` Enabled bool `json:"enabled"` } + +type GpuWorkloadHistory struct { + GpuUtilizationUrl string `json:"gpuUtilizationUrl"` + VramUrl string `json:"vramUrl"` + Enabled bool `json:"enabled"` +} From 8863be582fa29e83f3538fb1f77e0621c9a43fa4 Mon Sep 17 00:00:00 2001 From: Seki Xu Date: Mon, 29 Jun 2026 17:37:51 +0800 Subject: [PATCH 2/2] chore(api): bump cube-cos-openapi submodule for gpuWorkloadHistory docs (#824) Points to the openapi commit that documents GET .../grafana/gpuWorkloadHistory/{hostname}, so the generated api docs include the new endpoint. Co-Authored-By: Claude Opus 4.8 Signed-off-by: Seki Xu --- api/cube-cos-openapi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/cube-cos-openapi b/api/cube-cos-openapi index 221c16f2..c8ff77ef 160000 --- a/api/cube-cos-openapi +++ b/api/cube-cos-openapi @@ -1 +1 @@ -Subproject commit 221c16f26079be819bd357b9bf76f51b1f1724ff +Subproject commit c8ff77ef107e0a56de4024e4faf0fd2c5fa5c170