Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/cube-cos-openapi
Submodule cube-cos-openapi updated 1 files
+71 −0 docs.yaml
24 changes: 24 additions & 0 deletions internal/apis/v1/handlers/grafana/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/bigstack-oss/cube-cos-api/internal/apis/v1/bodies"
"github.com/bigstack-oss/cube-cos-api/internal/cubecos"
"github.com/bigstack-oss/cube-cos-api/internal/definition/v1/grafana"
"github.com/bigstack-oss/cube-cos-api/internal/definition/v1/nodes"
"github.com/gin-gonic/gin"
)

Expand Down Expand Up @@ -54,6 +55,12 @@ var (
Path: "/grafana/storages",
Func: forwardStoragesLink,
},
{
Version: apis.V1,
Method: http.MethodGet,
Path: "/grafana/gpuWorkloadHistory/:hostname",
Func: forwardGpuWorkloadHistoryLinks,
},
}
)

Expand Down Expand Up @@ -133,3 +140,20 @@ func forwardStoragesLink(c *gin.Context) {
},
)
}

// forwardGpuWorkloadHistoryLinks responds with the device-dashboard deep links
// for the GPU workload history of the node named by the ":hostname" route
// parameter: one link for the GPU utilization panel (50) and one for the VRAM
// panel (51).
//
// Both links filter on var-GPU_HOST, whose value has to match the gpu.host
// `host` tag (verified equal to Node.Hostname). Enabled reflects whether the
// node exists according to nodes.IsExist (cluster-wide); GetNodeGpusMap is
// deliberately avoided because it is local-only and would report the wrong
// node for remote ones.
//
// The links are always populated, even when Enabled is false.
func forwardGpuWorkloadHistoryLinks(c *gin.Context) {
	// Field order mirrors the evaluation order: both links first, then the
	// existence check.
	history := grafana.GpuWorkloadHistory{
		GpuUtilizationUrl: genGpuUtilizationHistoryLink(c),
		VramUrl:           genGpuVramHistoryLink(c),
		Enabled:           nodes.IsExist(c.Param("hostname")),
	}

	bodies.SetOk(c, "fetch gpu workload history links successfully", history)
}
19 changes: 19 additions & 0 deletions internal/apis/v1/handlers/grafana/links.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,22 @@ func genStoragesLink() string {
base.DataCenterVip,
)
}

// genGpuUtilizationHistoryLink builds the Grafana deep link to panel 50
// (GPU Utilization) of the device dashboard (UID i-device), covering the last
// three hours.
//
// The panel is filtered through the hidden $GPU_HOST variable (the `host` tag
// of gpu.host), NOT $HOST; its value is taken from the ":hostname" route
// parameter of c.
//
// NOTE(review): the hostname is inserted into the query string verbatim. If a
// caller can supply characters such as '&' or '#', the link would be malformed;
// consider query-escaping it — confirm whether upstream validation already
// prevents this.
func genGpuUtilizationHistoryLink(c *gin.Context) string {
	const linkFormat = "https://%s/grafana/d/i-device/device?orgId=1&var-GPU_HOST=%s&from=now-3h&to=now&viewPanel=50"

	gpuHost := c.Param("hostname")

	return fmt.Sprintf(linkFormat, base.DataCenterVip, gpuHost)
}

// genGpuVramHistoryLink builds the Grafana deep link to panel 51
// (GPU VRAM Usage) of the device dashboard (UID i-device), covering the last
// three hours. The var-GPU_HOST filter value is taken from the ":hostname"
// route parameter of c.
//
// NOTE(review): the hostname is inserted into the query string verbatim. If a
// caller can supply characters such as '&' or '#', the link would be malformed;
// consider query-escaping it — confirm whether upstream validation already
// prevents this.
func genGpuVramHistoryLink(c *gin.Context) string {
	const linkFormat = "https://%s/grafana/d/i-device/device?orgId=1&var-GPU_HOST=%s&from=now-3h&to=now&viewPanel=51"

	gpuHost := c.Param("hostname")

	return fmt.Sprintf(linkFormat, base.DataCenterVip, gpuHost)
}
6 changes: 6 additions & 0 deletions internal/definition/v1/grafana/grafana.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ type Dashboard struct {
Link string `json:"link"`
Enabled bool `json:"enabled"`
}

// GpuWorkloadHistory is the JSON payload describing the Grafana links for a
// node's GPU workload history.
//
// NOTE(review): Go naming convention would spell the initialism as "URL"
// (GpuUtilizationURL, VramURL); the names are kept because renaming would
// break existing callers. The JSON tags are the wire contract.
type GpuWorkloadHistory struct {
// GpuUtilizationUrl is the link to the GPU utilization history panel.
GpuUtilizationUrl string `json:"gpuUtilizationUrl"`
// VramUrl is the link to the GPU VRAM usage history panel.
VramUrl string `json:"vramUrl"`
// Enabled tells the consumer whether the links should be offered;
// presumably false when the requested node is unknown — confirm against
// the handler that fills this struct.
Enabled bool `json:"enabled"`
}
Loading