Skip to content

Commit 771d2cf

Browse files
authored
feat(helm): add OpenSearch and Redis queues Grafana dashboards (onyx-dot-app#10042)
1 parent 7ec5028 commit 771d2cf

4 files changed

Lines changed: 982 additions & 1 deletion

File tree

deployment/helm/charts/onyx/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ home: https://www.onyx.app/
55
sources:
66
- "https://github.com/onyx-dot-app/onyx"
77
type: application
8-
version: 0.4.41
8+
version: 0.4.42
99
appVersion: latest
1010
annotations:
1111
category: Productivity
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
{
2+
"annotations": {
3+
"list": [
4+
{
5+
"builtIn": 1,
6+
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
7+
"enable": true,
8+
"hide": true,
9+
"iconColor": "rgba(0, 211, 255, 1)",
10+
"name": "Annotations & Alerts",
11+
"type": "dashboard"
12+
}
13+
]
14+
},
15+
"editable": true,
16+
"fiscalYearStartMonth": 0,
17+
"graphTooltip": 1,
18+
"id": null,
19+
"links": [],
20+
"liveNow": true,
21+
"panels": [
22+
{
23+
"title": "Client-Side Search Latency (P50 / P95 / P99)",
24+
"description": "End-to-end latency as measured by the Python client, including network round-trip and serialization overhead.",
25+
"type": "timeseries",
26+
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
27+
"id": 1,
28+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
29+
"fieldConfig": {
30+
"defaults": {
31+
"color": { "mode": "palette-classic" },
32+
"custom": {
33+
"axisBorderShow": false,
34+
"axisCenteredZero": false,
35+
"axisLabel": "seconds",
36+
"axisPlacement": "auto",
37+
"drawStyle": "line",
38+
"fillOpacity": 0,
39+
"gradientMode": "none",
40+
"lineInterpolation": "smooth",
41+
"lineWidth": 2,
42+
"pointSize": 5,
43+
"scaleDistribution": { "type": "linear" },
44+
"showPoints": "never",
45+
"spanNulls": false,
46+
"stacking": { "group": "A", "mode": "none" },
47+
"thresholdsStyle": { "mode": "dashed" }
48+
},
49+
"thresholds": {
50+
"mode": "absolute",
51+
"steps": [
52+
{ "color": "green", "value": null },
53+
{ "color": "yellow", "value": 0.5 },
54+
{ "color": "red", "value": 2.0 }
55+
]
56+
},
57+
"unit": "s",
58+
"min": 0
59+
},
60+
"overrides": []
61+
},
62+
"targets": [
63+
{
64+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
65+
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
66+
"legendFormat": "P50",
67+
"refId": "A"
68+
},
69+
{
70+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
71+
"expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
72+
"legendFormat": "P95",
73+
"refId": "B"
74+
},
75+
{
76+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
77+
"expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
78+
"legendFormat": "P99",
79+
"refId": "C"
80+
}
81+
]
82+
},
83+
{
84+
"title": "Server-Side Search Latency (P50 / P95 / P99)",
85+
"description": "OpenSearch server-side execution time from the 'took' field in the response. Does not include network or client-side overhead.",
86+
"type": "timeseries",
87+
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
88+
"id": 2,
89+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
90+
"fieldConfig": {
91+
"defaults": {
92+
"color": { "mode": "palette-classic" },
93+
"custom": {
94+
"axisBorderShow": false,
95+
"axisCenteredZero": false,
96+
"axisLabel": "seconds",
97+
"axisPlacement": "auto",
98+
"drawStyle": "line",
99+
"fillOpacity": 0,
100+
"gradientMode": "none",
101+
"lineInterpolation": "smooth",
102+
"lineWidth": 2,
103+
"pointSize": 5,
104+
"scaleDistribution": { "type": "linear" },
105+
"showPoints": "never",
106+
"spanNulls": false,
107+
"stacking": { "group": "A", "mode": "none" },
108+
"thresholdsStyle": { "mode": "dashed" }
109+
},
110+
"thresholds": {
111+
"mode": "absolute",
112+
"steps": [
113+
{ "color": "green", "value": null },
114+
{ "color": "yellow", "value": 0.5 },
115+
{ "color": "red", "value": 2.0 }
116+
]
117+
},
118+
"unit": "s",
119+
"min": 0
120+
},
121+
"overrides": []
122+
},
123+
"targets": [
124+
{
125+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
126+
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
127+
"legendFormat": "P50",
128+
"refId": "A"
129+
},
130+
{
131+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
132+
"expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
133+
"legendFormat": "P95",
134+
"refId": "B"
135+
},
136+
{
137+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
138+
"expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
139+
"legendFormat": "P99",
140+
"refId": "C"
141+
}
142+
]
143+
},
144+
{
145+
"title": "Client-Side Latency by Search Type (P95)",
146+
"description": "P95 client-side latency broken down by search type (hybrid, keyword, semantic, random, doc_id_retrieval).",
147+
"type": "timeseries",
148+
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
149+
"id": 3,
150+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
151+
"fieldConfig": {
152+
"defaults": {
153+
"color": { "mode": "palette-classic" },
154+
"custom": {
155+
"axisBorderShow": false,
156+
"axisCenteredZero": false,
157+
"axisLabel": "seconds",
158+
"axisPlacement": "auto",
159+
"drawStyle": "line",
160+
"fillOpacity": 0,
161+
"gradientMode": "none",
162+
"lineInterpolation": "smooth",
163+
"lineWidth": 2,
164+
"pointSize": 5,
165+
"scaleDistribution": { "type": "linear" },
166+
"showPoints": "never",
167+
"spanNulls": false,
168+
"stacking": { "group": "A", "mode": "none" },
169+
"thresholdsStyle": { "mode": "off" }
170+
},
171+
"unit": "s",
172+
"min": 0
173+
},
174+
"overrides": []
175+
},
176+
"targets": [
177+
{
178+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
179+
"expr": "histogram_quantile(0.95, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
180+
"legendFormat": "{{ search_type }}",
181+
"refId": "A"
182+
}
183+
]
184+
},
185+
{
186+
"title": "Search Throughput by Type",
187+
"description": "Searches per second broken down by search type.",
188+
"type": "timeseries",
189+
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
190+
"id": 4,
191+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
192+
"fieldConfig": {
193+
"defaults": {
194+
"color": { "mode": "palette-classic" },
195+
"custom": {
196+
"axisBorderShow": false,
197+
"axisCenteredZero": false,
198+
"axisLabel": "searches/s",
199+
"axisPlacement": "auto",
200+
"drawStyle": "line",
201+
"fillOpacity": 0,
202+
"gradientMode": "none",
203+
"lineInterpolation": "smooth",
204+
"lineWidth": 2,
205+
"pointSize": 5,
206+
"scaleDistribution": { "type": "linear" },
207+
"showPoints": "never",
208+
"spanNulls": false,
209+
"stacking": { "group": "A", "mode": "normal" },
210+
"thresholdsStyle": { "mode": "off" }
211+
},
212+
"unit": "ops",
213+
"min": 0
214+
},
215+
"overrides": []
216+
},
217+
"targets": [
218+
{
219+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
220+
"expr": "sum by (search_type) (rate(onyx_opensearch_search_total[5m]))",
221+
"legendFormat": "{{ search_type }}",
222+
"refId": "A"
223+
}
224+
]
225+
},
226+
{
227+
"title": "Concurrent Searches In Progress",
228+
"description": "Number of OpenSearch searches currently in flight, broken down by search type. Summed across all instances.",
229+
"type": "timeseries",
230+
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
231+
"id": 5,
232+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
233+
"fieldConfig": {
234+
"defaults": {
235+
"color": { "mode": "palette-classic" },
236+
"custom": {
237+
"axisBorderShow": false,
238+
"axisCenteredZero": false,
239+
"axisLabel": "searches",
240+
"axisPlacement": "auto",
241+
"drawStyle": "line",
242+
"fillOpacity": 0,
243+
"gradientMode": "none",
244+
"lineInterpolation": "smooth",
245+
"lineWidth": 2,
246+
"pointSize": 5,
247+
"scaleDistribution": { "type": "linear" },
248+
"showPoints": "never",
249+
"spanNulls": false,
250+
"stacking": { "group": "A", "mode": "normal" },
251+
"thresholdsStyle": { "mode": "off" }
252+
},
253+
"min": 0
254+
},
255+
"overrides": []
256+
},
257+
"targets": [
258+
{
259+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
260+
"expr": "sum by (search_type) (onyx_opensearch_searches_in_progress)",
261+
"legendFormat": "{{ search_type }}",
262+
"refId": "A"
263+
}
264+
]
265+
},
266+
{
267+
"title": "Client vs Server Latency Overhead (P50)",
268+
"description": "Difference between client-side and server-side P50 latency. Reveals network, serialization, and untracked OpenSearch overhead.",
269+
"type": "timeseries",
270+
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
271+
"id": 6,
272+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
273+
"fieldConfig": {
274+
"defaults": {
275+
"color": { "mode": "palette-classic" },
276+
"custom": {
277+
"axisBorderShow": false,
278+
"axisCenteredZero": false,
279+
"axisLabel": "seconds",
280+
"axisPlacement": "auto",
281+
"drawStyle": "line",
282+
"fillOpacity": 0,
283+
"gradientMode": "none",
284+
"lineInterpolation": "smooth",
285+
"lineWidth": 2,
286+
"pointSize": 5,
287+
"scaleDistribution": { "type": "linear" },
288+
"showPoints": "never",
289+
"spanNulls": false,
290+
"stacking": { "group": "A", "mode": "none" },
291+
"thresholdsStyle": { "mode": "off" }
292+
},
293+
"unit": "s",
294+
"min": 0
295+
},
296+
"overrides": []
297+
},
298+
"targets": [
299+
{
300+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
301+
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m]))) - histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
302+
"legendFormat": "Client - Server overhead (P50)",
303+
"refId": "A"
304+
},
305+
{
306+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
307+
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
308+
"legendFormat": "Client P50",
309+
"refId": "B"
310+
},
311+
{
312+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
313+
"expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
314+
"legendFormat": "Server P50",
315+
"refId": "C"
316+
}
317+
]
318+
}
319+
],
320+
"refresh": "5s",
321+
"schemaVersion": 37,
322+
"style": "dark",
323+
"tags": ["onyx", "opensearch", "search", "latency"],
324+
"templating": {
325+
"list": [
326+
{
327+
"current": {
328+
"text": "Prometheus",
329+
"value": "prometheus"
330+
},
331+
"includeAll": false,
332+
"name": "DS_PROMETHEUS",
333+
"options": [],
334+
"query": "prometheus",
335+
"refresh": 1,
336+
"type": "datasource"
337+
}
338+
]
339+
},
340+
"time": { "from": "now-60m", "to": "now" },
341+
"timepicker": {
342+
"refresh_intervals": ["5s", "10s", "30s", "1m"]
343+
},
344+
"timezone": "",
345+
"title": "Onyx OpenSearch Search Latency",
346+
"uid": "onyx-opensearch-search-latency",
347+
"version": 0,
348+
"weekStart": ""
349+
}

0 commit comments

Comments
 (0)