|
25 | 25 | "type": "prometheus", |
26 | 26 | "uid": "vm" |
27 | 27 | }, |
28 | | - "description": "Ratio of successful image fetches to total fetches, per isolation type.", |
| 28 | + "description": "Ratio of image fetch attempts where the image was already cached on the executor, per isolation type.", |
29 | 29 | "fieldConfig": { |
30 | 30 | "defaults": { |
31 | 31 | "color": { |
|
85 | 85 | "x": 0, |
86 | 86 | "y": 0 |
87 | 87 | }, |
88 | | - "id": 1, |
| 88 | + "id": 5, |
89 | 89 | "options": { |
90 | 90 | "legend": { |
91 | 91 | "calcs": [ |
|
109 | 109 | "uid": "vm" |
110 | 110 | }, |
111 | 111 | "editorMode": "code", |
112 | | - "expr": "sum by (isolation_type) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{status=\"ok\"}[$__rate_interval])) / sum by (isolation_type) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count[$__rate_interval]))", |
113 | | - "legendFormat": "{{isolation_type}}", |
| 112 | + "expr": "sum by (isolation_type) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{region=\"$region\", on_disk=\"true\"}[$__rate_interval])) / sum by (isolation_type) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{region=\"$region\"}[$__rate_interval]))", |
| 113 | + "legendFormat": "{{isolation_type}} on-disk ratio", |
114 | 114 | "range": true, |
115 | 115 | "refId": "A" |
116 | 116 | } |
117 | 117 | ], |
118 | | - "title": "OCI Image Fetch Success Ratio", |
| 118 | + "title": "OCI Image On-Disk Ratio", |
119 | 119 | "type": "timeseries" |
120 | 120 | }, |
121 | 121 | { |
122 | 122 | "datasource": { |
123 | 123 | "type": "prometheus", |
124 | 124 | "uid": "vm" |
125 | 125 | }, |
126 | | - "description": "p90 image fetch latency by isolation type, for successful fetches only. Metric is in microseconds, converted to seconds for display.", |
| 126 | + "description": "Ratio of successful image fetches to total fetches, per isolation type. Excludes images already cached on-disk.", |
127 | 127 | "fieldConfig": { |
128 | 128 | "defaults": { |
129 | 129 | "color": { |
|
138 | 138 | "barAlignment": 0, |
139 | 139 | "barWidthFactor": 0.6, |
140 | 140 | "drawStyle": "line", |
141 | | - "fillOpacity": 5, |
| 141 | + "fillOpacity": 10, |
142 | 142 | "gradientMode": "none", |
143 | 143 | "hideFrom": { |
144 | 144 | "legend": false, |
|
163 | 163 | } |
164 | 164 | }, |
165 | 165 | "mappings": [], |
| 166 | + "max": 1, |
| 167 | + "min": 0, |
166 | 168 | "thresholds": { |
167 | 169 | "mode": "absolute", |
168 | 170 | "steps": [ |
|
171 | 173 | } |
172 | 174 | ] |
173 | 175 | }, |
174 | | - "unit": "s" |
| 176 | + "unit": "percentunit" |
175 | 177 | }, |
176 | 178 | "overrides": [] |
177 | 179 | }, |
|
181 | 183 | "x": 0, |
182 | 184 | "y": 10 |
183 | 185 | }, |
184 | | - "id": 2, |
| 186 | + "id": 1, |
185 | 187 | "options": { |
186 | 188 | "legend": { |
187 | 189 | "calcs": [ |
188 | 190 | "median", |
189 | | - "max" |
| 191 | + "min" |
190 | 192 | ], |
191 | 193 | "displayMode": "table", |
192 | 194 | "placement": "bottom", |
|
205 | 207 | "uid": "vm" |
206 | 208 | }, |
207 | 209 | "editorMode": "code", |
208 | | - "expr": "histogram_quantile(0.90, sum by (isolation_type, le) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_bucket{status=\"ok\"}[$__rate_interval]))) / 1e6", |
209 | | - "legendFormat": "{{isolation_type}}", |
| 210 | + "expr": "sum by (isolation_type) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{region=\"$region\", status=\"ok\", on_disk!=\"true\"}[$__rate_interval])) / sum by (isolation_type) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{region=\"$region\", on_disk!=\"true\"}[$__rate_interval]))", |
| 211 | + "legendFormat": "{{isolation_type}} successful fetch percentage", |
210 | 212 | "range": true, |
211 | 213 | "refId": "A" |
212 | 214 | } |
213 | 215 | ], |
214 | | - "title": "OCI Image Fetch Latency (p90)", |
| 216 | + "title": "OCI Image Fetch Success Ratio by Isolation Type (excl. on-disk)", |
215 | 217 | "type": "timeseries" |
216 | 218 | }, |
217 | 219 | { |
218 | 220 | "datasource": { |
219 | 221 | "type": "prometheus", |
220 | 222 | "uid": "vm" |
221 | 223 | }, |
222 | | - "description": "Ratio of successful image fetches to total fetches, per upstream registry (eTLD+1).", |
| 224 | + "description": "p90 image fetch latency by isolation type, for successful fetches only. Excludes images already cached on-disk. Metric is in microseconds, converted to seconds for display.", |
223 | 225 | "fieldConfig": { |
224 | 226 | "defaults": { |
225 | 227 | "color": { |
|
234 | 236 | "barAlignment": 0, |
235 | 237 | "barWidthFactor": 0.6, |
236 | 238 | "drawStyle": "line", |
237 | | - "fillOpacity": 10, |
| 239 | + "fillOpacity": 5, |
238 | 240 | "gradientMode": "none", |
239 | 241 | "hideFrom": { |
240 | 242 | "legend": false, |
|
259 | 261 | } |
260 | 262 | }, |
261 | 263 | "mappings": [], |
262 | | - "max": 1, |
263 | | - "min": 0, |
264 | 264 | "thresholds": { |
265 | 265 | "mode": "absolute", |
266 | 266 | "steps": [ |
|
269 | 269 | } |
270 | 270 | ] |
271 | 271 | }, |
272 | | - "unit": "percentunit" |
| 272 | + "unit": "s" |
273 | 273 | }, |
274 | 274 | "overrides": [] |
275 | 275 | }, |
|
279 | 279 | "x": 0, |
280 | 280 | "y": 20 |
281 | 281 | }, |
282 | | - "id": 3, |
| 282 | + "id": 2, |
283 | 283 | "options": { |
284 | 284 | "legend": { |
285 | 285 | "calcs": [ |
286 | 286 | "median", |
287 | | - "min" |
| 287 | + "max" |
288 | 288 | ], |
289 | 289 | "displayMode": "table", |
290 | 290 | "placement": "bottom", |
|
303 | 303 | "uid": "vm" |
304 | 304 | }, |
305 | 305 | "editorMode": "code", |
306 | | - "expr": "sum by (registry) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{status=\"ok\"}[$__rate_interval])) / sum by (registry) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count[$__rate_interval]))", |
307 | | - "legendFormat": "{{registry}}", |
| 306 | + "expr": "histogram_quantile(0.90, sum by (isolation_type, le) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_bucket{region=\"$region\", status=\"ok\", on_disk!=\"true\"}[$__rate_interval]))) / 1e6", |
| 307 | + "legendFormat": "{{isolation_type}} p90 latency", |
308 | 308 | "range": true, |
309 | 309 | "refId": "A" |
310 | 310 | } |
311 | 311 | ], |
312 | | - "title": "OCI Image Fetch Success Ratio by Registry", |
| 312 | + "title": "OCI Image Fetch Latency p90 by Isolation Type (excl. on-disk)", |
313 | 313 | "type": "timeseries" |
314 | 314 | }, |
315 | 315 | { |
316 | 316 | "datasource": { |
317 | 317 | "type": "prometheus", |
318 | 318 | "uid": "vm" |
319 | 319 | }, |
320 | | - "description": "p90 image fetch latency by upstream registry (eTLD+1), for successful fetches only. Metric is in microseconds, converted to seconds for display.", |
| 320 | + "description": "Ratio of successful image fetches to total fetches, per upstream registry (eTLD+1).", |
321 | 321 | "fieldConfig": { |
322 | 322 | "defaults": { |
323 | 323 | "color": { |
|
332 | 332 | "barAlignment": 0, |
333 | 333 | "barWidthFactor": 0.6, |
334 | 334 | "drawStyle": "line", |
335 | | - "fillOpacity": 5, |
| 335 | + "fillOpacity": 10, |
336 | 336 | "gradientMode": "none", |
337 | 337 | "hideFrom": { |
338 | 338 | "legend": false, |
|
357 | 357 | } |
358 | 358 | }, |
359 | 359 | "mappings": [], |
| 360 | + "max": 1, |
| 361 | + "min": 0, |
360 | 362 | "thresholds": { |
361 | 363 | "mode": "absolute", |
362 | 364 | "steps": [ |
|
365 | 367 | } |
366 | 368 | ] |
367 | 369 | }, |
368 | | - "unit": "s" |
| 370 | + "unit": "percentunit" |
369 | 371 | }, |
370 | 372 | "overrides": [] |
371 | 373 | }, |
|
375 | 377 | "x": 0, |
376 | 378 | "y": 30 |
377 | 379 | }, |
378 | | - "id": 4, |
| 380 | + "id": 3, |
379 | 381 | "options": { |
380 | 382 | "legend": { |
381 | 383 | "calcs": [ |
382 | 384 | "median", |
383 | | - "max" |
| 385 | + "min" |
384 | 386 | ], |
385 | 387 | "displayMode": "table", |
386 | 388 | "placement": "bottom", |
|
399 | 401 | "uid": "vm" |
400 | 402 | }, |
401 | 403 | "editorMode": "code", |
402 | | - "expr": "histogram_quantile(0.90, sum by (registry, le) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_bucket{status=\"ok\"}[$__rate_interval]))) / 1e6", |
403 | | - "legendFormat": "{{registry}}", |
| 404 | + "expr": "sum by (registry) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{region=\"$region\", status=\"ok\"}[$__rate_interval])) / sum by (registry) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{region=\"$region\"}[$__rate_interval]))", |
| 405 | + "legendFormat": "{{registry}} successful fetch percentage", |
404 | 406 | "range": true, |
405 | 407 | "refId": "A" |
406 | 408 | } |
407 | 409 | ], |
408 | | - "title": "OCI Image Fetch Latency (p90) by Registry", |
| 410 | + "title": "OCI Image Fetch Success Ratio by Registry", |
409 | 411 | "type": "timeseries" |
410 | 412 | }, |
411 | 413 | { |
412 | 414 | "datasource": { |
413 | 415 | "type": "prometheus", |
414 | 416 | "uid": "vm" |
415 | 417 | }, |
416 | | - "description": "Ratio of image fetch attempts where the image was already cached on the executor, per isolation type.", |
| 418 | + "description": "p90 image fetch latency by upstream registry (eTLD+1), for successful fetches only. Metric is in microseconds, converted to seconds for display.", |
417 | 419 | "fieldConfig": { |
418 | 420 | "defaults": { |
419 | 421 | "color": { |
|
428 | 430 | "barAlignment": 0, |
429 | 431 | "barWidthFactor": 0.6, |
430 | 432 | "drawStyle": "line", |
431 | | - "fillOpacity": 10, |
| 433 | + "fillOpacity": 5, |
432 | 434 | "gradientMode": "none", |
433 | 435 | "hideFrom": { |
434 | 436 | "legend": false, |
|
453 | 455 | } |
454 | 456 | }, |
455 | 457 | "mappings": [], |
456 | | - "max": 1, |
457 | | - "min": 0, |
458 | 458 | "thresholds": { |
459 | 459 | "mode": "absolute", |
460 | 460 | "steps": [ |
|
463 | 463 | } |
464 | 464 | ] |
465 | 465 | }, |
466 | | - "unit": "percentunit" |
| 466 | + "unit": "s" |
467 | 467 | }, |
468 | 468 | "overrides": [] |
469 | 469 | }, |
|
473 | 473 | "x": 0, |
474 | 474 | "y": 40 |
475 | 475 | }, |
476 | | - "id": 5, |
| 476 | + "id": 4, |
477 | 477 | "options": { |
478 | 478 | "legend": { |
479 | 479 | "calcs": [ |
480 | 480 | "median", |
481 | | - "min" |
| 481 | + "max" |
482 | 482 | ], |
483 | 483 | "displayMode": "table", |
484 | 484 | "placement": "bottom", |
|
497 | 497 | "uid": "vm" |
498 | 498 | }, |
499 | 499 | "editorMode": "code", |
500 | | - "expr": "sum by (isolation_type) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count{on_disk=\"true\"}[$__rate_interval])) / sum by (isolation_type) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_count[$__rate_interval]))", |
501 | | - "legendFormat": "{{isolation_type}}", |
| 500 | + "expr": "histogram_quantile(0.90, sum by (registry, le) (rate(buildbuddy_remote_execution_image_fetch_duration_usec_bucket{region=\"$region\", status=\"ok\"}[$__rate_interval]))) / 1e6", |
| 501 | + "legendFormat": "{{registry}} p90 latency", |
502 | 502 | "range": true, |
503 | 503 | "refId": "A" |
504 | 504 | } |
505 | 505 | ], |
506 | | - "title": "OCI Image On-Disk Ratio", |
| 506 | + "title": "OCI Image Fetch Latency (p90) by Registry", |
507 | 507 | "type": "timeseries" |
508 | 508 | } |
509 | 509 | ], |
|
513 | 513 | "file:oci-image-fetches.json" |
514 | 514 | ], |
515 | 515 | "templating": { |
516 | | - "list": [] |
| 516 | + "list": [ |
| 517 | + { |
| 518 | + "current": { |
| 519 | + "text": "us-west1", |
| 520 | + "value": "us-west1" |
| 521 | + }, |
| 522 | + "datasource": { |
| 523 | + "type": "prometheus", |
| 524 | + "uid": "vm" |
| 525 | + }, |
| 526 | + "definition": "label_values(up, region)", |
| 527 | + "includeAll": false, |
| 528 | + "name": "region", |
| 529 | + "options": [], |
| 530 | + "query": { |
| 531 | + "query": "label_values(up, region)", |
| 532 | + "refId": "Prometheus-region-Variable-Query" |
| 533 | + }, |
| 534 | + "refresh": 1, |
| 535 | + "regex": "", |
| 536 | + "sort": 1, |
| 537 | + "type": "query" |
| 538 | + } |
| 539 | + ] |
517 | 540 | }, |
518 | 541 | "time": { |
519 | 542 | "from": "now-6h", |
|
0 commit comments