Skip to content

Commit ae221d2

Browse files
committed
feat(prometheus.exporter.databricks): add Databricks exporter component
Add prometheus.exporter.databricks component for collecting metrics from Databricks System Tables including billing, jobs, pipelines, and SQL warehouse performance data. Features: - OAuth2 M2M authentication via Service Principal - Configurable lookback windows for each metric domain - Optional high-cardinality task retry metrics - 19 metrics covering 3 personas: Finance, SRE/Platform, Analytics/BI Arguments: - server_hostname, warehouse_http_path (required) - client_id, client_secret (required, secret) - query_timeout (default: 5m) - billing_lookback (default: 24h) - jobs_lookback, pipelines_lookback (default: 2h) - queries_lookback (default: 1h) - sla_threshold_seconds (default: 3600) - collect_task_retries (default: false) Depends on: github.com/grafana/databricks-prometheus-exporter
1 parent 0e58252 commit ae221d2

File tree

6 files changed

+502
-0
lines changed

6 files changed

+502
-0
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
---
2+
canonical: https://grafana.com/docs/alloy/latest/reference/components/prometheus/prometheus.exporter.databricks/
3+
aliases:
4+
- ../prometheus.exporter.databricks/ # /docs/alloy/latest/reference/components/prometheus.exporter.databricks/
5+
description: Learn about prometheus.exporter.databricks
6+
labels:
7+
stage: general-availability
8+
products:
9+
- oss
10+
title: prometheus.exporter.databricks
11+
---
12+
13+
# `prometheus.exporter.databricks`
14+
15+
The `prometheus.exporter.databricks` component embeds the [`databricks_exporter`](https://github.com/grafana/databricks-prometheus-exporter) for collecting billing, jobs, pipelines, and SQL warehouse metrics from Databricks System Tables via HTTP for Prometheus consumption.
16+
17+
## Usage
18+
19+
```alloy
20+
prometheus.exporter.databricks "LABEL" {
21+
server_hostname = "<DATABRICKS_SERVER_HOSTNAME>"
22+
warehouse_http_path = "<DATABRICKS_WAREHOUSE_HTTP_PATH>"
23+
client_id = "<DATABRICKS_CLIENT_ID>"
24+
client_secret = "<DATABRICKS_CLIENT_SECRET>"
25+
}
26+
```
27+
28+
## Arguments
29+
30+
You can use the following arguments with `prometheus.exporter.databricks`:
31+
32+
| Name | Type | Description | Default | Required |
33+
|-------------------------|------------|---------------------------------------------------------------------------------|---------|----------|
34+
| `server_hostname` | `string` | The Databricks workspace hostname (e.g., `dbc-xxx.cloud.databricks.com`). | | yes |
35+
| `warehouse_http_path` | `string` | The HTTP path of the SQL Warehouse (e.g., `/sql/1.0/warehouses/abc123`). | | yes |
36+
| `client_id` | `string` | The OAuth2 Application ID (Client ID) of your Service Principal. | | yes |
37+
| `client_secret` | `secret` | The OAuth2 Client Secret of your Service Principal. | | yes |
38+
| `query_timeout` | `duration` | Timeout for individual SQL queries. | `"5m"` | no |
39+
| `billing_lookback` | `duration` | How far back to look for billing data. | `"24h"` | no |
40+
| `jobs_lookback` | `duration` | How far back to look for job runs. | `"2h"` | no |
41+
| `pipelines_lookback` | `duration` | How far back to look for pipeline runs. | `"2h"` | no |
42+
| `queries_lookback` | `duration` | How far back to look for SQL warehouse queries. | `"1h"` | no |
43+
| `sla_threshold_seconds` | `int` | Duration threshold (seconds) for job SLA miss detection. | `3600` | no |
44+
| `collect_task_retries` | `bool` | Collect task retry metrics (high cardinality due to `task_key` label). | `false` | no |
45+
46+
## Blocks
47+
48+
The `prometheus.exporter.databricks` component doesn't support any blocks. You can configure this component with arguments.
49+
50+
## Exported fields
51+
52+
{{< docs/shared lookup="reference/components/exporter-component-exports.md" source="alloy" version="<ALLOY_VERSION>" >}}
53+
54+
## Component health
55+
56+
`prometheus.exporter.databricks` is only reported as unhealthy if given an invalid configuration.
57+
In those cases, exported fields retain their last healthy values.
58+
59+
## Debug information
60+
61+
`prometheus.exporter.databricks` doesn't expose any component-specific debug information.
62+
63+
## Debug metrics
64+
65+
`prometheus.exporter.databricks` doesn't expose any component-specific debug metrics.
66+
67+
## Prerequisites
68+
69+
Before using this component, you need:
70+
71+
1. **Databricks Workspace** with Unity Catalog and System Tables enabled
72+
2. **Service Principal** with OAuth2 M2M authentication configured
73+
3. **SQL Warehouse** for querying System Tables (serverless recommended for cost efficiency)
74+
75+
See the [Databricks documentation](https://docs.databricks.com/en/dev-tools/auth/oauth-m2m.html) for detailed OAuth2 M2M setup instructions.
76+
77+
## Example
78+
79+
The following example uses a [`prometheus.scrape`][scrape] component to collect metrics from `prometheus.exporter.databricks`:
80+
81+
```alloy
82+
prometheus.exporter.databricks "example" {
83+
server_hostname = "dbc-abc123-def456.cloud.databricks.com"
84+
warehouse_http_path = "/sql/1.0/warehouses/xyz789"
85+
client_id = "my-service-principal-id"
86+
client_secret = "my-service-principal-secret"
87+
}
88+
89+
// Configure a prometheus.scrape component to collect databricks metrics.
90+
prometheus.scrape "demo" {
91+
targets = prometheus.exporter.databricks.example.targets
92+
forward_to = [prometheus.remote_write.demo.receiver]
93+
scrape_interval = "5m"
94+
scrape_timeout = "4m"
95+
}
96+
97+
prometheus.remote_write "demo" {
98+
endpoint {
99+
url = "<PROMETHEUS_REMOTE_WRITE_URL>"
100+
101+
basic_auth {
102+
username = "<USERNAME>"
103+
password = "<PASSWORD>"
104+
}
105+
}
106+
}
107+
```
108+
109+
Replace the following:
110+
111+
- _`<PROMETHEUS_REMOTE_WRITE_URL>`_: The URL of the Prometheus `remote_write` compatible server to send metrics to.
112+
- _`<USERNAME>`_: The username to use for authentication to the `remote_write` API.
113+
- _`<PASSWORD>`_: The password to use for authentication to the `remote_write` API.
114+
115+
[scrape]: ../prometheus.scrape/
116+
117+
## Tuning recommendations
118+
119+
- **`scrape_interval`**: The example above uses 5 minutes. The exporter queries Databricks System Tables, which can be slow; a longer interval reduces SQL Warehouse costs.
120+
- **`scrape_timeout`**: The example above uses 4 minutes. The exporter typically takes 90-120 seconds per scrape, depending on data volume.
121+
122+
## High cardinality warning
123+
124+
The `collect_task_retries` flag adds task-level retry metrics which can significantly increase cardinality for workspaces with many jobs. Only enable if needed.
125+
126+
<!-- START GENERATED COMPATIBLE COMPONENTS -->
127+
128+
## Compatible components
129+
130+
`prometheus.exporter.databricks` has exports that can be consumed by the following components:
131+
132+
- Components that consume [Targets](../../../compatibility/#targets-consumers)
133+
134+
{{< admonition type="note" >}}
135+
Connecting some components may not be sensible or components may require further configuration to make the connection work correctly.
136+
Refer to the linked documentation for more details.
137+
{{< /admonition >}}
138+
139+
<!-- END GENERATED COMPATIBLE COMPONENTS -->
140+

internal/component/all/all.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ import (
135135
_ "github.com/grafana/alloy/internal/component/prometheus/exporter/catchpoint" // Import prometheus.exporter.catchpoint
136136
_ "github.com/grafana/alloy/internal/component/prometheus/exporter/cloudwatch" // Import prometheus.exporter.cloudwatch
137137
_ "github.com/grafana/alloy/internal/component/prometheus/exporter/consul" // Import prometheus.exporter.consul
138+
_ "github.com/grafana/alloy/internal/component/prometheus/exporter/databricks" // Import prometheus.exporter.databricks
138139
_ "github.com/grafana/alloy/internal/component/prometheus/exporter/dnsmasq" // Import prometheus.exporter.dnsmasq
139140
_ "github.com/grafana/alloy/internal/component/prometheus/exporter/elasticsearch" // Import prometheus.exporter.elasticsearch
140141
_ "github.com/grafana/alloy/internal/component/prometheus/exporter/gcp" // Import prometheus.exporter.gcp
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package databricks
2+
3+
import (
4+
"time"
5+
6+
"github.com/grafana/alloy/internal/component"
7+
"github.com/grafana/alloy/internal/component/prometheus/exporter"
8+
"github.com/grafana/alloy/internal/featuregate"
9+
"github.com/grafana/alloy/internal/static/integrations"
10+
"github.com/grafana/alloy/internal/static/integrations/databricks_exporter"
11+
"github.com/grafana/alloy/syntax/alloytypes"
12+
config_util "github.com/prometheus/common/config"
13+
)
14+
15+
func init() {
16+
component.Register(component.Registration{
17+
Name: "prometheus.exporter.databricks",
18+
Stability: featuregate.StabilityGenerallyAvailable,
19+
Args: Arguments{},
20+
Exports: exporter.Exports{},
21+
22+
Build: exporter.New(createExporter, "databricks"),
23+
})
24+
}
25+
26+
func createExporter(opts component.Options, args component.Arguments) (integrations.Integration, string, error) {
27+
a := args.(Arguments)
28+
defaultInstanceKey := opts.ID // if cannot resolve instance key, use the component ID
29+
return integrations.NewIntegrationWithInstanceKey(opts.Logger, a.Convert(), defaultInstanceKey)
30+
}
31+
32+
// DefaultArguments holds the default settings for the databricks exporter
33+
var DefaultArguments = Arguments{
34+
QueryTimeout: 5 * time.Minute,
35+
BillingLookback: 24 * time.Hour,
36+
JobsLookback: 2 * time.Hour,
37+
PipelinesLookback: 2 * time.Hour,
38+
QueriesLookback: 1 * time.Hour,
39+
SLAThresholdSeconds: 3600,
40+
CollectTaskRetries: false,
41+
}
42+
43+
// Arguments controls the databricks exporter.
type Arguments struct {
	// ServerHostname is the Databricks workspace hostname,
	// e.g. "dbc-xxx.cloud.databricks.com".
	ServerHostname string `alloy:"server_hostname,attr"`
	// WarehouseHTTPPath is the HTTP path of the SQL Warehouse used to query
	// System Tables, e.g. "/sql/1.0/warehouses/abc123".
	WarehouseHTTPPath string `alloy:"warehouse_http_path,attr"`
	// ClientID is the OAuth2 application (client) ID of the Service Principal.
	ClientID string `alloy:"client_id,attr"`
	// ClientSecret is the OAuth2 client secret of the Service Principal.
	ClientSecret alloytypes.Secret `alloy:"client_secret,attr"`
	// QueryTimeout bounds each individual SQL query (default 5m).
	QueryTimeout time.Duration `alloy:"query_timeout,attr,optional"`
	// BillingLookback is how far back to look for billing data (default 24h).
	BillingLookback time.Duration `alloy:"billing_lookback,attr,optional"`
	// JobsLookback is how far back to look for job runs (default 2h).
	JobsLookback time.Duration `alloy:"jobs_lookback,attr,optional"`
	// PipelinesLookback is how far back to look for pipeline runs (default 2h).
	PipelinesLookback time.Duration `alloy:"pipelines_lookback,attr,optional"`
	// QueriesLookback is how far back to look for SQL warehouse queries
	// (default 1h).
	QueriesLookback time.Duration `alloy:"queries_lookback,attr,optional"`
	// SLAThresholdSeconds is the job duration threshold, in seconds, used for
	// SLA-miss detection (default 3600).
	SLAThresholdSeconds int `alloy:"sla_threshold_seconds,attr,optional"`
	// CollectTaskRetries enables task-level retry metrics. High cardinality
	// due to the task_key label; off by default.
	CollectTaskRetries bool `alloy:"collect_task_retries,attr,optional"`
}
57+
58+
// SetToDefault implements syntax.Defaulter. It resets the Arguments to
// DefaultArguments before unmarshaling, so optional attributes that are not
// set in the configuration keep their documented defaults.
func (a *Arguments) SetToDefault() {
	*a = DefaultArguments
}
62+
63+
func (a *Arguments) Convert() *databricks_exporter.Config {
64+
return &databricks_exporter.Config{
65+
ServerHostname: a.ServerHostname,
66+
WarehouseHTTPPath: a.WarehouseHTTPPath,
67+
ClientID: a.ClientID,
68+
ClientSecret: config_util.Secret(a.ClientSecret),
69+
QueryTimeout: a.QueryTimeout,
70+
BillingLookback: a.BillingLookback,
71+
JobsLookback: a.JobsLookback,
72+
PipelinesLookback: a.PipelinesLookback,
73+
QueriesLookback: a.QueriesLookback,
74+
SLAThresholdSeconds: a.SLAThresholdSeconds,
75+
CollectTaskRetries: a.CollectTaskRetries,
76+
}
77+
}
78+
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package databricks
2+
3+
import (
4+
"testing"
5+
"time"
6+
7+
"github.com/grafana/alloy/internal/static/integrations/databricks_exporter"
8+
"github.com/grafana/alloy/syntax"
9+
"github.com/grafana/alloy/syntax/alloytypes"
10+
config_util "github.com/prometheus/common/config"
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
func TestAlloyUnmarshal(t *testing.T) {
15+
alloyConfig := `
16+
server_hostname = "dbc-abc123.cloud.databricks.com"
17+
warehouse_http_path = "/sql/1.0/warehouses/xyz789"
18+
client_id = "my-client-id"
19+
client_secret = "my-client-secret"
20+
query_timeout = "10m"
21+
billing_lookback = "48h"
22+
jobs_lookback = "4h"
23+
pipelines_lookback = "4h"
24+
queries_lookback = "2h"
25+
sla_threshold_seconds = 7200
26+
collect_task_retries = true
27+
`
28+
29+
var args Arguments
30+
err := syntax.Unmarshal([]byte(alloyConfig), &args)
31+
require.NoError(t, err)
32+
33+
expected := Arguments{
34+
ServerHostname: "dbc-abc123.cloud.databricks.com",
35+
WarehouseHTTPPath: "/sql/1.0/warehouses/xyz789",
36+
ClientID: "my-client-id",
37+
ClientSecret: alloytypes.Secret("my-client-secret"),
38+
QueryTimeout: 10 * time.Minute,
39+
BillingLookback: 48 * time.Hour,
40+
JobsLookback: 4 * time.Hour,
41+
PipelinesLookback: 4 * time.Hour,
42+
QueriesLookback: 2 * time.Hour,
43+
SLAThresholdSeconds: 7200,
44+
CollectTaskRetries: true,
45+
}
46+
47+
require.Equal(t, expected, args)
48+
}
49+
50+
func TestAlloyUnmarshal_Defaults(t *testing.T) {
51+
alloyConfig := `
52+
server_hostname = "dbc-abc123.cloud.databricks.com"
53+
warehouse_http_path = "/sql/1.0/warehouses/xyz789"
54+
client_id = "my-client-id"
55+
client_secret = "my-client-secret"
56+
`
57+
58+
var args Arguments
59+
err := syntax.Unmarshal([]byte(alloyConfig), &args)
60+
require.NoError(t, err)
61+
62+
// Check that defaults are applied
63+
require.Equal(t, 5*time.Minute, args.QueryTimeout)
64+
require.Equal(t, 24*time.Hour, args.BillingLookback)
65+
require.Equal(t, 2*time.Hour, args.JobsLookback)
66+
require.Equal(t, 2*time.Hour, args.PipelinesLookback)
67+
require.Equal(t, 1*time.Hour, args.QueriesLookback)
68+
require.Equal(t, 3600, args.SLAThresholdSeconds)
69+
require.False(t, args.CollectTaskRetries)
70+
}
71+
72+
func TestConvert(t *testing.T) {
73+
alloyConfig := `
74+
server_hostname = "dbc-abc123.cloud.databricks.com"
75+
warehouse_http_path = "/sql/1.0/warehouses/xyz789"
76+
client_id = "my-client-id"
77+
client_secret = "my-client-secret"
78+
query_timeout = "10m"
79+
billing_lookback = "48h"
80+
jobs_lookback = "4h"
81+
pipelines_lookback = "4h"
82+
queries_lookback = "2h"
83+
sla_threshold_seconds = 7200
84+
collect_task_retries = true
85+
`
86+
var args Arguments
87+
err := syntax.Unmarshal([]byte(alloyConfig), &args)
88+
require.NoError(t, err)
89+
90+
res := args.Convert()
91+
92+
expected := databricks_exporter.Config{
93+
ServerHostname: "dbc-abc123.cloud.databricks.com",
94+
WarehouseHTTPPath: "/sql/1.0/warehouses/xyz789",
95+
ClientID: "my-client-id",
96+
ClientSecret: config_util.Secret("my-client-secret"),
97+
QueryTimeout: 10 * time.Minute,
98+
BillingLookback: 48 * time.Hour,
99+
JobsLookback: 4 * time.Hour,
100+
PipelinesLookback: 4 * time.Hour,
101+
QueriesLookback: 2 * time.Hour,
102+
SLAThresholdSeconds: 7200,
103+
CollectTaskRetries: true,
104+
}
105+
require.Equal(t, expected, *res)
106+
}
107+

0 commit comments

Comments
 (0)