Skip to content

Commit 0f48b7a

Browse files
authored
[HCP Telemetry] Move first TelemetryConfig Fetch into the TelemetryConfigProvider (hashicorp#18318)
* Add Enabler interface to turn sink on/off * Use h for hcpProviderImpl vars, fix PR feeback and fix errors * Keep nil check in exporter and fix tests * Clarify comment and fix function name * Use disable instead of enable * Fix errors nit in otlp_transform * Add test for refreshInterval of updateConfig * Add disabled field in MetricsConfig struct * Fix PR feedback: improve comment and remove double colons * Fix deps test which requires a maybe * Update hcp-sdk-go to v0.61.0 * use disabled flag in telemetry_config.go * Handle 4XX errors in telemetry_provider * Fix deps test * Check 4XX instead * Run make go-mod-tidy
1 parent 58e5658 commit 0f48b7a

File tree

19 files changed

+321
-310
lines changed

19 files changed

+321
-310
lines changed

agent/hcp/client/telemetry_config.go

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import (
2020

2121
var (
2222
// defaultMetricFilters is a regex that matches all metric names.
23-
defaultMetricFilters = regexp.MustCompile(".+")
23+
DefaultMetricFilters = regexp.MustCompile(".+")
2424

2525
// Validation errors for AgentTelemetryConfigOK response.
2626
errMissingPayload = errors.New("missing payload")
@@ -29,6 +29,7 @@ var (
2929
errMissingMetricsConfig = errors.New("missing metrics config")
3030
errInvalidRefreshInterval = errors.New("invalid refresh interval")
3131
errInvalidEndpoint = errors.New("invalid metrics endpoint")
32+
errEmptyEndpoint = errors.New("empty metrics endpoint")
3233
)
3334

3435
// TelemetryConfig contains configuration for telemetry data forwarded by Consul servers
@@ -43,18 +44,14 @@ type MetricsConfig struct {
4344
Labels map[string]string
4445
Filters *regexp.Regexp
4546
Endpoint *url.URL
47+
Disabled bool
4648
}
4749

4850
// RefreshConfig contains configuration for the periodic fetch of configuration from HCP.
4951
type RefreshConfig struct {
5052
RefreshInterval time.Duration
5153
}
5254

53-
// MetricsEnabled returns true if metrics export is enabled, i.e. a valid metrics endpoint exists.
54-
func (t *TelemetryConfig) MetricsEnabled() bool {
55-
return t.MetricsConfig.Endpoint != nil
56-
}
57-
5855
// validateAgentTelemetryConfigPayload ensures the returned payload from HCP is valid.
5956
func validateAgentTelemetryConfigPayload(resp *hcptelemetry.AgentTelemetryConfigOK) error {
6057
if resp.Payload == nil {
@@ -86,7 +83,7 @@ func convertAgentTelemetryResponse(ctx context.Context, resp *hcptelemetry.Agent
8683
telemetryConfig := resp.Payload.TelemetryConfig
8784
metricsEndpoint, err := convertMetricEndpoint(telemetryConfig.Endpoint, telemetryConfig.Metrics.Endpoint)
8885
if err != nil {
89-
return nil, errInvalidEndpoint
86+
return nil, err
9087
}
9188

9289
metricsFilters := convertMetricFilters(ctx, telemetryConfig.Metrics.IncludeList)
@@ -97,6 +94,7 @@ func convertAgentTelemetryResponse(ctx context.Context, resp *hcptelemetry.Agent
9794
Endpoint: metricsEndpoint,
9895
Labels: metricLabels,
9996
Filters: metricsFilters,
97+
Disabled: telemetryConfig.Metrics.Disabled,
10098
},
10199
RefreshConfig: &RefreshConfig{
102100
RefreshInterval: refreshInterval,
@@ -114,9 +112,8 @@ func convertMetricEndpoint(telemetryEndpoint string, metricsEndpoint string) (*u
114112
endpoint = metricsEndpoint
115113
}
116114

117-
// If endpoint is empty, server not registered with CCM, no error returned.
118115
if endpoint == "" {
119-
return nil, nil
116+
return nil, errEmptyEndpoint
120117
}
121118

122119
// Endpoint from CTW has no metrics path, so it must be added.
@@ -145,15 +142,15 @@ func convertMetricFilters(ctx context.Context, payloadFilters []string) *regexp.
145142

146143
if len(validFilters) == 0 {
147144
logger.Error("no valid filters")
148-
return defaultMetricFilters
145+
return DefaultMetricFilters
149146
}
150147

151148
// Combine the valid regex strings with OR.
152149
finalRegex := strings.Join(validFilters, "|")
153150
composedRegex, err := regexp.Compile(finalRegex)
154151
if err != nil {
155152
logger.Error("failed to compile final regex", "error", err)
156-
return defaultMetricFilters
153+
return DefaultMetricFilters
157154
}
158155

159156
return composedRegex

agent/hcp/client/telemetry_config_test.go

Lines changed: 5 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ func TestConvertAgentTelemetryResponse(t *testing.T) {
8888
resp *consul_telemetry_service.AgentTelemetryConfigOK
8989
expectedTelemetryCfg *TelemetryConfig
9090
wantErr error
91-
expectedEnabled bool
9291
}{
9392
"success": {
9493
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
@@ -115,34 +114,6 @@ func TestConvertAgentTelemetryResponse(t *testing.T) {
115114
RefreshInterval: 2 * time.Second,
116115
},
117116
},
118-
expectedEnabled: true,
119-
},
120-
"successNoEndpoint": {
121-
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
122-
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{
123-
TelemetryConfig: &models.HashicorpCloudConsulTelemetry20230414TelemetryConfig{
124-
Endpoint: "",
125-
Labels: map[string]string{"test": "test"},
126-
Metrics: &models.HashicorpCloudConsulTelemetry20230414TelemetryMetricsConfig{
127-
IncludeList: []string{"test", "consul"},
128-
},
129-
},
130-
RefreshConfig: &models.HashicorpCloudConsulTelemetry20230414RefreshConfig{
131-
RefreshInterval: "2s",
132-
},
133-
},
134-
},
135-
expectedTelemetryCfg: &TelemetryConfig{
136-
MetricsConfig: &MetricsConfig{
137-
Endpoint: nil,
138-
Labels: map[string]string{"test": "test"},
139-
Filters: validTestFilters,
140-
},
141-
RefreshConfig: &RefreshConfig{
142-
RefreshInterval: 2 * time.Second,
143-
},
144-
},
145-
expectedEnabled: false,
146117
},
147118
"successBadFilters": {
148119
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
@@ -163,13 +134,12 @@ func TestConvertAgentTelemetryResponse(t *testing.T) {
163134
MetricsConfig: &MetricsConfig{
164135
Endpoint: validTestURL,
165136
Labels: map[string]string{"test": "test"},
166-
Filters: defaultMetricFilters,
137+
Filters: DefaultMetricFilters,
167138
},
168139
RefreshConfig: &RefreshConfig{
169140
RefreshInterval: 2 * time.Second,
170141
},
171142
},
172-
expectedEnabled: true,
173143
},
174144
"errorsWithInvalidRefreshInterval": {
175145
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
@@ -209,7 +179,6 @@ func TestConvertAgentTelemetryResponse(t *testing.T) {
209179
}
210180
require.NoError(t, err)
211181
require.Equal(t, tc.expectedTelemetryCfg, telemetryCfg)
212-
require.Equal(t, tc.expectedEnabled, telemetryCfg.MetricsEnabled())
213182
})
214183
}
215184
}
@@ -231,10 +200,10 @@ func TestConvertMetricEndpoint(t *testing.T) {
231200
override: "https://override.com",
232201
expected: "https://override.com/v1/metrics",
233202
},
234-
"noErrorWithEmptyEndpoints": {
203+
"errorWithEmptyEndpoints": {
235204
endpoint: "",
236205
override: "",
237-
expected: "",
206+
wantErr: errEmptyEndpoint,
238207
},
239208
"errorWithInvalidURL": {
240209
endpoint: " ",
@@ -252,12 +221,6 @@ func TestConvertMetricEndpoint(t *testing.T) {
252221
return
253222
}
254223

255-
if tc.expected == "" {
256-
require.Nil(t, u)
257-
require.NoError(t, err)
258-
return
259-
}
260-
261224
require.NotNil(t, u)
262225
require.NoError(t, err)
263226
require.Equal(t, tc.expected, u.String())
@@ -277,13 +240,13 @@ func TestConvertMetricFilters(t *testing.T) {
277240
}{
278241
"badFilterRegex": {
279242
filters: []string{"(*LF)"},
280-
expectedRegexString: defaultMetricFilters.String(),
243+
expectedRegexString: DefaultMetricFilters.String(),
281244
matches: []string{"consul.raft.peers", "consul.mem.heap_size"},
282245
wantMatch: true,
283246
},
284247
"emptyRegex": {
285248
filters: []string{},
286-
expectedRegexString: defaultMetricFilters.String(),
249+
expectedRegexString: DefaultMetricFilters.String(),
287250
matches: []string{"consul.raft.peers", "consul.mem.heap_size"},
288251
wantMatch: true,
289252
},

agent/hcp/deps.go

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,19 @@ package hcp
66
import (
77
"context"
88
"fmt"
9-
"time"
109

1110
"github.com/armon/go-metrics"
12-
hcpclient "github.com/hashicorp/consul/agent/hcp/client"
11+
"github.com/hashicorp/go-hclog"
12+
13+
"github.com/hashicorp/consul/agent/hcp/client"
1314
"github.com/hashicorp/consul/agent/hcp/config"
1415
"github.com/hashicorp/consul/agent/hcp/scada"
1516
"github.com/hashicorp/consul/agent/hcp/telemetry"
16-
"github.com/hashicorp/go-hclog"
1717
)
1818

1919
// Deps contains the interfaces that the rest of Consul core depends on for HCP integration.
2020
type Deps struct {
21-
Client hcpclient.Client
21+
Client client.Client
2222
Provider scada.Provider
2323
Sink metrics.MetricSink
2424
}
@@ -27,7 +27,7 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (Deps, error) {
2727
ctx := context.Background()
2828
ctx = hclog.WithContext(ctx, logger)
2929

30-
client, err := hcpclient.NewClient(cfg)
30+
hcpClient, err := client.NewClient(cfg)
3131
if err != nil {
3232
return Deps{}, fmt.Errorf("failed to init client: %w", err)
3333
}
@@ -37,50 +37,33 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (Deps, error) {
3737
return Deps{}, fmt.Errorf("failed to init scada: %w", err)
3838
}
3939

40-
metricsClient, err := hcpclient.NewMetricsClient(ctx, &cfg)
40+
metricsClient, err := client.NewMetricsClient(ctx, &cfg)
4141
if err != nil {
4242
logger.Error("failed to init metrics client", "error", err)
4343
return Deps{}, fmt.Errorf("failed to init metrics client: %w", err)
4444
}
4545

46-
sink, err := sink(ctx, client, metricsClient)
46+
sink, err := sink(ctx, metricsClient, NewHCPProvider(ctx, hcpClient))
4747
if err != nil {
4848
// Do not prevent server start if sink init fails, only log error.
4949
logger.Error("failed to init sink", "error", err)
5050
}
5151

5252
return Deps{
53-
Client: client,
53+
Client: hcpClient,
5454
Provider: provider,
5555
Sink: sink,
5656
}, nil
5757
}
5858

5959
// sink initializes an OTELSink which forwards Consul metrics to HCP.
60-
// The sink is only initialized if the server is registered with the management plane (CCM).
6160
// This step should not block server initialization, errors are returned, only to be logged.
6261
func sink(
6362
ctx context.Context,
64-
hcpClient hcpclient.Client,
6563
metricsClient telemetry.MetricsClient,
64+
cfgProvider *hcpProviderImpl,
6665
) (metrics.MetricSink, error) {
67-
logger := hclog.FromContext(ctx).Named("sink")
68-
reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
69-
defer cancel()
70-
71-
telemetryCfg, err := hcpClient.FetchTelemetryConfig(reqCtx)
72-
if err != nil {
73-
return nil, fmt.Errorf("failed to fetch telemetry config: %w", err)
74-
}
75-
76-
if !telemetryCfg.MetricsEnabled() {
77-
return nil, nil
78-
}
79-
80-
cfgProvider, err := NewHCPProvider(ctx, hcpClient, telemetryCfg)
81-
if err != nil {
82-
return nil, fmt.Errorf("failed to init config provider: %w", err)
83-
}
66+
logger := hclog.FromContext(ctx)
8467

8568
reader := telemetry.NewOTELReader(metricsClient, cfgProvider)
8669
sinkOpts := &telemetry.OTELSinkOpts{
@@ -90,7 +73,7 @@ func sink(
9073

9174
sink, err := telemetry.NewOTELSink(ctx, sinkOpts)
9275
if err != nil {
93-
return nil, fmt.Errorf("failed create OTELSink: %w", err)
76+
return nil, fmt.Errorf("failed to create OTELSink: %w", err)
9477
}
9578

9679
logger.Debug("initialized HCP metrics sink")

agent/hcp/deps_test.go

Lines changed: 5 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,10 @@ package hcp
55

66
import (
77
"context"
8-
"fmt"
9-
"net/url"
10-
"regexp"
118
"testing"
12-
"time"
139

14-
"github.com/stretchr/testify/mock"
1510
"github.com/stretchr/testify/require"
1611

17-
"github.com/hashicorp/consul/agent/hcp/client"
1812
"github.com/hashicorp/consul/agent/hcp/telemetry"
1913
)
2014

@@ -24,79 +18,11 @@ type mockMetricsClient struct {
2418

2519
func TestSink(t *testing.T) {
2620
t.Parallel()
27-
for name, test := range map[string]struct {
28-
expect func(*client.MockClient)
29-
wantErr string
30-
expectedSink bool
31-
}{
32-
"success": {
33-
expect: func(mockClient *client.MockClient) {
34-
u, _ := url.Parse("https://test.com/v1/metrics")
35-
filters, _ := regexp.Compile("test")
36-
mt := mockTelemetryConfig(1*time.Second, u, filters)
37-
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(mt, nil)
38-
},
39-
expectedSink: true,
40-
},
41-
"noSinkWhenFetchTelemetryConfigFails": {
42-
expect: func(mockClient *client.MockClient) {
43-
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(nil, fmt.Errorf("fetch failed"))
44-
},
45-
wantErr: "failed to fetch telemetry config",
46-
},
47-
"noSinkWhenServerNotRegisteredWithCCM": {
48-
expect: func(mockClient *client.MockClient) {
49-
mt := mockTelemetryConfig(1*time.Second, nil, nil)
50-
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(mt, nil)
51-
},
52-
},
53-
"noSinkWhenTelemetryConfigProviderInitFails": {
54-
expect: func(mockClient *client.MockClient) {
55-
u, _ := url.Parse("https://test.com/v1/metrics")
56-
// Bad refresh interval forces ConfigProvider creation failure.
57-
mt := mockTelemetryConfig(0*time.Second, u, nil)
58-
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(mt, nil)
59-
},
60-
wantErr: "failed to init config provider",
61-
},
62-
} {
63-
test := test
64-
t.Run(name, func(t *testing.T) {
65-
t.Parallel()
66-
c := client.NewMockClient(t)
67-
mc := mockMetricsClient{}
6821

69-
test.expect(c)
70-
ctx := context.Background()
22+
ctx, cancel := context.WithCancel(context.Background())
23+
defer cancel()
24+
s, err := sink(ctx, mockMetricsClient{}, &hcpProviderImpl{})
7125

72-
s, err := sink(ctx, c, mc)
73-
74-
if test.wantErr != "" {
75-
require.NotNil(t, err)
76-
require.Contains(t, err.Error(), test.wantErr)
77-
require.Nil(t, s)
78-
return
79-
}
80-
81-
if !test.expectedSink {
82-
require.Nil(t, s)
83-
require.Nil(t, err)
84-
return
85-
}
86-
87-
require.NotNil(t, s)
88-
})
89-
}
90-
}
91-
92-
func mockTelemetryConfig(refreshInterval time.Duration, metricsEndpoint *url.URL, filters *regexp.Regexp) *client.TelemetryConfig {
93-
return &client.TelemetryConfig{
94-
MetricsConfig: &client.MetricsConfig{
95-
Endpoint: metricsEndpoint,
96-
Filters: filters,
97-
},
98-
RefreshConfig: &client.RefreshConfig{
99-
RefreshInterval: refreshInterval,
100-
},
101-
}
26+
require.NotNil(t, s)
27+
require.NoError(t, err)
10228
}

0 commit comments

Comments
 (0)