Skip to content

Commit 828567c

Browse files
authored
[HCP Telemetry] Periodic Refresh for Dynamic Telemetry Configuration (hashicorp#18168)
* OTElExporter now uses an EndpointProvider to discover the endpoint * OTELSink uses a ConfigProvider to obtain filters and labels configuration * improve tests for otel_sink * Regex logic is moved into client for a method on the TelemetryConfig object * Create a telemetry_config_provider and update deps to use it * Fix conversion * fix import newline * Add logger to hcp client and move telemetry_config out of the client.go file * Add a telemetry_config.go to refactor client.go * Update deps * update hcp deps test * Modify telemetry_config_providers * Check for nil filters * PR review updates * Fix comments and move around pieces * Fix comments * Remove context from client struct * Moved ctx out of sink struct and fixed filters, added a test * Remove named imports, use errors.New if not fformatting * Remove HCP dependencies in telemetry package * Add success metric and move lock only to grab the t.cfgHahs * Update hash * fix nits * Create an equals method and add tests * Improve telemetry_config_provider.go tests * Add race test * Add missing godoc * Remove mock for MetricsClient * Avoid goroutine test panics * trying to kick CI lint issues by upgrading mod * imprve test code and add hasher for testing * Use structure logging for filters, fix error constants, and default to allow all regex * removed hashin and modify logic to simplify * Improve race test and fix PR feedback by removing hash equals and avoid testing the timer.Ticker logic, and instead unit test * Ran make go-mod-tidy * Use errtypes in the test * Add changelog * add safety check for exporter endpoint * remove require.Contains by using error types, fix structure logging, and fix success metric typo in exporter * Fixed race test to have changing config values * Send success metric before modifying config * Avoid the defer and move the success metric under
1 parent 2a8bf5d commit 828567c

25 files changed

+1581
-523
lines changed

.changelog/18168.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:improvement
2+
hcp: Add dynamic configuration support for the export of server metrics to HCP.
3+
```

agent/hcp/client/client.go

Lines changed: 6 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -35,21 +35,6 @@ type Client interface {
3535
DiscoverServers(ctx context.Context) ([]string, error)
3636
}
3737

38-
// MetricsConfig holds metrics specific configuration for the TelemetryConfig.
39-
// The endpoint field overrides the TelemetryConfig endpoint.
40-
type MetricsConfig struct {
41-
Filters []string
42-
Endpoint string
43-
}
44-
45-
// TelemetryConfig contains configuration for telemetry data forwarded by Consul servers
46-
// to the HCP Telemetry gateway.
47-
type TelemetryConfig struct {
48-
Endpoint string
49-
Labels map[string]string
50-
MetricsConfig *MetricsConfig
51-
}
52-
5338
type BootstrapConfig struct {
5439
Name string
5540
BootstrapExpect int
@@ -112,10 +97,14 @@ func (c *hcpClient) FetchTelemetryConfig(ctx context.Context) (*TelemetryConfig,
11297

11398
resp, err := c.tgw.AgentTelemetryConfig(params, nil)
11499
if err != nil {
115-
return nil, err
100+
return nil, fmt.Errorf("failed to fetch from HCP: %w", err)
101+
}
102+
103+
if err := validateAgentTelemetryConfigPayload(resp); err != nil {
104+
return nil, fmt.Errorf("invalid response payload: %w", err)
116105
}
117106

118-
return convertTelemetryConfig(resp)
107+
return convertAgentTelemetryResponse(ctx, resp, c.cfg)
119108
}
120109

121110
func (c *hcpClient) FetchBootstrap(ctx context.Context) (*BootstrapConfig, error) {
@@ -272,60 +261,3 @@ func (c *hcpClient) DiscoverServers(ctx context.Context) ([]string, error) {
272261

273262
return servers, nil
274263
}
275-
276-
// convertTelemetryConfig validates the AgentTelemetryConfig payload and converts it into a TelemetryConfig object.
277-
func convertTelemetryConfig(resp *hcptelemetry.AgentTelemetryConfigOK) (*TelemetryConfig, error) {
278-
if resp.Payload == nil {
279-
return nil, fmt.Errorf("missing payload")
280-
}
281-
282-
if resp.Payload.TelemetryConfig == nil {
283-
return nil, fmt.Errorf("missing telemetry config")
284-
}
285-
286-
payloadConfig := resp.Payload.TelemetryConfig
287-
var metricsConfig MetricsConfig
288-
if payloadConfig.Metrics != nil {
289-
metricsConfig.Endpoint = payloadConfig.Metrics.Endpoint
290-
metricsConfig.Filters = payloadConfig.Metrics.IncludeList
291-
}
292-
return &TelemetryConfig{
293-
Endpoint: payloadConfig.Endpoint,
294-
Labels: payloadConfig.Labels,
295-
MetricsConfig: &metricsConfig,
296-
}, nil
297-
}
298-
299-
// Enabled verifies if telemetry is enabled by ensuring a valid endpoint has been retrieved.
300-
// It returns full metrics endpoint and true if a valid endpoint was obtained.
301-
func (t *TelemetryConfig) Enabled() (string, bool) {
302-
endpoint := t.Endpoint
303-
if override := t.MetricsConfig.Endpoint; override != "" {
304-
endpoint = override
305-
}
306-
307-
if endpoint == "" {
308-
return "", false
309-
}
310-
311-
// The endpoint from Telemetry Gateway is a domain without scheme, and without the metrics path, so they must be added.
312-
return endpoint + metricsGatewayPath, true
313-
}
314-
315-
// DefaultLabels returns a set of <key, value> string pairs that must be added as attributes to all exported telemetry data.
316-
func (t *TelemetryConfig) DefaultLabels(cfg config.CloudConfig) map[string]string {
317-
labels := make(map[string]string)
318-
nodeID := string(cfg.NodeID)
319-
if nodeID != "" {
320-
labels["node_id"] = nodeID
321-
}
322-
if cfg.NodeName != "" {
323-
labels["node_name"] = cfg.NodeName
324-
}
325-
326-
for k, v := range t.Labels {
327-
labels[k] = v
328-
}
329-
330-
return labels
331-
}

agent/hcp/client/client_test.go

Lines changed: 81 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -2,200 +2,122 @@ package client
22

33
import (
44
"context"
5+
"fmt"
6+
"net/url"
7+
"regexp"
58
"testing"
9+
"time"
610

7-
"github.com/hashicorp/consul/agent/hcp/config"
8-
"github.com/hashicorp/consul/types"
9-
"github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/client/consul_telemetry_service"
11+
"github.com/go-openapi/runtime"
12+
hcptelemetry "github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/client/consul_telemetry_service"
1013
"github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/models"
11-
"github.com/stretchr/testify/mock"
1214
"github.com/stretchr/testify/require"
1315
)
1416

15-
func TestFetchTelemetryConfig(t *testing.T) {
16-
t.Parallel()
17-
for name, test := range map[string]struct {
18-
metricsEndpoint string
19-
expect func(*MockClient)
20-
disabled bool
21-
}{
22-
"success": {
23-
expect: func(mockClient *MockClient) {
24-
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{
25-
Endpoint: "https://test.com",
26-
MetricsConfig: &MetricsConfig{
27-
Endpoint: "",
28-
},
29-
}, nil)
30-
},
31-
metricsEndpoint: "https://test.com/v1/metrics",
32-
},
33-
"overrideMetricsEndpoint": {
34-
expect: func(mockClient *MockClient) {
35-
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{
36-
Endpoint: "https://test.com",
37-
MetricsConfig: &MetricsConfig{
38-
Endpoint: "https://test.com",
39-
},
40-
}, nil)
41-
},
42-
metricsEndpoint: "https://test.com/v1/metrics",
43-
},
44-
"disabledWithEmptyEndpoint": {
45-
expect: func(mockClient *MockClient) {
46-
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{
47-
Endpoint: "",
48-
MetricsConfig: &MetricsConfig{
49-
Endpoint: "",
50-
},
51-
}, nil)
52-
},
53-
disabled: true,
54-
},
55-
} {
56-
test := test
57-
t.Run(name, func(t *testing.T) {
58-
t.Parallel()
59-
60-
mock := NewMockClient(t)
61-
test.expect(mock)
62-
63-
telemetryCfg, err := mock.FetchTelemetryConfig(context.Background())
64-
require.NoError(t, err)
65-
66-
if test.disabled {
67-
endpoint, ok := telemetryCfg.Enabled()
68-
require.False(t, ok)
69-
require.Empty(t, endpoint)
70-
return
71-
}
17+
type mockTGW struct {
18+
mockResponse *hcptelemetry.AgentTelemetryConfigOK
19+
mockError error
20+
}
7221

73-
endpoint, ok := telemetryCfg.Enabled()
22+
func (m *mockTGW) AgentTelemetryConfig(params *hcptelemetry.AgentTelemetryConfigParams, authInfo runtime.ClientAuthInfoWriter, opts ...hcptelemetry.ClientOption) (*hcptelemetry.AgentTelemetryConfigOK, error) {
23+
return m.mockResponse, m.mockError
24+
}
25+
func (m *mockTGW) GetLabelValues(params *hcptelemetry.GetLabelValuesParams, authInfo runtime.ClientAuthInfoWriter, opts ...hcptelemetry.ClientOption) (*hcptelemetry.GetLabelValuesOK, error) {
26+
return hcptelemetry.NewGetLabelValuesOK(), nil
27+
}
28+
func (m *mockTGW) QueryRangeBatch(params *hcptelemetry.QueryRangeBatchParams, authInfo runtime.ClientAuthInfoWriter, opts ...hcptelemetry.ClientOption) (*hcptelemetry.QueryRangeBatchOK, error) {
29+
return hcptelemetry.NewQueryRangeBatchOK(), nil
30+
}
31+
func (m *mockTGW) SetTransport(transport runtime.ClientTransport) {}
7432

75-
require.True(t, ok)
76-
require.Equal(t, test.metricsEndpoint, endpoint)
77-
})
78-
}
33+
type expectedTelemetryCfg struct {
34+
endpoint string
35+
labels map[string]string
36+
filters string
37+
refreshInterval time.Duration
7938
}
8039

81-
func TestConvertTelemetryConfig(t *testing.T) {
40+
func TestFetchTelemetryConfig(t *testing.T) {
8241
t.Parallel()
83-
for name, test := range map[string]struct {
84-
resp *consul_telemetry_service.AgentTelemetryConfigOK
85-
expectedTelemetryCfg *TelemetryConfig
86-
wantErr string
42+
for name, tc := range map[string]struct {
43+
mockResponse *hcptelemetry.AgentTelemetryConfigOK
44+
mockError error
45+
wantErr string
46+
expected *expectedTelemetryCfg
8747
}{
88-
"success": {
89-
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
90-
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{
91-
TelemetryConfig: &models.HashicorpCloudConsulTelemetry20230414TelemetryConfig{
92-
Endpoint: "https://test.com",
93-
Labels: map[string]string{"test": "test"},
94-
},
95-
},
96-
},
97-
expectedTelemetryCfg: &TelemetryConfig{
98-
Endpoint: "https://test.com",
99-
Labels: map[string]string{"test": "test"},
100-
MetricsConfig: &MetricsConfig{},
48+
"errorsWithFetchFailure": {
49+
mockError: fmt.Errorf("failed to fetch from HCP"),
50+
mockResponse: nil,
51+
wantErr: "failed to fetch from HCP",
52+
},
53+
"errorsWithInvalidPayload": {
54+
mockResponse: &hcptelemetry.AgentTelemetryConfigOK{
55+
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{},
10156
},
57+
mockError: nil,
58+
wantErr: "invalid response payload",
10259
},
103-
"successWithMetricsConfig": {
104-
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
60+
"success:": {
61+
mockResponse: &hcptelemetry.AgentTelemetryConfigOK{
10562
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{
63+
RefreshConfig: &models.HashicorpCloudConsulTelemetry20230414RefreshConfig{
64+
RefreshInterval: "1s",
65+
},
10666
TelemetryConfig: &models.HashicorpCloudConsulTelemetry20230414TelemetryConfig{
10767
Endpoint: "https://test.com",
108-
Labels: map[string]string{"test": "test"},
68+
Labels: map[string]string{"test": "123"},
10969
Metrics: &models.HashicorpCloudConsulTelemetry20230414TelemetryMetricsConfig{
110-
Endpoint: "https://metrics-test.com",
111-
IncludeList: []string{"consul.raft.apply"},
70+
IncludeList: []string{"consul", "test"},
11271
},
11372
},
11473
},
11574
},
116-
expectedTelemetryCfg: &TelemetryConfig{
117-
Endpoint: "https://test.com",
118-
Labels: map[string]string{"test": "test"},
119-
MetricsConfig: &MetricsConfig{
120-
Endpoint: "https://metrics-test.com",
121-
Filters: []string{"consul.raft.apply"},
122-
},
123-
},
124-
},
125-
"errorsWithNilPayload": {
126-
resp: &consul_telemetry_service.AgentTelemetryConfigOK{},
127-
wantErr: "missing payload",
128-
},
129-
"errorsWithNilTelemetryConfig": {
130-
resp: &consul_telemetry_service.AgentTelemetryConfigOK{
131-
Payload: &models.HashicorpCloudConsulTelemetry20230414AgentTelemetryConfigResponse{},
75+
expected: &expectedTelemetryCfg{
76+
endpoint: "https://test.com/v1/metrics",
77+
labels: map[string]string{"test": "123"},
78+
filters: "consul|test",
79+
refreshInterval: 1 * time.Second,
13280
},
133-
wantErr: "missing telemetry config",
13481
},
13582
} {
136-
test := test
83+
tc := tc
13784
t.Run(name, func(t *testing.T) {
13885
t.Parallel()
139-
telemetryCfg, err := convertTelemetryConfig(test.resp)
140-
if test.wantErr != "" {
86+
c := &hcpClient{
87+
tgw: &mockTGW{
88+
mockError: tc.mockError,
89+
mockResponse: tc.mockResponse,
90+
},
91+
}
92+
93+
telemetryCfg, err := c.FetchTelemetryConfig(context.Background())
94+
95+
if tc.wantErr != "" {
14196
require.Error(t, err)
97+
require.Contains(t, err.Error(), tc.wantErr)
14298
require.Nil(t, telemetryCfg)
143-
require.Contains(t, err.Error(), test.wantErr)
14499
return
145100
}
146101

102+
urlEndpoint, err := url.Parse(tc.expected.endpoint)
147103
require.NoError(t, err)
148-
require.Equal(t, test.expectedTelemetryCfg, telemetryCfg)
149-
})
150-
}
151-
}
152104

153-
func Test_DefaultLabels(t *testing.T) {
154-
for name, tc := range map[string]struct {
155-
cfg config.CloudConfig
156-
expectedLabels map[string]string
157-
}{
158-
"Success": {
159-
cfg: config.CloudConfig{
160-
NodeID: types.NodeID("nodeyid"),
161-
NodeName: "nodey",
162-
},
163-
expectedLabels: map[string]string{
164-
"node_id": "nodeyid",
165-
"node_name": "nodey",
166-
},
167-
},
105+
regexFilters, err := regexp.Compile(tc.expected.filters)
106+
require.NoError(t, err)
168107

169-
"NoNodeID": {
170-
cfg: config.CloudConfig{
171-
NodeID: types.NodeID(""),
172-
NodeName: "nodey",
173-
},
174-
expectedLabels: map[string]string{
175-
"node_name": "nodey",
176-
},
177-
},
178-
"NoNodeName": {
179-
cfg: config.CloudConfig{
180-
NodeID: types.NodeID("nodeyid"),
181-
NodeName: "",
182-
},
183-
expectedLabels: map[string]string{
184-
"node_id": "nodeyid",
185-
},
186-
},
187-
"Empty": {
188-
cfg: config.CloudConfig{
189-
NodeID: "",
190-
NodeName: "",
191-
},
192-
expectedLabels: map[string]string{},
193-
},
194-
} {
195-
t.Run(name, func(t *testing.T) {
196-
tCfg := &TelemetryConfig{}
197-
labels := tCfg.DefaultLabels(tc.cfg)
198-
require.Equal(t, labels, tc.expectedLabels)
108+
expectedCfg := &TelemetryConfig{
109+
MetricsConfig: &MetricsConfig{
110+
Endpoint: urlEndpoint,
111+
Filters: regexFilters,
112+
Labels: tc.expected.labels,
113+
},
114+
RefreshConfig: &RefreshConfig{
115+
RefreshInterval: tc.expected.refreshInterval,
116+
},
117+
}
118+
119+
require.NoError(t, err)
120+
require.Equal(t, expectedCfg, telemetryCfg)
199121
})
200122
}
201123
}

0 commit comments

Comments
 (0)