Skip to content

Commit e6c6e57

Browse files
committed
Add telemetry for dropped data due to exporter sending queue overflow
This change adds internal metrics for dropped spans, metric points, and log records when the exporter sending queue is full: - exporter/enqueue_failed_metric_points - exporter/enqueue_failed_spans - exporter/enqueue_failed_log_records
1 parent 917be66 commit e6c6e57

File tree

14 files changed

+184
-20
lines changed

14 files changed

+184
-20
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
## 💡 Enhancements 💡
1414

1515
- Add `doc.go` files to the consumer package and its subpackages (#3270)
16+
- Add telemetry for dropped data due to exporter sending queue overflow (#3328)
1617

1718
## v0.27.0 Beta
1819

exporter/exporterhelper/common.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ import (
2323
"go.opentelemetry.io/collector/component"
2424
"go.opentelemetry.io/collector/component/componenthelper"
2525
"go.opentelemetry.io/collector/config"
26+
"go.opentelemetry.io/collector/config/configtelemetry"
2627
"go.opentelemetry.io/collector/consumer"
2728
"go.opentelemetry.io/collector/consumer/consumerhelper"
29+
"go.opentelemetry.io/collector/obsreport"
2830
)
2931

3032
// TimeoutSettings for timeout. The timeout applies to individual attempts to send data to the backend.
@@ -164,6 +166,7 @@ func WithResourceToTelemetryConversion(resourceToTelemetrySettings ResourceToTel
164166
// baseExporter contains common fields between different exporter types.
165167
type baseExporter struct {
166168
component.Component
169+
obsrep *obsreport.Exporter
167170
sender requestSender
168171
qrSender *queuedRetrySender
169172
}
@@ -173,6 +176,10 @@ func newBaseExporter(cfg config.Exporter, logger *zap.Logger, bs *baseSettings)
173176
Component: componenthelper.New(bs.componentOptions...),
174177
}
175178

179+
be.obsrep = obsreport.NewExporter(obsreport.ExporterSettings{
180+
Level: configtelemetry.GetMetricsLevelFlagValue(),
181+
ExporterID: cfg.ID(),
182+
})
176183
be.qrSender = newQueuedRetrySender(cfg.ID().String(), bs.QueueSettings, bs.RetrySettings, &timeoutSender{cfg: bs.TimeoutSettings}, logger)
177184
be.sender = be.qrSender
178185

exporter/exporterhelper/logs.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ package exporterhelper
1616

1717
import (
1818
"context"
19+
"errors"
1920

2021
"go.uber.org/zap"
2122

2223
"go.opentelemetry.io/collector/component"
2324
"go.opentelemetry.io/collector/config"
24-
"go.opentelemetry.io/collector/config/configtelemetry"
2525
"go.opentelemetry.io/collector/consumer"
2626
"go.opentelemetry.io/collector/consumer/consumererror"
2727
"go.opentelemetry.io/collector/consumer/consumerhelper"
@@ -87,16 +87,18 @@ func NewLogsExporter(
8787
be := newBaseExporter(cfg, logger, bs)
8888
be.wrapConsumerSender(func(nextSender requestSender) requestSender {
8989
return &logsExporterWithObservability{
90-
obsrep: obsreport.NewExporter(obsreport.ExporterSettings{
91-
Level: configtelemetry.GetMetricsLevelFlagValue(),
92-
ExporterID: cfg.ID(),
93-
}),
90+
obsrep: be.obsrep,
9491
nextSender: nextSender,
9592
}
9693
})
9794

9895
lc, err := consumerhelper.NewLogs(func(ctx context.Context, ld pdata.Logs) error {
99-
return be.sender.send(newLogsRequest(ctx, ld, pusher))
96+
req := newLogsRequest(ctx, ld, pusher)
97+
err := be.sender.send(req)
98+
if errors.Is(err, errSendingQueueIsFull) {
99+
be.obsrep.RecordLogsEnqueueFailure(req.context(), req.count())
100+
}
101+
return err
100102
}, bs.consumerOptions...)
101103

102104
return &logsExporter{

exporter/exporterhelper/logs_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,30 @@ func TestLogsExporter_WithRecordLogs_ReturnError(t *testing.T) {
121121
checkRecordedMetricsForLogsExporter(t, le, want)
122122
}
123123

124+
func TestLogsExporter_WithRecordEnqueueFailedMetrics(t *testing.T) {
125+
doneFn, err := obsreporttest.SetupRecordedMetricsTest()
126+
require.NoError(t, err)
127+
defer doneFn()
128+
129+
rCfg := DefaultRetrySettings()
130+
qCfg := DefaultQueueSettings()
131+
qCfg.NumConsumers = 1
132+
qCfg.QueueSize = 2
133+
wantErr := errors.New("some-error")
134+
te, err := NewLogsExporter(&fakeLogsExporterConfig, zap.NewNop(), newPushLogsData(wantErr), WithRetry(rCfg), WithQueue(qCfg))
135+
require.NoError(t, err)
136+
require.NotNil(t, te)
137+
138+
md := testdata.GenerateLogsTwoLogRecordsSameResourceOneDifferent()
139+
const numBatches = 7
140+
for i := 0; i < numBatches; i++ {
141+
te.ConsumeLogs(context.Background(), md)
142+
}
143+
144+
// 2 batches must be in the queue, and 5 batches (15 log records) rejected due to queue overflow
145+
obsreporttest.CheckExporterEnqueueFailedLogs(t, fakeLogsExporterName, int64(15))
146+
}
147+
124148
func TestLogsExporter_WithSpan(t *testing.T) {
125149
le, err := NewLogsExporter(&fakeLogsExporterConfig, zap.NewNop(), newPushLogsData(nil))
126150
require.Nil(t, err)

exporter/exporterhelper/metrics.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ package exporterhelper
1616

1717
import (
1818
"context"
19+
"errors"
1920

2021
"go.uber.org/zap"
2122

2223
"go.opentelemetry.io/collector/component"
2324
"go.opentelemetry.io/collector/config"
24-
"go.opentelemetry.io/collector/config/configtelemetry"
2525
"go.opentelemetry.io/collector/consumer"
2626
"go.opentelemetry.io/collector/consumer/consumererror"
2727
"go.opentelemetry.io/collector/consumer/consumerhelper"
@@ -88,10 +88,7 @@ func NewMetricsExporter(
8888
be := newBaseExporter(cfg, logger, bs)
8989
be.wrapConsumerSender(func(nextSender requestSender) requestSender {
9090
return &metricsSenderWithObservability{
91-
obsrep: obsreport.NewExporter(obsreport.ExporterSettings{
92-
Level: configtelemetry.GetMetricsLevelFlagValue(),
93-
ExporterID: cfg.ID(),
94-
}),
91+
obsrep: be.obsrep,
9592
nextSender: nextSender,
9693
}
9794
})
@@ -100,7 +97,12 @@ func NewMetricsExporter(
10097
if bs.ResourceToTelemetrySettings.Enabled {
10198
md = convertResourceToLabels(md)
10299
}
103-
return be.sender.send(newMetricsRequest(ctx, md, pusher))
100+
req := newMetricsRequest(ctx, md, pusher)
101+
err := be.sender.send(req)
102+
if errors.Is(err, errSendingQueueIsFull) {
103+
be.obsrep.RecordMetricsEnqueueFailure(req.context(), req.count())
104+
}
105+
return err
104106
}, bs.consumerOptions...)
105107

106108
return &metricsExporter{

exporter/exporterhelper/metrics_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,30 @@ func TestMetricsExporter_WithRecordMetrics_ReturnError(t *testing.T) {
120120
checkRecordedMetricsForMetricsExporter(t, me, want)
121121
}
122122

123+
func TestMetricsExporter_WithRecordEnqueueFailedMetrics(t *testing.T) {
124+
doneFn, err := obsreporttest.SetupRecordedMetricsTest()
125+
require.NoError(t, err)
126+
defer doneFn()
127+
128+
rCfg := DefaultRetrySettings()
129+
qCfg := DefaultQueueSettings()
130+
qCfg.NumConsumers = 1
131+
qCfg.QueueSize = 2
132+
wantErr := errors.New("some-error")
133+
te, err := NewMetricsExporter(&fakeMetricsExporterConfig, zap.NewNop(), newPushMetricsData(wantErr), WithRetry(rCfg), WithQueue(qCfg))
134+
require.NoError(t, err)
135+
require.NotNil(t, te)
136+
137+
md := testdata.GenerateMetricsOneMetricOneDataPoint()
138+
const numBatches = 7
139+
for i := 0; i < numBatches; i++ {
140+
te.ConsumeMetrics(context.Background(), md)
141+
}
142+
143+
// 2 batches must be in the queue, and 5 batches (5 metric points) rejected due to queue overflow
144+
obsreporttest.CheckExporterEnqueueFailedMetrics(t, fakeMetricsExporterName, int64(5))
145+
}
146+
123147
func TestMetricsExporter_WithSpan(t *testing.T) {
124148
me, err := NewMetricsExporter(&fakeMetricsExporterConfig, zap.NewNop(), newPushMetricsData(nil))
125149
require.NoError(t, err)

exporter/exporterhelper/queued_retry.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ var (
4141
metric.WithDescription("Current size of the retry queue (in batches)"),
4242
metric.WithLabelKeys(obsmetrics.ExporterKey),
4343
metric.WithUnit(metricdata.UnitDimensionless))
44+
45+
errSendingQueueIsFull = errors.New("sending_queue is full")
4446
)
4547

4648
func init() {
@@ -189,7 +191,7 @@ func (qrs *queuedRetrySender) send(req request) error {
189191
zap.Int("dropped_items", req.count()),
190192
)
191193
span.Annotate(qrs.traceAttributes, "Dropped item, sending_queue is full.")
192-
return errors.New("sending_queue is full")
194+
return errSendingQueueIsFull
193195
}
194196

195197
span.Annotate(qrs.traceAttributes, "Enqueued item.")

exporter/exporterhelper/traces.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ package exporterhelper
1616

1717
import (
1818
"context"
19+
"errors"
1920

2021
"go.uber.org/zap"
2122

2223
"go.opentelemetry.io/collector/component"
2324
"go.opentelemetry.io/collector/config"
24-
"go.opentelemetry.io/collector/config/configtelemetry"
2525
"go.opentelemetry.io/collector/consumer"
2626
"go.opentelemetry.io/collector/consumer/consumererror"
2727
"go.opentelemetry.io/collector/consumer/consumerhelper"
@@ -88,17 +88,18 @@ func NewTracesExporter(
8888
be := newBaseExporter(cfg, logger, bs)
8989
be.wrapConsumerSender(func(nextSender requestSender) requestSender {
9090
return &tracesExporterWithObservability{
91-
obsrep: obsreport.NewExporter(
92-
obsreport.ExporterSettings{
93-
Level: configtelemetry.GetMetricsLevelFlagValue(),
94-
ExporterID: cfg.ID(),
95-
}),
91+
obsrep: be.obsrep,
9692
nextSender: nextSender,
9793
}
9894
})
9995

10096
tc, err := consumerhelper.NewTraces(func(ctx context.Context, td pdata.Traces) error {
101-
return be.sender.send(newTracesRequest(ctx, td, pusher))
97+
req := newTracesRequest(ctx, td, pusher)
98+
err := be.sender.send(req)
99+
if errors.Is(err, errSendingQueueIsFull) {
100+
be.obsrep.RecordTracesEnqueueFailure(req.context(), req.count())
101+
}
102+
return err
102103
}, bs.consumerOptions...)
103104

104105
return &traceExporter{

exporter/exporterhelper/traces_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,30 @@ func TestTracesExporter_WithRecordMetrics_ReturnError(t *testing.T) {
131131
checkRecordedMetricsForTracesExporter(t, te, want)
132132
}
133133

134+
func TestTracesExporter_WithRecordEnqueueFailedMetrics(t *testing.T) {
135+
doneFn, err := obsreporttest.SetupRecordedMetricsTest()
136+
require.NoError(t, err)
137+
defer doneFn()
138+
139+
rCfg := DefaultRetrySettings()
140+
qCfg := DefaultQueueSettings()
141+
qCfg.NumConsumers = 1
142+
qCfg.QueueSize = 2
143+
wantErr := errors.New("some-error")
144+
te, err := NewTracesExporter(&fakeTracesExporterConfig, zap.NewNop(), newTraceDataPusher(wantErr), WithRetry(rCfg), WithQueue(qCfg))
145+
require.NoError(t, err)
146+
require.NotNil(t, te)
147+
148+
td := testdata.GenerateTracesTwoSpansSameResource()
149+
const numBatches = 7
150+
for i := 0; i < numBatches; i++ {
151+
te.ConsumeTraces(context.Background(), td)
152+
}
153+
154+
// 2 batches must be in the queue, and 5 batches (10 spans) rejected due to queue overflow
155+
obsreporttest.CheckExporterEnqueueFailedTraces(t, fakeTracesExporterName, int64(10))
156+
}
157+
134158
func TestTracesExporter_WithSpan(t *testing.T) {
135159
te, err := NewTracesExporter(&fakeTracesExporterConfig, zap.NewNop(), newTraceDataPusher(nil))
136160
require.NoError(t, err)

internal/obsreportconfig/obsmetrics/obs_exporter.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,22 @@ const (
2727
SentSpansKey = "sent_spans"
2828
// FailedToSendSpansKey used to track spans that failed to be sent by exporters.
2929
FailedToSendSpansKey = "send_failed_spans"
30+
// FailedToEnqueueSpansKey used to track spans that failed to be added to the sending queue.
31+
FailedToEnqueueSpansKey = "enqueue_failed_spans"
3032

3133
// SentMetricPointsKey used to track metric points sent by exporters.
3234
SentMetricPointsKey = "sent_metric_points"
3335
// FailedToSendMetricPointsKey used to track metric points that failed to be sent by exporters.
3436
FailedToSendMetricPointsKey = "send_failed_metric_points"
37+
// FailedToEnqueueMetricPointsKey used to track metric points that failed to be added to the sending queue.
38+
FailedToEnqueueMetricPointsKey = "enqueue_failed_metric_points"
3539

3640
// SentLogRecordsKey used to track logs sent by exporters.
3741
SentLogRecordsKey = "sent_log_records"
3842
// FailedToSendLogRecordsKey used to track logs that failed to be sent by exporters.
3943
FailedToSendLogRecordsKey = "send_failed_log_records"
44+
// FailedToEnqueueLogRecordsKey used to track log records that failed to be added to the sending queue.
45+
FailedToEnqueueLogRecordsKey = "enqueue_failed_log_records"
4046
)
4147

4248
var (
@@ -60,6 +66,10 @@ var (
6066
ExporterPrefix+FailedToSendSpansKey,
6167
"Number of spans in failed attempts to send to destination.",
6268
stats.UnitDimensionless)
69+
ExporterFailedToEnqueueSpans = stats.Int64(
70+
ExporterPrefix+FailedToEnqueueSpansKey,
71+
"Number of spans failed to be added to the sending queue.",
72+
stats.UnitDimensionless)
6373
ExporterSentMetricPoints = stats.Int64(
6474
ExporterPrefix+SentMetricPointsKey,
6575
"Number of metric points successfully sent to destination.",
@@ -68,6 +78,10 @@ var (
6878
ExporterPrefix+FailedToSendMetricPointsKey,
6979
"Number of metric points in failed attempts to send to destination.",
7080
stats.UnitDimensionless)
81+
ExporterFailedToEnqueueMetricPoints = stats.Int64(
82+
ExporterPrefix+FailedToEnqueueMetricPointsKey,
83+
"Number of metric points failed to be added to the sending queue.",
84+
stats.UnitDimensionless)
7185
ExporterSentLogRecords = stats.Int64(
7286
ExporterPrefix+SentLogRecordsKey,
7387
"Number of log record successfully sent to destination.",
@@ -76,4 +90,8 @@ var (
7690
ExporterPrefix+FailedToSendLogRecordsKey,
7791
"Number of log records in failed attempts to send to destination.",
7892
stats.UnitDimensionless)
93+
ExporterFailedToEnqueueLogRecords = stats.Int64(
94+
ExporterPrefix+FailedToEnqueueLogRecordsKey,
95+
"Number of log records failed to be added to the sending queue.",
96+
stats.UnitDimensionless)
7997
)

0 commit comments

Comments
 (0)