Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .chloggen/kafkaexporter-connection-idle-timeout.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
component: exporter/kafka

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `connection_idle_timeout` configuration option to control idle connection timeout.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [45321]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
Defaults to 4 minutes. It may take up to 2x the configured time before a connection is actually closed.
This setting is applicable for franz-go, while ignored in sarama because it does not close idle connections.

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
29 changes: 29 additions & 0 deletions .chloggen/kafkareceiver-connection-idle-timeout.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
component: receiver/kafka

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `connection_idle_timeout` configuration option to control idle connection timeout.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [45321]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
Defaults to 4 minutes. It may take up to 2x the configured time before a connection is actually closed.
This setting is applicable for franz-go, while ignored in sarama because it does not close idle connections.

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
1 change: 1 addition & 0 deletions exporter/kafkaexporter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ The following settings can be optionally configured:
- `protocol_version` (default = 2.1.0): Kafka protocol version.
- `resolve_canonical_bootstrap_servers_only` (default = false): Whether to resolve then reverse-lookup broker IPs during startup.
- `client_id` (default = "otel-collector"): The client ID to configure the Kafka client with. The client ID will be used for all produce requests.
- `connection_idle_timeout` (default = `4m`): The time after which idle connections to Kafka brokers are closed. Note: It may take up to 2x the configured time before a connection is actually closed. This setting applies only to the franz-go client; it is ignored by the sarama client, which does not close idle connections.
- `logs`
- `topic` (default = otlp\_logs): The name of the Kafka topic to which logs will be exported.
- `encoding` (default = otlp\_proto): The encoding for logs. See [Supported encodings](#supported-encodings).
Expand Down
4 changes: 4 additions & 0 deletions internal/kafka/franz_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ func commonOpts(ctx context.Context, clientCfg configkafka.ClientConfig,
if clientCfg.Metadata.RefreshInterval > 0 {
opts = append(opts, kgo.MetadataMaxAge(clientCfg.Metadata.RefreshInterval))
}
// Configure connection idle timeout
if clientCfg.ConnectionIdleTimeout > 0 {
opts = append(opts, kgo.ConnIdleTimeout(clientCfg.ConnectionIdleTimeout))
}
// Configure the min/max protocol version if provided
if clientCfg.ProtocolVersion != "" {
keyVersions := make(map[string]any)
Expand Down
18 changes: 14 additions & 4 deletions pkg/kafka/configkafka/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,21 @@ type ClientConfig struct {
//
// NOTE: this is experimental and may be removed in a future release.
UseLeaderEpoch bool `mapstructure:"use_leader_epoch"`

// ConnectionIdleTimeout specifies the time after which idle connections are closed.
//
// Note: It may take up to 2x the configured time before a connection is actually closed.
// This setting is applicable for franz-go, while ignored in sarama because it does not close idle connections.
ConnectionIdleTimeout time.Duration `mapstructure:"connection_idle_timeout"`
}

func NewDefaultClientConfig() ClientConfig {
return ClientConfig{
Brokers: []string{"localhost:9092"},
ClientID: "otel-collector",
Metadata: NewDefaultMetadataConfig(),
UseLeaderEpoch: true,
Brokers: []string{"localhost:9092"},
ClientID: "otel-collector",
Metadata: NewDefaultMetadataConfig(),
UseLeaderEpoch: true,
ConnectionIdleTimeout: 4 * time.Minute,
}
}

Expand All @@ -96,6 +103,9 @@ func (c ClientConfig) Validate() error {
return fmt.Errorf("invalid protocol version: %w", err)
}
}
if c.ConnectionIdleTimeout <= 0 {
return fmt.Errorf("connection_idle_timeout (%s) must be positive", c.ConnectionIdleTimeout)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we check that it's >= 100ms?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, nice catch. However, I don't think ClientConfig.Validate is called anywhere at all. In that case,

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this line serves as a sanity check at otel level. It is here for consistency, not for function, because it isn't called.

It should be called by confmap. Did you observe that not to be the case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming a diff

diff --git a/receiver/kafkareceiver/config_test.go b/receiver/kafkareceiver/config_test.go
index 87dc786cd5..27dbe66acd 100644
--- a/receiver/kafkareceiver/config_test.go
+++ b/receiver/kafkareceiver/config_test.go
@@ -471,6 +471,15 @@ func TestConfigValidate(t *testing.T) {
 			},
 			expectedErr: "profiles.exclude_topics contains empty string",
 		},
+		{
+			name: "invalid config with negative connection idle timeout",
+			config: &Config{
+				ClientConfig: configkafka.ClientConfig{
+					ConnectionIdleTimeout: -1,
+				},
+			},
+			expectedErr: "connection_idle_timeout (-1) must be positive",
+		},
 	}
 
 	for _, tt := range tests {

output

=== RUN   TestConfigValidate
=== RUN   TestConfigValidate/invalid_config_with_negative_connection_idle_timeout
    config_test.go:496: 
        	Error Trace:	/home/carson/projects/opentelemetry-collector-contrib/receiver/kafkareceiver/config_test.go:496
        	Error:      	An error is expected but got nil.
        	Test:       	TestConfigValidate/invalid_config_with_negative_connection_idle_timeout
--- FAIL: TestConfigValidate/invalid_config_with_negative_connection_idle_timeout (0.00s)

--- FAIL: TestConfigValidate (0.00s)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm slightly inclined to delete this conn idle timeout validation in configkafka and to 100% rely on franz-go validation in #46024 . wdyt?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the test isn't failing because we're calling Config.Validate, which (intentionally) doesn't call the Validate method of fields. IIANM we should use xconfmap.Validate instead.

As for relying on franz-go for validation... I don't know, but my instinct is that we should validate in configkafka, and only ever pass through valid options to franz-go.

Copy link
Contributor Author

@carsonip carsonip Feb 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL config.Validate vs xconfmap.Validate, thanks for the pointer. I'll update the tests to use xconfmap.Validate in a separate PR because it requires fixing existing tests if that's desired.

As for relying on franz-go for validation... I don't know, but my instinct is that we should validate in configkafka, and only ever pass through valid options to franz-go.

There are a few options:

  • A: no check in config.Validate & rely on franz-go validation
  • B: fully align configkafka validation with franz-go's validation values. However, this means that whenever franz-go's validation rules change, a corresponding change is required in configkafka
  • C: middle ground between A and B. configkafka does basic sanity check to reject clearly invalid values, assuming this is also written for other kafka clients e.g. sarama. Then rely on franz-go for actual min/max checks.

The drawback of B is demonstrated in twmb/franz-go#1245 where we changed an arbitrary min conn idle timeout from 1s to 100ms. It is not hard to imagine these minor tweaks to happen over time.

Option C implies inconsistent error messages for a configured conn idle timeout of 0ms vs 1ms, since configkafka would reject 0ms while franz-go would reject values >0ms and <100ms.

A helps us avoid the maintenance with no apparent loss in user experience.

The PR is currently doing B which is also fine IMO

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer option B, though we don't necessarily have to make them exactly the same; the collector could also be more strict (but not less).

This would align well with open-telemetry/opentelemetry-collector#14543

}
return nil
}

Expand Down
7 changes: 7 additions & 0 deletions pkg/kafka/configkafka/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ func TestClientConfig(t *testing.T) {
return cfg
}(),
},
"conn_idle_timeout": {
expected: func() ClientConfig {
	// Start from the defaults and override only the idle timeout, so the
	// expectation differs from the default config in exactly one field.
	// NOTE(review): the enclosing map key ("conn_idle_timeout") presumably has to
	// match the testdata entry name (kafka/connection_idle_timeout) — confirm.
	cfg := NewDefaultClientConfig()
	// The field declared on ClientConfig is ConnectionIdleTimeout
	// (mapstructure:"connection_idle_timeout"); there is no ConnIdleTimeout field.
	cfg.ConnectionIdleTimeout = 5 * time.Minute
	return cfg
}(),
},

// Invalid configurations
"brokers_required": {
Expand Down
3 changes: 3 additions & 0 deletions pkg/kafka/configkafka/testdata/client_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,6 @@ kafka/foo:

kafka/not_use_leader_epoch:
use_leader_epoch: false

kafka/connection_idle_timeout:
connection_idle_timeout: 5m
1 change: 1 addition & 0 deletions receiver/kafkareceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ The following settings can be optionally configured:
- `client_id` (default = otel-collector): The consumer client ID that receiver will use
- `rack_id` (default = ""): The rack identifier for this client. When set and brokers are configured with a rack-aware replica selector, the client will prefer fetching from the closest replica.
- `use_leader_epoch` (default = true): (Experimental) When enabled, the consumer uses the leader epoch returned by brokers (KIP-320) to detect log truncation. Setting this to false clears the leader epoch from fetch offsets, disabling KIP-320. Disabling can improve compatibility with brokers that don’t fully support leader epochs (e.g., Azure Event Hubs), at the cost of losing automatic log-truncation safety.
- `connection_idle_timeout` (default = `4m`): The time after which idle connections to Kafka brokers are closed. Note: It may take up to 2x the configured time before a connection is actually closed. This setting applies only to the franz-go client; it is ignored by the sarama client, which does not close idle connections.
- `initial_offset` (default = latest): The initial offset to use if no offset was previously committed. Must be `latest` or `earliest`.
- `session_timeout` (default = `10s`): The request timeout for detecting client failures when using Kafka’s group management facilities.
- `heartbeat_interval` (default = `3s`): The expected time between heartbeats to the consumer coordinator when using Kafka’s group management facilities.
Expand Down
30 changes: 30 additions & 0 deletions receiver/kafkareceiver/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,36 @@ func TestLoadConfig(t *testing.T) {
},
},
},
{
id: component.NewIDWithName(metadata.Type, "connection_idle_timeout"),
expected: &Config{
ClientConfig: func() configkafka.ClientConfig {
config := configkafka.NewDefaultClientConfig()
config.ConnectionIdleTimeout = 5 * time.Minute
return config
}(),
ConsumerConfig: configkafka.NewDefaultConsumerConfig(),
Logs: TopicEncodingConfig{
Topics: []string{"otlp_logs"},
Encoding: "otlp_proto",
},
Metrics: TopicEncodingConfig{
Topics: []string{"otlp_metrics"},
Encoding: "otlp_proto",
},
Traces: TopicEncodingConfig{
Topics: []string{"otlp_spans"},
Encoding: "otlp_proto",
},
Profiles: TopicEncodingConfig{
Topics: []string{"otlp_profiles"},
Encoding: "otlp_proto",
},
ErrorBackOff: configretry.BackOffConfig{
Enabled: false,
},
},
},
}

for _, tt := range tests {
Expand Down
3 changes: 3 additions & 0 deletions receiver/kafkareceiver/testdata/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,6 @@ kafka/regex_topic_with_exclusion:
exclude_topics:
- "^traces-debug-.*$"
encoding: otlp_proto

kafka/connection_idle_timeout:
connection_idle_timeout: 5m
Loading