GATEWAYS-4306: exporting metrics for conntrack per zone #21
GATEWAYS-4306: exporting metrics for conntrack per zone #21 — shrouti1995 wants to merge 19 commits into master
Conversation
internal/ovsexporter/conntrack.go
Outdated
| // ConntrackCollectorWithAggAccessor wraps the existing collector with access to the aggregator snapshot | ||
| type ConntrackCollectorWithAggAccessor struct { | ||
| *conntrackCollector | ||
| SnapshotFunc func() map[uint16]map[uint32]int |
* trying to fix the static check issue * trying to fix the static check issue
* adding test cases * adding mock tests * Context-Based Cancellation Refactoring * add centralised error propagation * solve lint error
* adding test cases * adding mock tests * Context-Based Cancellation Refactoring * add centralised error propagation * solve lint error * modelling test cases in table format * graceful shut down * centralised config * solve lint error
shrouti1995
left a comment
There was a problem hiding this comment.
@anitgandhi @jcooperdo when you get time
internal/conntrack/mock.go
Outdated
| } | ||
|
|
||
| // NewZoneMarkAggregator creates a mock aggregator for testing | ||
| func NewZoneMarkAggregator() (*MockZoneMarkAggregator, error) { |
There was a problem hiding this comment.
this should be named NewMockZoneMarkAggregator()
internal/conntrack/config.go
Outdated
| } | ||
|
|
||
| // LoadConfig loads conntrack configuration from environment variables | ||
| func LoadConfig() *Config { |
There was a problem hiding this comment.
it's confusing to use env vars directly for config here, when the top-level exporter config is driven by CLI flags
we should be consistent and use CLI flags for everything.
this package (internal/conntrack) shouldn't really care about flags vs env vars actually, it should only care about its own Config type/struct and associated defaults
it should be left up to the caller to define how to change the config (env vars, flags, whatever)
| // Test concurrent snapshot access | ||
| done := make(chan bool, 10) | ||
| for i := 0; i < 10; i++ { | ||
| go func() { | ||
| snapshot := agg.Snapshot() | ||
| if snapshot == nil { | ||
| t.Error("Concurrent snapshot returned nil") | ||
| } | ||
| done <- true | ||
| }() | ||
| } | ||
|
|
||
| // Wait for all goroutines | ||
| for i := 0; i < 10; i++ { | ||
| <-done | ||
| } |
There was a problem hiding this comment.
could simplify this with a sync.WaitGroup
internal/ovsexporter/ovsexporter.go
Outdated
| cs []prometheus.Collector | ||
| mu sync.Mutex | ||
| cs []prometheus.Collector | ||
| conntrackEnabled bool |
There was a problem hiding this comment.
is there ever a situation where this is false but aggregator is non-nil?
i don't believe there would be, in which case this extra bool doesn't provide any value
Co-authored-by: Anit Gandhi <anitgandhi@gmail.com>
internal/conntrack/config.go
Outdated
| } | ||
| } | ||
|
|
||
| // LoadConfig loads conntrack configuration from environment variables |
There was a problem hiding this comment.
aggregator.go would be a better name
CONNTRACK_CONFIG.md
Outdated
There was a problem hiding this comment.
this feels outdated at this point, seeing as we don't have env vars anymore, and also in this PR we didn't have the hardcoded consts either
i'd recommend getting rid of this doc and instead making the godoc in internal/conntrack/config.go more clear by adding these details
internal/ovsexporter/ovsexporter.go
Outdated
| newDatapathCollector(c.Datapath.List), | ||
| } | ||
|
|
||
| // Create the aggregator |
There was a problem hiding this comment.
i don't see why we should put this in internal/ovsexporter at all
we should instead put the conntrack prometheus logic in internal/conntrack/exporter.go, and then just import it from main.go (if the additional exporter is enabled, which should be behind a new config flag bool in main.go)
There was a problem hiding this comment.
my bad. added it in this way.
| } | ||
|
|
||
| // StopWithTimeout cancels listening and closes the connection with a configurable timeout. | ||
| func (a *ZoneMarkAggregator) StopWithTimeout(timeout time.Duration) error { |
| func (a *ZoneMarkAggregator) GetError() error { | ||
| // This is a non-blocking way to check if there are any errors | ||
| // The actual error handling happens in Stop() | ||
| return nil | ||
| } |
There was a problem hiding this comment.
looks like a no-op, remove if it's not going to be useful
internal/ovsexporter/ovsexporter.go
Outdated
| // Additional generic netlink family collectors can be added here. | ||
| newDatapathCollector(c.Datapath.List), | ||
| }, | ||
| collectors := []prometheus.Collector{ |
internal/ovsexporter/test_helpers.go
Outdated
There was a problem hiding this comment.
this file should be in internal/conntrack right?
cmd/openvswitch_exporter/main.go
Outdated
| case syscall.SIGHUP: | ||
| log.Printf("Received SIGHUP, reloading config...") | ||
| // TODO: Add config reload logic here | ||
| log.Printf("Config reloaded") | ||
| return |
There was a problem hiding this comment.
if we're not going to support this now (which i don't think we really need to), let's just not handle SIGHUP
cmd/openvswitch_exporter/main.go
Outdated
| // TODO: Add config reload logic here | ||
| log.Printf("Config reloaded") | ||
| return | ||
| case syscall.SIGQUIT: |
There was a problem hiding this comment.
doesn't seem like we're actually treating SIGQUIT any differently than the other signals in practice
also, generally handling SIGQUIT gracefully is a bit weird. The Go runtime already does the "correct" thing, which is to dump a stack trace and exit.
let's remove handling for this, and just leave the standard SIGINT and SIGTERM for graceful termination
Co-authored-by: Anit Gandhi <anitgandhi@gmail.com>
| // Handle shutdown signals | ||
| sigChan := make(chan os.Signal, 1) | ||
| signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) | ||
|
|
||
| if err := http.ListenAndServe(*metricsAddr, mux); err != nil { | ||
| log.Fatalf("cannot start Open vSwitch exporter: %v", err) | ||
| // Start server in goroutine | ||
| go func() { | ||
| log.Printf("starting Open vSwitch exporter on %q", *metricsAddr) | ||
| if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { | ||
| log.Fatalf("cannot start Open vSwitch exporter: %v", err) | ||
| } | ||
| }() | ||
|
|
||
| // Wait for shutdown signal | ||
| sig := <-sigChan | ||
| log.Printf("Received signal %v, stopping gracefully...", sig) |
There was a problem hiding this comment.
| // Handle shutdown signals | |
| sigChan := make(chan os.Signal, 1) | |
| signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) | |
| if err := http.ListenAndServe(*metricsAddr, mux); err != nil { | |
| log.Fatalf("cannot start Open vSwitch exporter: %v", err) | |
| // Start server in goroutine | |
| go func() { | |
| log.Printf("starting Open vSwitch exporter on %q", *metricsAddr) | |
| if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { | |
| log.Fatalf("cannot start Open vSwitch exporter: %v", err) | |
| } | |
| }() | |
| // Wait for shutdown signal | |
| sig := <-sigChan | |
| log.Printf("Received signal %v, stopping gracefully...", sig) | |
| // Start server in goroutine | |
| go func() { | |
| log.Printf("starting Open vSwitch exporter on %q", *metricsAddr) | |
| if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { | |
| log.Fatalf("cannot start Open vSwitch exporter: %v", err) | |
| } | |
| }() | |
| // Wait for shutdown signal | |
| sigChan := make(chan os.Signal, 1) | |
| signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) | |
| sig := <-sigChan |
Adding support to export conntrack metrics per zone.
Details: