99
1010 "github.com/buildbuddy-io/buildbuddy/enterprise/server/raft/constants"
1111 "github.com/buildbuddy-io/buildbuddy/server/interfaces"
12+ "github.com/buildbuddy-io/buildbuddy/server/util/kuberesolver"
1213 "github.com/buildbuddy-io/buildbuddy/server/util/log"
1314 "github.com/buildbuddy-io/buildbuddy/server/util/proto"
1415 "github.com/buildbuddy-io/buildbuddy/server/util/status"
@@ -68,6 +69,13 @@ type StaticRegistry struct {
6869
6970 targetAddresses sync.Map // map of NHID(string) => addresses
7071
72+ // Pod watcher for resolving raft addresses to IPs via k8s Watch API.
73+ // When set, raft addresses that are pod FQDNs are watched for IP changes
74+ // and Resolve() returns the resolved IP instead of the hostname.
75+ podWatcherManager * kuberesolver.PodWatcherManager
76+ resolvedRaftAddrs sync.Map // map of raftAddress(string) => resolved IP:port(string)
77+ raftWatchCancels sync.Map // map of raftAddress(string) => cancel func()
78+
7179 log log.Logger
7280}
7381
@@ -85,6 +93,52 @@ func NewStaticNodeRegistry(streamConnections uint64, v dbConfig.TargetValidator,
8593 return n
8694}
8795
96+ // SetPodWatcherManager configures the registry to resolve raft addresses
97+ // (pod FQDNs) to IPs using the k8s Watch API via the given PodWatcherManager.
98+ // When set, Resolve() returns the resolved pod IP instead of the hostname,
99+ // and uses the IP as the connection key so that an IP change triggers a new
100+ // connection.
101+ func (n * StaticRegistry ) SetPodWatcherManager (m * kuberesolver.PodWatcherManager ) {
102+ n .podWatcherManager = m
103+ }
104+
105+ // resolveRaftAddress returns the resolved IP:port for the given raft address
106+ // if a pod watcher is tracking it, otherwise returns the address as-is.
107+ func (n * StaticRegistry ) resolveRaftAddress (raftAddr string ) string {
108+ if resolved , ok := n .resolvedRaftAddrs .Load (raftAddr ); ok {
109+ return resolved .(string )
110+ }
111+ return raftAddr
112+ }
113+
114+ // watchRaftAddress starts watching the given raft address for IP changes
115+ // via the k8s Watch API. If the address is not a pod FQDN or the pod watcher
116+ // is not configured, this is a no-op.
117+ func (n * StaticRegistry ) watchRaftAddress (raftAddr string ) {
118+ if n .podWatcherManager == nil {
119+ return
120+ }
121+ if _ , ok := n .raftWatchCancels .Load (raftAddr ); ok {
122+ return
123+ }
124+ cancel , err := n .podWatcherManager .WatchPodIP (raftAddr , func (ipPort string , watchErr error ) {
125+ if watchErr != nil {
126+ if _ , had := n .resolvedRaftAddrs .LoadAndDelete (raftAddr ); had {
127+ n .log .Warningf ("Raft address %s lost resolution: %s" , raftAddr , watchErr )
128+ }
129+ return
130+ }
131+ if prev , loaded := n .resolvedRaftAddrs .Swap (raftAddr , ipPort ); ! loaded || prev .(string ) != ipPort {
132+ n .log .Infof ("Raft address %s resolved to %s" , raftAddr , ipPort )
133+ }
134+ })
135+ if err != nil {
136+ // Not a pod FQDN or not in k8s — fall back to hostname resolution.
137+ return
138+ }
139+ n .raftWatchCancels .Store (raftAddr , cancel )
140+ }
141+
88142// Add adds the specified node and its target info to the registry.
89143func (n * StaticRegistry ) Add (rangeID uint64 , replicaID uint64 , target string ) {
90144 if n .validate != nil && ! n .validate (target ) {
@@ -156,7 +210,8 @@ func (n *StaticRegistry) Resolve(rangeID uint64, replicaID uint64) (string, stri
156210 if err != nil {
157211 return "" , "" , err
158212 }
159- return ci .GetRaftAddress (), n .getConnectionKey (ci .GetRaftAddress (), rangeID ), nil
213+ addr := n .resolveRaftAddress (ci .GetRaftAddress ())
214+ return addr , n .getConnectionKey (addr , rangeID ), nil
160215}
161216
162217// ResolveRaft returns the raft address and the connection key of the specified node.
@@ -224,6 +279,7 @@ func (n *StaticRegistry) AddNode(target, raftAddress, grpcAddress string) {
224279 grpc : grpcAddress ,
225280 }
226281 n .targetAddresses .Store (target , a )
282+ n .watchRaftAddress (raftAddress )
227283}
228284
229285// ListNodes lists all the {NHID, raftAddress, grpcAddress} available in the
@@ -244,6 +300,12 @@ func (n *StaticRegistry) ListNodes() []*rfpb.ConnectionInfo {
244300}
245301
246302func (n * StaticRegistry ) Close () error {
303+ n .raftWatchCancels .Range (func (key , value interface {}) bool {
304+ if cancel , ok := value .(func ()); ok && cancel != nil {
305+ cancel ()
306+ }
307+ return true
308+ })
247309 return nil
248310}
249311
@@ -301,6 +363,12 @@ func NewDynamicNodeRegistry(gossipManager interfaces.GossipService, streamConnec
301363 return dnr
302364}
303365
366+ // SetPodWatcherManager configures the underlying static registry to resolve
367+ // raft addresses via the k8s Watch API.
368+ func (d * DynamicNodeRegistry ) SetPodWatcherManager (m * kuberesolver.PodWatcherManager ) {
369+ d .sReg .SetPodWatcherManager (m )
370+ }
371+
304372func (d * DynamicNodeRegistry ) handleEvent (event * serf.UserEvent ) {
305373 if event .Name != constants .RegistryUpdateEvent {
306374 return
@@ -462,7 +530,8 @@ func (d *DynamicNodeRegistry) Resolve(rangeID uint64, replicaID uint64) (string,
462530 if err != nil {
463531 return "" , "" , err
464532 }
465- return ci .GetRaftAddress (), d .sReg .getConnectionKey (ci .GetRaftAddress (), rangeID ), nil
533+ addr := d .sReg .resolveRaftAddress (ci .GetRaftAddress ())
534+ return addr , d .sReg .getConnectionKey (addr , rangeID ), nil
466535}
467536
468537// Lookup returns the connectionInfo of the specified node
0 commit comments