@@ -31,6 +31,7 @@ import (
3131 "time"
3232
3333 btpb "cloud.google.com/go/bigtable/apiv2/bigtablepb"
34+ "github.com/googleapis/gax-go/v2"
3435 "go.opentelemetry.io/otel/attribute"
3536 "go.opentelemetry.io/otel/metric"
3637 gtransport "google.golang.org/api/transport/grpc"
@@ -320,7 +321,10 @@ type BigtableChannelPool struct {
320321 appProfile string
321322 instanceName string
322323 featureFlagsMD metadata.MD
323- meterProvider metric.MeterProvider
324+
325+ factory * connectionFactory // Use the factory for connection creation
326+
327+ meterProvider metric.MeterProvider
324328 // configs
325329 metricsConfig btopt.MetricsReporterConfig
326330
@@ -367,6 +371,15 @@ func NewBigtableChannelPool(ctx context.Context, connPoolSize int, strategy btop
367371 opt (pool )
368372 }
369373
374+ // Initialize the connectionFactory
375+ pool .factory = & connectionFactory {
376+ dial : dial ,
377+ instanceName : pool .instanceName ,
378+ appProfile : pool .appProfile ,
379+ featureFlagsMD : pool .featureFlagsMD ,
380+ logger : pool .logger ,
381+ }
382+
370383 // Set the selection function based on the strategy
371384 switch strategy {
372385 case btopt .LeastInFlight :
@@ -379,6 +392,7 @@ func NewBigtableChannelPool(ctx context.Context, connPoolSize int, strategy btop
379392
380393 var exitSignal error
381394
395+ // TODO: Replace this logic with addConnections(...).
382396 initialConns := make ([]* connEntry , connPoolSize )
383397 for i := 0 ; i < connPoolSize ; i ++ {
384398 select {
@@ -391,21 +405,12 @@ func NewBigtableChannelPool(ctx context.Context, connPoolSize int, strategy btop
391405 break
392406 }
393407
394- conn , err := dial ( )
408+ entry , err := pool . factory . newEntry ( ctx )
395409 if err != nil {
396410 exitSignal = err
397411 break
398412 }
399-
400- entry := & connEntry {conn : conn }
401- initialConns [i ] = entry // Note, we keep non primed conns in conns
402- // Prime the new connection in a non-blocking goroutine to warm it up.
403- go func (e * connEntry ) {
404- err := e .conn .Prime (ctx , pool .instanceName , pool .appProfile , pool .featureFlagsMD )
405- if err != nil {
406- btopt .Debugf (pool .logger , "bigtable_connpool: failed to prime initial connection: %v\n " , err )
407- }
408- }(entry )
413+ initialConns [i ] = entry
409414 }
410415 if exitSignal != nil {
411416 btopt .Debugf (pool .logger , "bigtable_connpool: error during initial connection creation: %v\n " , exitSignal )
@@ -530,25 +535,13 @@ func (p *BigtableChannelPool) replaceConnection(oldEntry *connEntry) {
530535 return
531536 default :
532537 }
533- newConn , err := p .dial ( )
538+ newEntry , err := p .factory . newEntry ( p . poolCtx )
534539 if err != nil {
535- btopt .Debugf (p .logger , "bigtable_connpool: Failed to redial connection at index %d: %v\n " , idx , err )
540+ btopt .Debugf (p .logger , "bigtable_connpool: Failed to replace connection at index %d: %v. Closing new conn. Old connection remains (draining). \n " , idx , err )
536541 return
537542 }
538543
539- err = newConn .Prime (p .poolCtx , p .instanceName , p .appProfile , p .featureFlagsMD )
540-
541- if err != nil {
542- btopt .Debugf (p .logger , "bigtable_connpool: Failed to prime replacement connection at index %d: %v. Closing new conn. Old connection remains (draining).\n " , idx , err )
543- newConn .Close () //
544- return // Abort
545- }
546-
547544 btopt .Debugf (p .logger , "bigtable_connpool: Successfully primed new connection. Replacing connection at index %d\n " , idx )
548- newEntry := & connEntry {
549- conn : newConn ,
550- }
551-
552545 // Copy-on-write
553546 newConns := make ([]* connEntry , len (currentConns ))
554547 copy (newConns , currentConns )
@@ -785,20 +778,13 @@ func (p *BigtableChannelPool) addConnections(increaseDelta, maxConns int) bool {
785778 default :
786779 }
787780
788- conn , err := p .dial ()
789- if err != nil {
790- btopt .Debugf (p .logger , "bigtable_connpool: Failed to dial new connection for scale up: %v\n " , err )
791- return
792- }
793-
794- err = conn .Prime (p .poolCtx , p .instanceName , p .appProfile , p .featureFlagsMD )
781+ entry , err := p .factory .newEntry (p .poolCtx )
795782 if err != nil {
796- btopt .Debugf (p .logger , "bigtable_connpool: Failed to prime new connection: %v. Connection will not be added.\n " , err )
797- conn .Close ()
783+ btopt .Debugf (p .logger , "bigtable_connpool: Failed to add new connection: %v. Connection will not be added.\n " , err )
798784 return
799785 }
800786
801- results <- & connEntry { conn : conn }
787+ results <- entry
802788 }()
803789 }
804790 // Goroutine to close the results channel once all workers are done.
@@ -904,6 +890,72 @@ func (p *BigtableChannelPool) removeConnections(decreaseDelta, minConns, maxRemo
904890
905891}
906892
893+ // connectionFactory is responsible for creating and priming new Bigtable connections.
894+ // TODO remove these members from BigtableConnPool struct
895+ type connectionFactory struct {
896+ dial func () (* BigtableConn , error )
897+ instanceName string
898+ appProfile string
899+ featureFlagsMD metadata.MD
900+ logger * log.Logger
901+ }
902+
903+ // newEntry creates a new connection, primes it, and returns it as a connEntry.
904+ // Blocks until the connection is successfully primed, or returns an error.
905+ func (cf * connectionFactory ) newEntry (ctx context.Context ) (* connEntry , error ) {
906+ conn , err := cf .dial ()
907+ if err != nil {
908+ return nil , fmt .Errorf ("factory dial failed: %w" , err )
909+ }
910+
911+ if err := cf .primeWithRetry (ctx , conn ); err != nil {
912+ conn .Close ()
913+ return nil , fmt .Errorf ("bigtable_connpool: connection factory prime failed: %w" , err )
914+ }
915+
916+ return & connEntry {conn : conn }, nil
917+ }
918+
919+ // primeWithRetry attempts to prime the connection, retrying with exponential backoff.
920+ func (cf * connectionFactory ) primeWithRetry (ctx context.Context , conn * BigtableConn ) error {
921+ backoffPolicy := gax.Backoff {
922+ Initial : 100 * time .Millisecond ,
923+ Max : 2 * time .Second ,
924+ Multiplier : 1.2 ,
925+ }
926+ maxAttempts := 3
927+ var lastErr error
928+ for attempt := 0 ; attempt < maxAttempts ; attempt ++ {
929+
930+ // ctx.Done() returns a error
931+ if err := ctx .Err (); err != nil {
932+ return fmt .Errorf ("bigtable_connpool: error before prime attempt %d: %w" , attempt , err )
933+ }
934+
935+ lastErr = conn .Prime (ctx , cf .instanceName , cf .appProfile , cf .featureFlagsMD )
936+ if lastErr == nil {
937+ return nil
938+ }
939+
940+ if attempt == maxAttempts - 1 {
941+ // no need to pause(), short circuit
942+ break
943+ }
944+
945+ pause := backoffPolicy .Pause ()
946+ btopt .Debugf (cf .logger , "bigtable_connpool: Prime failed with error on attempt %d, retrying in %v: %v" , attempt + 1 , pause , lastErr )
947+
948+ select {
949+ case <- ctx .Done ():
950+ return fmt .Errorf ("context done while backing off for prime: %w" , ctx .Err ())
951+ case <- time .After (pause ):
952+ }
953+ }
954+
955+ return fmt .Errorf ("factory prime failed after %d attempts: %w" , maxAttempts , lastErr )
956+
957+ }
958+
907959type multiError []error
908960
909961func (m multiError ) Error () string {
0 commit comments