Skip to content

Commit ddc7d4a

Browse files
Merge pull request vitessio#2762 from michael-berlin/healthcheck_fix
discovery: Set "LastError" when connecting to vttablet failed.
2 parents a0a0c31 + cf1bc31 commit ddc7d4a

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

go/vt/discovery/healthcheck.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,11 @@ func (hc *HealthCheckImpl) checkConn(hcc *healthCheckConn, name string) {
367367

368368
// Read stream health responses.
369369
for {
370-
_ = hcc.stream(hc, func(shr *querypb.StreamHealthResponse) error {
370+
hcc.stream(hc, func(shr *querypb.StreamHealthResponse) error {
371371
return hcc.processResponse(hc, shr)
372372
})
373373

374+
// Streaming RPC failed e.g. because vttablet was restarted.
374375
// Sleep until the next retry is up or the context is done/canceled.
375376
select {
376377
case <-hcc.ctx.Done():
@@ -381,18 +382,21 @@ func (hc *HealthCheckImpl) checkConn(hcc *healthCheckConn, name string) {
381382
}
382383

383384
// stream streams healthcheck responses to callback.
384-
func (hcc *healthCheckConn) stream(hc *HealthCheckImpl, callback func(*querypb.StreamHealthResponse) error) error {
385+
func (hcc *healthCheckConn) stream(hc *HealthCheckImpl, callback func(*querypb.StreamHealthResponse) error) {
385386
hcc.mu.Lock()
386387
conn := hcc.conn
387388
hcc.mu.Unlock()
389+
388390
if conn == nil {
389391
var err error
390-
// Keyspace, shard and tabletType are the ones from the tablet
391-
// record, but they won't be used just yet.
392392
conn, err = tabletconn.GetDialer()(hcc.tabletStats.Tablet, hc.connTimeout)
393393
if err != nil {
394-
return err
394+
hcc.mu.Lock()
395+
hcc.tabletStats.LastError = err
396+
hcc.mu.Unlock()
397+
return
395398
}
399+
396400
hcc.mu.Lock()
397401
hcc.conn = conn
398402
hcc.tabletStats.LastError = nil
@@ -407,9 +411,9 @@ func (hcc *healthCheckConn) stream(hc *HealthCheckImpl, callback func(*querypb.S
407411
hcc.tabletStats.Serving = false
408412
hcc.tabletStats.LastError = err
409413
hcc.mu.Unlock()
410-
return err
414+
return
411415
}
412-
return nil
416+
return
413417
}
414418

415419
// processResponse reads one health check response, and notifies HealthCheckStatsListener.

0 commit comments

Comments
 (0)