Skip to content

Commit 1825198

Browse files
WIP on liveness checks
1 parent 0f5d281 commit 1825198

File tree

3 files changed

+155
-152
lines changed

3 files changed

+155
-152
lines changed

depot/transformer/transformer.go

Lines changed: 38 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,10 @@ type transformer struct {
6363
gracefulShutdownInterval time.Duration
6464
healthCheckWorkPool *workpool.WorkPool
6565

66-
useContainerProxy bool
67-
drainWait time.Duration
66+
useContainerProxy bool
67+
drainWait time.Duration
68+
enableContainerProxyHealthChecks bool
69+
proxyHealthCheckInterval time.Duration
6870

6971
postSetupHook []string
7072
postSetupUser string
@@ -93,6 +95,13 @@ func WithContainerProxy(drainWait time.Duration) Option {
9395
}
9496
}
9597

98+
// WithProxyLivenessChecks returns an Option that enables liveness checks for
// the container proxy and stores interval as the proxy health check interval.
// StepsRunner only consults these fields when the container proxy and
// declarative health checks are both in use.
func WithProxyLivenessChecks(interval time.Duration) Option {
99+
return func(t *transformer) {
100+
t.enableContainerProxyHealthChecks = true
101+
t.proxyHealthCheckInterval = interval
102+
}
103+
}
104+
96105
func WithPostSetupHook(user string, hook []string) Option {
97106
return func(t *transformer) {
98107
t.postSetupUser = user
@@ -447,8 +456,9 @@ func (t *transformer) StepsRunner(
447456

448457
if t.useContainerProxy && t.useDeclarativeHealthCheck {
449458
envoyStartupLogger := logger.Session("envoy-startup-check")
459+
envoyLivenessLogger := logger.Session("envoy-liveness-check")
450460

451-
for idx, p := range config.ProxyTLSPorts {
461+
for idx, port := range config.ProxyTLSPorts {
452462
// add envoy startup checks
453463
startupSidecarName := fmt.Sprintf("%s-envoy-startup-healthcheck-%d", gardenContainer.Handle(), idx)
454464

@@ -458,7 +468,7 @@ func (t *transformer) StepsRunner(
458468
config.BindMounts,
459469
"",
460470
startupSidecarName,
461-
int(p),
471+
int(port),
462472
DefaultDeclarativeHealthcheckRequestTimeout,
463473
executor.TCPCheck,
464474
executor.IsStartupCheck,
@@ -469,31 +479,36 @@ func (t *transformer) StepsRunner(
469479
false,
470480
)
471481

472-
livenessStep := t.createCheck(
473-
&container,
474-
gardenContainer,
475-
config.BindMounts,
476-
"",
477-
fmt.Sprintf("%s-envoy-liveness-healthcheck-%d", gardenContainer.Handle(), idx),
478-
int(p),
479-
DefaultDeclarativeHealthcheckRequestTimeout,
480-
executor.TCPCheck,
481-
executor.IsLivenessCheck,
482-
t.unhealthyMonitoringInterval,
483-
envoyStartupLogger,
484-
"instance proxy failed to start",
485-
config.MetronClient,
486-
false,
487-
)
482+
if t.enableContainerProxyHealthChecks {
483+
livenessSidecarName := fmt.Sprintf("%s-envoy-liveness-healthcheck-%d", gardenContainer.Handle(), idx)
484+
485+
livenessStep := t.createCheck(
486+
&container,
487+
gardenContainer,
488+
config.BindMounts,
489+
"",
490+
livenessSidecarName,
491+
int(port),
492+
DefaultDeclarativeHealthcheckRequestTimeout,
493+
executor.TCPCheck,
494+
executor.IsLivenessCheck,
495+
t.proxyHealthCheckInterval,
496+
envoyLivenessLogger,
497+
"instance proxy health check failed",
498+
config.MetronClient,
499+
t.emitHealthCheckMetrics,
500+
)
501+
502+
proxyLivenessChecks = append(proxyLivenessChecks, livenessStep)
503+
}
488504

489505
proxyStartupChecks = append(proxyStartupChecks, step)
490-
proxyLivenessChecks = append(proxyLivenessChecks, livenessStep)
491506
}
492507
}
493508
var readinessChan chan steps.ReadinessState
494509
if container.CheckDefinition != nil && t.useDeclarativeHealthCheck {
495510
if container.CheckDefinition.Checks != nil {
496-
monitor = t.transformCheckDefinitionWithChristmasBonuses(logger,
511+
monitor = t.transformCheckDefinition(logger,
497512
&container,
498513
gardenContainer,
499514
logStreamer,
@@ -819,7 +834,7 @@ func (t *transformer) applyCheckDefaults(timeout int, interval time.Duration, pa
819834
return timeout, interval, path
820835
}
821836

822-
func (t *transformer) transformCheckDefinitionWithChristmasBonuses(
837+
func (t *transformer) transformCheckDefinition(
823838
logger lager.Logger,
824839
container *executor.Container,
825840
gardenContainer garden.Container,
@@ -950,135 +965,6 @@ func (t *transformer) transformCheckDefinitionWithChristmasBonuses(
950965
)
951966
}
952967

953-
func (t *transformer) transformCheckDefinition(
954-
logger lager.Logger,
955-
container *executor.Container,
956-
gardenContainer garden.Container,
957-
logstreamer log_streamer.LogStreamer,
958-
bindMounts []garden.BindMount,
959-
proxyStartupChecks []ifrit.Runner,
960-
metronClient loggingclient.IngressClient,
961-
) ifrit.Runner {
962-
var startupChecks []ifrit.Runner
963-
var livenessChecks []ifrit.Runner
964-
965-
sourceName := HealthLogSource
966-
if container.CheckDefinition.LogSource != "" {
967-
sourceName = container.CheckDefinition.LogSource
968-
}
969-
970-
logger.Info("transform-check-definitions-starting")
971-
defer func() {
972-
logger.Info("transform-check-definitions-finished")
973-
}()
974-
975-
startupLogger := logger.Session("startup-check")
976-
livenessLogger := logger.Session("liveness-check")
977-
978-
for index, check := range container.CheckDefinition.Checks {
979-
980-
startupSidecarName := fmt.Sprintf("%s-startup-healthcheck-%d", gardenContainer.Handle(), index)
981-
livenessSidecarName := fmt.Sprintf("%s-liveness-healthcheck-%d", gardenContainer.Handle(), index)
982-
983-
if err := check.Validate(); err != nil {
984-
logger.Error("invalid-check", err, lager.Data{"check": check})
985-
} else if check.HttpCheck != nil {
986-
timeout, interval, path := t.applyCheckDefaults(
987-
int(check.HttpCheck.RequestTimeoutMs),
988-
time.Duration(check.HttpCheck.IntervalMs)*time.Millisecond,
989-
check.HttpCheck.Path,
990-
)
991-
992-
startupChecks = append(startupChecks, t.createCheck(
993-
container,
994-
gardenContainer,
995-
bindMounts,
996-
path,
997-
startupSidecarName,
998-
int(check.HttpCheck.Port),
999-
timeout,
1000-
executor.HTTPCheck,
1001-
executor.IsStartupCheck,
1002-
t.unhealthyMonitoringInterval,
1003-
startupLogger,
1004-
"",
1005-
metronClient,
1006-
false,
1007-
))
1008-
livenessChecks = append(livenessChecks, t.createCheck(
1009-
container,
1010-
gardenContainer,
1011-
bindMounts,
1012-
path,
1013-
livenessSidecarName,
1014-
int(check.HttpCheck.Port),
1015-
timeout,
1016-
executor.HTTPCheck,
1017-
executor.IsLivenessCheck,
1018-
interval,
1019-
livenessLogger,
1020-
"",
1021-
metronClient,
1022-
t.emitHealthCheckMetrics,
1023-
))
1024-
1025-
} else if check.TcpCheck != nil {
1026-
1027-
timeout, interval, _ := t.applyCheckDefaults(
1028-
int(check.TcpCheck.ConnectTimeoutMs),
1029-
time.Duration(check.TcpCheck.IntervalMs)*time.Millisecond,
1030-
"", // only needed for http checks
1031-
)
1032-
1033-
startupChecks = append(startupChecks, t.createCheck(
1034-
container,
1035-
gardenContainer,
1036-
bindMounts,
1037-
"",
1038-
startupSidecarName,
1039-
int(check.TcpCheck.Port),
1040-
timeout,
1041-
executor.TCPCheck,
1042-
executor.IsStartupCheck,
1043-
t.unhealthyMonitoringInterval,
1044-
startupLogger,
1045-
"",
1046-
metronClient,
1047-
false,
1048-
))
1049-
livenessChecks = append(livenessChecks, t.createCheck(
1050-
container,
1051-
gardenContainer,
1052-
bindMounts,
1053-
"",
1054-
livenessSidecarName,
1055-
int(check.TcpCheck.Port),
1056-
timeout,
1057-
executor.TCPCheck,
1058-
executor.IsLivenessCheck,
1059-
interval,
1060-
livenessLogger,
1061-
"",
1062-
metronClient,
1063-
t.emitHealthCheckMetrics,
1064-
))
1065-
}
1066-
}
1067-
1068-
startupCheck := steps.NewParallel(append(proxyStartupChecks, startupChecks...))
1069-
livenessCheck := steps.NewCodependent(livenessChecks, false, false)
1070-
1071-
return steps.NewHealthCheckStep(
1072-
startupCheck,
1073-
livenessCheck,
1074-
logger,
1075-
t.clock,
1076-
logstreamer,
1077-
logstreamer.WithSource(sourceName),
1078-
time.Duration(container.StartTimeoutMs)*time.Millisecond,
1079-
)
1080-
}
1081-
1082968
func (t *transformer) transformContainerProxyStep(
1083969
container garden.Container,
1084970
execContainer executor.Container,

depot/transformer/transformer_test.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,7 @@ var _ = Describe("Transformer", func() {
670670
Context("and container proxy is enabled", func() {
671671
BeforeEach(func() {
672672
options = append(options, transformer.WithContainerProxy(time.Second))
673+
options = append(options, transformer.WithProxyLivenessChecks(time.Second))
673674
cfg.BindMounts = append(cfg.BindMounts, garden.BindMount{
674675
Origin: garden.BindMountOriginHost,
675676
SrcPath: declarativeHealthcheckSrcPath,
@@ -1504,6 +1505,112 @@ var _ = Describe("Transformer", func() {
15041505
}))
15051506
})
15061507

1508+
Context("and container proxy is enabled", func() {
1509+
var (
1510+
otherStartupProcess *gardenfakes.FakeProcess
1511+
otherStartupCh chan int
1512+
otherLivenessProcess *gardenfakes.FakeProcess
1513+
otherLivenessCh chan int
1514+
)
1515+
1516+
BeforeEach(func() {
1517+
options = append(options, transformer.WithContainerProxy(time.Second))
1518+
cfg.ProxyTLSPorts = []uint16{61001}
1519+
1520+
otherStartupCh = make(chan int)
1521+
otherStartupProcess = makeProcess(otherStartupCh)
1522+
1523+
otherLivenessCh = make(chan int)
1524+
otherLivenessProcess = makeProcess(otherLivenessCh)
1525+
1526+
healthcheckCallCount := int64(0)
1527+
1528+
gardenContainer.RunStub = func(spec garden.ProcessSpec, io garden.ProcessIO) (process garden.Process, err error) {
1529+
defer GinkgoRecover()
1530+
// get rid of race condition caused by write inside the BeforeEach
1531+
processLock.Lock()
1532+
defer processLock.Unlock()
1533+
1534+
switch spec.Path {
1535+
case "/action/path":
1536+
return actionProcess, nil
1537+
case filepath.Join(transformer.HealthCheckDstPath, "healthcheck"):
1538+
oldCount := atomic.AddInt64(&healthcheckCallCount, 1)
1539+
switch oldCount {
1540+
case 1:
1541+
return startupProcess, nil
1542+
case 2:
1543+
return otherStartupProcess, nil
1544+
case 3:
1545+
return livenessProcess, nil
1546+
case 4:
1547+
return otherLivenessProcess, nil
1548+
}
1549+
return livenessProcess, nil
1550+
case "/monitor/path":
1551+
return monitorProcess, nil
1552+
}
1553+
1554+
err = errors.New("")
1555+
Fail("unexpected executable path: " + spec.Path)
1556+
return
1557+
}
1558+
})
1559+
1560+
JustBeforeEach(func() {
1561+
otherStartupCh <- 0
1562+
})
1563+
1564+
AfterEach(func() {
1565+
close(otherStartupCh)
1566+
close(otherLivenessCh)
1567+
})
1568+
1569+
Context("and proxy liveness check is enabled", func() {
1570+
// Turn on proxy liveness checks with a 30s interval; the argument
// assertion below expects that value as -liveness-interval=30s.
BeforeEach(func() {
1571+
options = append(options, transformer.WithProxyLivenessChecks(time.Second*30))
1572+
})
1573+
1574+
It("starts the proxy liveness check", func() {
1575+
// Wait for five Run calls before inspecting the recorded specs.
Eventually(gardenContainer.RunCallCount).Should(Equal(5))
1576+
var ids []string
1577+
var args [][]string
1578+
// Collect the ID and argument list of every process run in the container.
for i := 0; i < gardenContainer.RunCallCount(); i++ {
1579+
spec, _ := gardenContainer.RunArgsForCall(i)
1580+
ids = append(ids, spec.ID)
1581+
args = append(args, spec.Args)
1582+
}
1583+
1584+
// One process must carry the envoy liveness sidecar ID for proxy port index 0.
Expect(ids).To(ContainElement(fmt.Sprintf("%s-%s", gardenContainer.Handle(), "envoy-liveness-healthcheck-0")))
1585+
// 61001 is the single proxy TLS port configured by the enclosing BeforeEach.
Expect(args).To(ContainElement([]string{
1586+
"-port=61001",
1587+
"-timeout=1000ms",
1588+
"-liveness-interval=30s",
1589+
}))
1590+
})
1591+
})
1592+
1593+
Context("and proxy liveness check is disabled", func() {
1594+
// No BeforeEach in this context adds WithProxyLivenessChecks, so the
// envoy liveness sidecar must not be run.
It("does not start the proxy liveness check", func() {
1595+
// One Run call fewer than the enabled case, which waits for five.
Eventually(gardenContainer.RunCallCount).Should(Equal(4))
1596+
var ids []string
1597+
var args [][]string
1598+
// Collect the ID and argument list of every process run in the container.
for i := 0; i < gardenContainer.RunCallCount(); i++ {
1599+
spec, _ := gardenContainer.RunArgsForCall(i)
1600+
ids = append(ids, spec.ID)
1601+
args = append(args, spec.Args)
1602+
}
1603+
1604+
// No process may carry the envoy liveness sidecar ID.
Expect(ids).To(Not(ContainElement(fmt.Sprintf("%s-%s", gardenContainer.Handle(), "envoy-liveness-healthcheck-0"))))
1605+
// Nor may any process be started with the proxy liveness arguments.
Expect(args).To(Not(ContainElement([]string{
1606+
"-port=61001",
1607+
"-timeout=1000ms",
1608+
"-liveness-interval=30s",
1609+
})))
1610+
})
1611+
})
1612+
})
1613+
15071614
Context("when optional values are not provided in liveness check defintion", func() {
15081615
BeforeEach(func() {
15091616
container.CheckDefinition = &models.CheckDefinition{

0 commit comments

Comments
 (0)