From 273d68a3eaa563eb2c962d9f11d090e7b6df4d59 Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 17:34:32 +0000 Subject: [PATCH 1/4] lakebox: measure NAME column widths in terminal cells, not bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anwell flagged on the bug-bash form that `lakebox list` mis-aligns rows whose `--name` contains emoji or CJK glyphs: šŸš€ is 4 bytes / 1 rune but renders as 2 terminal cells, so the row's STATUS/AUTOSTOP/DEFAULT columns shift left when the padding math counts bytes (len) instead of display width. Switch every column-padding computation in `list` (and the symmetric NAME column in `ssh-key list`) to runewidth.StringWidth, which knows about East Asian Width and emoji widths. Names round-trip and store fine — only the display math was wrong. Promotes `github.com/mattn/go-runewidth` from indirect to direct require with the MIT SPDX comment and adds the corresponding NOTICE entry. The package was already pulled in transitively by the charmbracelet/x family. Co-authored-by: Isaac --- NOTICE | 4 ++++ cmd/lakebox/list.go | 21 ++++++++++++++------- cmd/lakebox/sshkey.go | 9 ++++++--- go.mod | 2 +- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/NOTICE b/NOTICE index 01f9df98246..fb80040b33a 100644 --- a/NOTICE +++ b/NOTICE @@ -151,6 +151,10 @@ mattn/go-isatty - https://github.com/mattn/go-isatty Copyright (c) Yasuhiro MATSUMOTO License - https://github.com/mattn/go-isatty/blob/master/LICENSE +mattn/go-runewidth - https://github.com/mattn/go-runewidth +Copyright (c) 2016 Yasuhiro Matsumoto +License - https://github.com/mattn/go-runewidth/blob/master/LICENSE + sabhiram/go-gitignore - https://github.com/sabhiram/go-gitignore Copyright (c) 2015 Shaba Abhiram License - https://github.com/sabhiram/go-gitignore/blob/master/LICENSE diff --git a/cmd/lakebox/list.go b/cmd/lakebox/list.go index 2bcef9c4243..7e951ffd7be 100644 --- a/cmd/lakebox/list.go +++ b/cmd/lakebox/list.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdctx" "github.com/databricks/cli/libs/cmdio" + "github.com/mattn/go-runewidth" "github.com/spf13/cobra" ) @@ -101,21 +102,27 @@ Example: // rendered only when at least one entry sets a display name // different from the ID — there's no point in a column of // pet-names that duplicate the ID column. + // All column widths are measured in *terminal cells*, not + // bytes or runes — emoji and CJK glyphs render as 2 cells + // despite being 1 rune / multi-byte, and using len() here + // (which counts bytes) misaligns the row whenever a `--name` + // includes wide characters. runewidth.StringWidth gives the + // East-Asian-Width-corrected cell count. idCol := 10 autostopCol := 8 nameCol := 4 showName := false for _, e := range entries { - if l := len(e.SandboxID); l > idCol { + if l := runewidth.StringWidth(e.SandboxID); l > idCol { idCol = l } - if l := len(e.autoStopLabel()); l > autostopCol { + if l := runewidth.StringWidth(e.autoStopLabel()); l > autostopCol { autostopCol = l } if e.Name != "" && e.Name != e.SandboxID { showName = true } - if l := len(e.Name); l > nameCol { + if l := runewidth.StringWidth(e.Name); l > nameCol { nameCol = l } } @@ -152,11 +159,11 @@ Example: } // Pad each cell manually so visible-width alignment is // preserved after the helpers wrap them with ANSI escapes. - idPad := max(idCol-len(id), 0) + idPad := max(idCol-runewidth.StringWidth(id), 0) st := status(ctx, e.Status) - stPad := max(statusCol-len(e.Status), 0) + stPad := max(statusCol-runewidth.StringWidth(e.Status), 0) as := e.autoStopLabel() - asPad := max(autostopCol-len(as), 0) + asPad := max(autostopCol-runewidth.StringWidth(as), 0) idStr := cmdio.Bold(ctx, id) if strings.EqualFold(e.Status, "running") { idStr = cmdio.Bold(ctx, cmdio.Cyan(ctx, id)) @@ -166,7 +173,7 @@ Example: if nm == "" || nm == id { nm = "-" } - nmPad := max(nameCol-len(nm), 0) + nmPad := max(nameCol-runewidth.StringWidth(nm), 0) nmStr := nm if nm == "-" { nmStr = cmdio.Faint(ctx, "-") diff --git a/cmd/lakebox/sshkey.go b/cmd/lakebox/sshkey.go index c0a5d915315..5b7f3829aa9 100644 --- a/cmd/lakebox/sshkey.go +++ b/cmd/lakebox/sshkey.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdctx" "github.com/databricks/cli/libs/cmdio" + "github.com/mattn/go-runewidth" "github.com/spf13/cobra" ) @@ -115,9 +116,11 @@ Examples: out := cmd.OutOrStdout() blank(out) + // Measure in terminal cells (runewidth) so wide / emoji + // glyphs in `--name` don't misalign the row. nameCol := 4 for _, k := range keys { - if l := len(k.Name); l > nameCol { + if l := runewidth.StringWidth(k.Name); l > nameCol { nameCol = l } } @@ -136,10 +139,10 @@ Examples: for _, k := range keys { // Pad NAME manually from the raw width because cmdio.Faint // wraps the cell in ANSI escapes that throw off `%-*s`. - displayName, visibleNameLen := k.Name, len(k.Name) + displayName, visibleNameLen := k.Name, runewidth.StringWidth(k.Name) if displayName == "" { displayName = cmdio.Faint(ctx, "(unset)") - visibleNameLen = len("(unset)") + visibleNameLen = runewidth.StringWidth("(unset)") } namePad := max(nameCol-visibleNameLen, 0) gutter := " " diff --git a/go.mod b/go.mod index f35dadb31ce..ddeceadd2f1 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/hexops/gotextdiff v1.0.3 // BSD-3-Clause github.com/jackc/pgx/v5 v5.9.2 // MIT github.com/mattn/go-isatty v0.0.22 // MIT + github.com/mattn/go-runewidth v0.0.23 // MIT github.com/palantir/pkg/yamlpatch v1.5.0 // BSD-3-Clause github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // BSD-2-Clause github.com/quasilyte/go-ruleguard/dsl v0.3.22 // BSD-3-Clause @@ -80,7 +81,6 @@ require ( github.com/lucasb-eyer/go-colorful v1.4.0 // indirect github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-localereader v0.0.1 // indirect - github.com/mattn/go-runewidth v0.0.23 // indirect github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect github.com/muesli/cancelreader v0.2.2 // indirect From d6e585163425cf019f719ff166ec4d963e85b12d Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 17:39:02 +0000 Subject: [PATCH 2/4] lakebox: format `--idle-timeout` bounds and offending value as Go durations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anwell flagged on the bug-bash form: `config --idle-timeout 25h` reported `between 60s and 86400s, got 90000s` — every number expressed in a unit the user didn't type, even though `--help` documented the same bounds as `60s to 24h`. Net effect: the user had to mentally convert 90000s back to 25h to understand the rejection. Route bounds and the offending value through `formatDurationSecs` (already in api.go), so the error reads: Error: idle-timeout must be 0 (clear) or between 1m and 24h, got 25h Also nudge the `--help` lower-bound text from `60s` → `1m` so both strings agree. Co-authored-by: Isaac --- cmd/lakebox/config.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cmd/lakebox/config.go b/cmd/lakebox/config.go index f5ad8ad8081..6ca1d6350ce 100644 --- a/cmd/lakebox/config.go +++ b/cmd/lakebox/config.go @@ -38,7 +38,7 @@ Three knobs are independent — pass any combination: the sandbox after this much idle time. Pass 0 (or 0s) to clear and revert to the manager's global default (10m). Valid range when set: - 60s to 24h. + 1m to 24h. --no-autostop[=true|false] When true, the sandbox is exempt from idle-driven auto-stop entirely. The @@ -157,9 +157,15 @@ func checkIdleSecs(secs int64) (int64, error) { return 0, nil // clear / revert to global default } if secs < minIdleTimeoutSecs || secs > maxIdleTimeoutSecs { + // Format both the bounds and the offending value as Go-style + // durations to match the input form the user typed and the + // flag's --help text (Anwell flagged the prior `86400s` / + // `90000s` echoes as confusing — same unit as input now). return 0, fmt.Errorf( - "idle-timeout must be 0 (clear) or between %ds and %ds, got %ds", - minIdleTimeoutSecs, maxIdleTimeoutSecs, secs, + "idle-timeout must be 0 (clear) or between %s and %s, got %s", + formatDurationSecs(minIdleTimeoutSecs), + formatDurationSecs(maxIdleTimeoutSecs), + formatDurationSecs(secs), ) } return secs, nil From 9e315d71c4bd4e28f16d300d5b6b74902649047c Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 18:01:50 +0000 Subject: [PATCH 3/4] lakebox: clear orphan gatewayHosts entry when last sandbox is deleted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mitch flagged on the bug-bash form: after deleting every sandbox on a profile, ~/.databricks/lakebox.json still carries the profile's gatewayHosts entry. Functionally harmless today but the entries accumulate per profile across the lifecycle, and the "Delete cleanup" CUJ explicitly asks for no ghost references in local config. In removeSandbox, when the deletion empties the sandbox list for a profile, also drop the profile's GatewayHosts entry (and the now-empty Sandboxes key, for tidiness). When other sandboxes remain on the same profile the gateway host is still relevant — only the all-empty case triggers the cleanup. Two new tests cover both cases. Co-authored-by: Isaac --- cmd/lakebox/resolve_test.go | 24 ++++++++++++++++++++++++ cmd/lakebox/state.go | 10 ++++++++++ 2 files changed, 34 insertions(+) diff --git a/cmd/lakebox/resolve_test.go b/cmd/lakebox/resolve_test.go index 761bf9cdb6b..9a8595191da 100644 --- a/cmd/lakebox/resolve_test.go +++ b/cmd/lakebox/resolve_test.go @@ -126,3 +126,27 @@ func TestRemoveSandboxMissingIsNoop(t *testing.T) { require.NoError(t, removeSandbox(ctx, "p", "nope")) assert.Equal(t, []cachedSandbox{{ID: "keep-me"}}, getSandboxes(ctx, "p")) } + +// Removing the last sandbox for a profile must also drop the profile's +// cached gateway host — otherwise lakebox.json accumulates orphan +// gatewayHosts entries that no longer correspond to any sandbox. +func TestRemoveSandboxClearsOrphanGatewayHost(t *testing.T) { + ctx, _ := stateCtx(t) + require.NoError(t, setSandboxes(ctx, "p", []cachedSandbox{{ID: "only-one"}})) + require.NoError(t, setGatewayHost(ctx, "p", "gw.example.test")) + require.Equal(t, "gw.example.test", getGatewayHost(ctx, "p")) + + require.NoError(t, removeSandbox(ctx, "p", "only-one")) + assert.Empty(t, getSandboxes(ctx, "p")) + assert.Empty(t, getGatewayHost(ctx, "p"), "gateway host must be cleared when the last sandbox is removed") +} + +// Removing one of many sandboxes must NOT touch the gateway host — it +// still applies to the remaining sandboxes on the profile. +func TestRemoveSandboxKeepsGatewayHostWhileSandboxesRemain(t *testing.T) { + ctx, _ := stateCtx(t) + require.NoError(t, setSandboxes(ctx, "p", []cachedSandbox{{ID: "keep"}, {ID: "drop"}})) + require.NoError(t, setGatewayHost(ctx, "p", "gw.example.test")) + require.NoError(t, removeSandbox(ctx, "p", "drop")) + assert.Equal(t, "gw.example.test", getGatewayHost(ctx, "p")) +} diff --git a/cmd/lakebox/state.go b/cmd/lakebox/state.go index 7e388684378..e30160563bc 100644 --- a/cmd/lakebox/state.go +++ b/cmd/lakebox/state.go @@ -219,6 +219,12 @@ func upsertSandbox(ctx context.Context, profile, id, name string) error { // removeSandbox drops a single cached entry for `profile`. Called from // `delete` so the cache doesn't keep referencing sandboxes that no // longer exist server-side. +// +// When the removal empties the sandbox list for a profile, also drop +// the profile's `GatewayHosts` entry — there is nothing for the +// gateway hostname to apply to until the user creates a new sandbox, +// and leaving the entry behind accumulates orphan state across the +// lifecycle of a profile (per Mitch's "Delete cleanup" CUJ). func removeSandbox(ctx context.Context, profile, id string) error { state, err := loadState(ctx) if err != nil { @@ -228,6 +234,10 @@ func removeSandbox(ctx context.Context, profile, id string) error { for i, s := range existing { if s.ID == id { state.Sandboxes[profile] = append(existing[:i], existing[i+1:]...) + if len(state.Sandboxes[profile]) == 0 { + delete(state.Sandboxes, profile) + delete(state.GatewayHosts, profile) + } return saveState(ctx, state) } } From 6d32203ce25a204c236c3cd07c12a83b45c4a7cf Mon Sep 17 00:00:00 2001 From: Akshay Singla Date: Fri, 29 May 2026 18:06:59 +0000 Subject: [PATCH 4/4] lakebox: make `start` block until the sandbox reaches Running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mitch (form row M) and tsanyu (form row 4) both flagged the same asymmetry: `lakebox create` blocks until the sandbox is Running, but `lakebox start` returns instantly with `āœ“ Started ` while the box is still in `Creating` — and the next `lakebox ssh` then blocks indefinitely waiting for the same cold-start the user thought already finished. Scripts that chain `start && ssh` race the boot. Add a polling loop after `api.start` that calls `api.get` every 2s until status is Running (or up to 10 minutes — Mitch observed 5–13min cold starts under F11, so 10m covers the common case while still failing fast on a stuck box). - status `Running` returned by api.start (already up) short-circuits with `āœ“ Already running ` instead of pretending to wait. - A transition to a terminal non-Running state (Stopped, Terminated, Failed) errors out immediately instead of waiting out the timeout. - The spinner is updated each poll with elapsed seconds so the user can see the wait is real. - Context cancellation (Ctrl-C) is honored. Verified live against e2dogfood: a Stopped sandbox start now blocks through the cold-start path. F11-class slow starts surface as a clean timeout error after 10m rather than the original lie. Co-authored-by: Isaac --- cmd/lakebox/start.go | 74 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 5 deletions(-) diff --git a/cmd/lakebox/start.go b/cmd/lakebox/start.go index 5b97145b098..dd6b7a5dd40 100644 --- a/cmd/lakebox/start.go +++ b/cmd/lakebox/start.go @@ -1,7 +1,10 @@ package lakebox import ( + "context" "fmt" + "strings" + "time" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdctx" @@ -9,16 +12,31 @@ import ( "github.com/spf13/cobra" ) +// Bounds for `start`'s "wait until Running" loop. The server's StartSandbox +// RPC returns immediately with status="Creating" (reused for cold start — +// see F10), so the CLI polls until it actually reaches Running. Matches +// `create`'s blocking semantics so scripts can chain start → ssh / start → +// config without racing the cold boot. The 10-minute timeout covers +// Mitch's observed cold-start range (5–13 minutes) for the common case; +// truly stuck sandboxes still surface as a timeout rather than hanging +// the script forever. +const ( + startPollInterval = 2 * time.Second + startWaitTimeout = 10 * time.Minute +) + func newStartCommand() *cobra.Command { cmd := &cobra.Command{ Use: "start ", Short: "Start a stopped Lakebox environment", Long: `Start a stopped Lakebox environment. -Boots the backing microVM and brings the sandbox to Running. -'databricks lakebox ssh' already auto-starts a stopped sandbox on -connection, so this command is mostly useful for pre-warming an -environment without immediately connecting. +Boots the backing microVM and blocks until the sandbox reaches +Running (or up to 5 minutes). 'databricks lakebox ssh' already +auto-starts a stopped sandbox on connection, so this command is +mostly useful for pre-warming an environment without immediately +connecting, or when a script needs to be sure the sandbox is up +before continuing. Starting an already-running sandbox is a no-op. @@ -57,10 +75,56 @@ Example: _ = setGatewayHost(ctx, profile, updated.GatewayHost) _ = upsertSandbox(ctx, profile, updated.SandboxID, updated.Name) - s.ok("Started " + cmdio.Bold(ctx, updated.SandboxID)) + // Already up — short-circuit so we don't pretend to wait + // when there's nothing to wait for. + if strings.EqualFold(updated.Status, "running") { + s.ok("Already running " + cmdio.Bold(ctx, updated.SandboxID)) + return nil + } + + final, err := waitForRunning(ctx, api, s, updated.SandboxID) + if err != nil { + s.fail("Failed to start " + lakeboxID) + return err + } + _ = upsertSandbox(ctx, profile, final.SandboxID, final.Name) + + s.ok("Started " + cmdio.Bold(ctx, final.SandboxID)) return nil }, } return cmd } + +// waitForRunning polls the sandbox until it reaches Running or the timeout +// elapses. The spinner is updated with the elapsed seconds each poll so the +// user can tell progress is happening, and a transition to an unexpected +// terminal state (Stopped / Terminated / Failed) short-circuits with a +// useful error rather than waiting out the full timeout. +func waitForRunning(ctx context.Context, api *lakeboxAPI, s *spinner, id string) (*sandboxEntry, error) { + start := time.Now() + deadline := start.Add(startWaitTimeout) + for { + sb, err := api.get(ctx, id) + if err != nil { + return nil, fmt.Errorf("polling status of %s: %w", id, err) + } + switch strings.ToLower(sb.Status) { + case "running": + return sb, nil + case "stopped", "terminated", "failed": + return nil, fmt.Errorf("sandbox %s reached unexpected state %q while starting", id, sb.Status) + } + elapsed := time.Since(start).Round(time.Second) + s.Update(fmt.Sprintf("Starting %s… (%s)", id, elapsed)) + if time.Now().After(deadline) { + return nil, fmt.Errorf("sandbox %s did not reach Running within %s (last seen %s)", id, startWaitTimeout, sb.Status) + } + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(startPollInterval): + } + } +}