Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Notable Changes

### CLI
* Introduce retries to `databricks psql` command ([#3492](https://github.com/databricks/cli/pull/3492))
* Add rule files for coding agents working on the CLI code base ([#3245](https://github.com/databricks/cli/pull/3245))

### Dependency updates
Expand Down
7 changes: 7 additions & 0 deletions acceptance/cmd/psql/failing-connection/always-fail.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# This script prints its arguments and exits.
# The test script renames this script to "psql" in order to capture the arguments that the CLI passes to psql command.
#
echo "Simulating connection failure with exit code '2'"
exit 2
8 changes: 8 additions & 0 deletions acceptance/cmd/psql/failing-connection/out.test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Local = true
Cloud = false

[GOOS]
windows = false

[EnvMatrix]
DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"]
61 changes: 61 additions & 0 deletions acceptance/cmd/psql/failing-connection/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

=== Command should use default number of retries:
>>> musterr [CLI] psql my-database -- --dbname=db1 -p 3000
Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql session to my-database.my-host.com (attempt 1/3)...
Simulating connection failure with exit code '2'
Connection failed with retryable error: connection failed (retryable): psql exited with code 2
Connection attempt 1/3 failed, retrying in 1s...
Launching psql session to my-database.my-host.com (attempt 2/3)...
Simulating connection failure with exit code '2'
Connection failed with retryable error: connection failed (retryable): psql exited with code 2
Connection attempt 2/3 failed, retrying in 2s...
Launching psql session to my-database.my-host.com (attempt 3/3)...
Simulating connection failure with exit code '2'
Connection failed with retryable error: connection failed (retryable): psql exited with code 2
Error: failed to connect after 3 attempts, last error: connection failed (retryable): psql exited with code 2

Exit code (musterr): 1

=== Command should use custom number of retries:
>>> musterr [CLI] psql my-database --max-retries 5 -- --dbname=db1 -p 3000
Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql session to my-database.my-host.com (attempt 1/5)...
Simulating connection failure with exit code '2'
Connection failed with retryable error: connection failed (retryable): psql exited with code 2
Connection attempt 1/5 failed, retrying in 1s...
Launching psql session to my-database.my-host.com (attempt 2/5)...
Simulating connection failure with exit code '2'
Connection failed with retryable error: connection failed (retryable): psql exited with code 2
Connection attempt 2/5 failed, retrying in 2s...
Launching psql session to my-database.my-host.com (attempt 3/5)...
Simulating connection failure with exit code '2'
Connection failed with retryable error: connection failed (retryable): psql exited with code 2
Connection attempt 3/5 failed, retrying in 4s...
Launching psql session to my-database.my-host.com (attempt 4/5)...
Simulating connection failure with exit code '2'
Connection failed with retryable error: connection failed (retryable): psql exited with code 2
Connection attempt 4/5 failed, retrying in 8s...
Launching psql session to my-database.my-host.com (attempt 5/5)...
Simulating connection failure with exit code '2'
Connection failed with retryable error: connection failed (retryable): psql exited with code 2
Error: failed to connect after 5 attempts, last error: connection failed (retryable): psql exited with code 2

Exit code (musterr): 1

=== Command should not use retries:
>>> musterr [CLI] psql my-database --max-retries 0 -- --dbname=db1 -p 3000
Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql with connection to my-database.my-host.com...
Simulating connection failure with exit code '2'

Exit code (musterr): 2
17 changes: 17 additions & 0 deletions acceptance/cmd/psql/failing-connection/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
mv always-fail.sh psql

cleanup() {
rm psql
}
trap cleanup EXIT

export PATH="$(pwd):$PATH"

title "Command should use default number of retries:"
trace musterr $CLI psql my-database -- --dbname=db1 -p 3000

title "Command should use custom number of retries:"
trace musterr $CLI psql my-database --max-retries 5 -- --dbname=db1 -p 3000

title "Command should not use retries:"
trace musterr $CLI psql my-database --max-retries 0 -- --dbname=db1 -p 3000
32 changes: 32 additions & 0 deletions acceptance/cmd/psql/failing-connection/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This acceptance test is disabled on Windows runners because
# the current argument capturing method does not work on windows-latest GitHub Runner.
#
# See PR #3228 for documented attempts to fix this issue:
# https://github.com/databricks/cli/pull/3228
GOOS.windows = false

[[Server]]
Pattern = "GET /api/2.0/database/instances/my-database"
Response.Body = '''
{
"state": "AVAILABLE",
"pg_version": "14",
"read_write_dns": "my-database.my-host.com"
}
'''

[[Server]]
Pattern = "GET /api/2.0/database/instances"
Response.Body = '''
{
"database_instances": []
}
'''

[[Server]]
Pattern = "POST /api/2.0/database/credentials"
Response.Body = '''
{
"token": "my-secret-token"
}
'''
31 changes: 26 additions & 5 deletions acceptance/cmd/psql/simple/output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql with connection to my-database.my-host.com...
Launching psql session to my-database.my-host.com (attempt 1/3)...
echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --port=5432 --dbname=databricks_postgres
PGPASSWORD=my-secret-token
PGSSLMODE=require
Expand All @@ -34,7 +34,7 @@ Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql with connection to my-database.my-host.com...
Launching psql session to my-database.my-host.com (attempt 1/3)...
echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --port=5432 --dbname=databricks_postgres -c SELECT * FROM my_table --echo-all
PGPASSWORD=my-secret-token
PGSSLMODE=require
Expand All @@ -45,7 +45,7 @@ Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql with connection to my-database.my-host.com...
Launching psql session to my-database.my-host.com (attempt 1/3)...
echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --port=5432 --dbname=db1
PGPASSWORD=my-secret-token
PGSSLMODE=require
Expand All @@ -55,7 +55,7 @@ Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql with connection to my-database.my-host.com...
Launching psql session to my-database.my-host.com (attempt 1/3)...
echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --port=5432 -d db2
PGPASSWORD=my-secret-token
PGSSLMODE=require
Expand All @@ -66,7 +66,7 @@ Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql with connection to my-database.my-host.com...
Launching psql session to my-database.my-host.com (attempt 1/3)...
echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --dbname=db1 -p 3000
PGPASSWORD=my-secret-token
PGSSLMODE=require
Expand All @@ -76,6 +76,27 @@ Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql session to my-database.my-host.com (attempt 1/3)...
echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] -d db2 --port=3001
PGPASSWORD=my-secret-token
PGSSLMODE=require

=== Command should not use retries if max-retries is set to 0:
>>> [CLI] psql my-database --max-retries 0 -- --dbname=db1 -p 3000
Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql with connection to my-database.my-host.com...
echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --dbname=db1 -p 3000
PGPASSWORD=my-secret-token
PGSSLMODE=require

>>> [CLI] psql my-database --max-retries 0 -- -d db2 --port=3001
Connecting to Databricks Database Instance my-database ...
Postgres version: 14
Database instance status: AVAILABLE
Successfully fetched database credentials
Launching psql with connection to my-database.my-host.com...
echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] -d db2 --port=3001
PGPASSWORD=my-secret-token
Expand Down
4 changes: 4 additions & 0 deletions acceptance/cmd/psql/simple/script
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,7 @@ trace $CLI psql my-database -- -d db2
title "Command should use the port from extra arguments when specified:"
trace $CLI psql my-database -- --dbname=db1 -p 3000
trace $CLI psql my-database -- -d db2 --port=3001

title "Command should not use retries if max-retries is set to 0:"
trace $CLI psql my-database --max-retries 0 -- --dbname=db1 -p 3000
trace $CLI psql my-database --max-retries 0 -- -d db2 --port=3001
18 changes: 17 additions & 1 deletion cmd/psql/psql.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package psql
import (
"errors"
"fmt"
"time"

"github.com/databricks/cli/libs/cmdctx"
"github.com/databricks/cli/libs/cmdio"
Expand All @@ -26,12 +27,17 @@ func newLakebaseConnectCommand() *cobra.Command {

This command requires a psql client to be installed on your machine for the connection to work.

The command includes automatic retry logic for connection failures. You can configure the retry behavior using the flags below.

You can pass additional arguments to psql after a double-dash (--):
databricks psql my-database -- -c "SELECT * FROM my_table"
databricks psql my-database -- --echo-all -d "my-db"
`,
}

// Add retry configuration flag
cmd.Flags().Int("max-retries", 3, "Maximum number of connection retry attempts (set to 0 to disable retries)")

cmd.PreRunE = root.MustWorkspaceClient
cmd.RunE = func(cmd *cobra.Command, args []string) error {
ctx := cmd.Context()
Expand Down Expand Up @@ -74,7 +80,17 @@ You can pass additional arguments to psql after a double-dash (--):
databaseInstanceName := args[0]
extraArgs := args[1:]

return lakebase.Connect(cmd.Context(), databaseInstanceName, extraArgs...)
// Read retry configuration from flags
maxRetries, _ := cmd.Flags().GetInt("max-retries")

retryConfig := lakebase.RetryConfig{
MaxRetries: maxRetries, // Retries are disables when max-retries is 0
InitialDelay: time.Second, // Fixed initial delay
MaxDelay: 10 * time.Second, // Fixed max delay
BackoffFactor: 2.0, // Fixed backoff factor
}

return lakebase.ConnectWithRetryConfig(cmd.Context(), databaseInstanceName, retryConfig, extraArgs...)
}

cmd.ValidArgsFunction = func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
Expand Down
103 changes: 95 additions & 8 deletions libs/lakebase/connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,53 @@ import (
"errors"
"fmt"
"os"
"os/exec"
"strings"
"time"

"github.com/databricks/cli/libs/cmdctx"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/exec"
execlib "github.com/databricks/cli/libs/exec"
"github.com/databricks/databricks-sdk-go/service/database"
"github.com/google/uuid"
)

func Connect(ctx context.Context, databaseInstanceName string, extraArgs ...string) error {
// RetryConfig holds configuration for connection retry behavior
type RetryConfig struct {
MaxRetries int
InitialDelay time.Duration
MaxDelay time.Duration
BackoffFactor float64
}

// attemptConnection launches psql interactively and returns an error if connection fails
func attemptConnection(ctx context.Context, args, env []string) error {
cmd := exec.CommandContext(ctx, args[0], args[1:]...)
cmd.Env = env
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr

err := cmd.Run()
if err != nil {
// Check if the error might be due to connection issues
// Since we can't capture stderr when running interactively, we check the exit code
var exitError *exec.ExitError
if errors.As(err, &exitError) {
exitCode := exitError.ExitCode()

// We do not use the Databricks SDK for checking whether the error is retryable because the call in question is not to the API
// psql returns exit code 2 for connection failures
if exitCode == 2 {
return fmt.Errorf("connection failed (retryable): psql exited with code %d", exitCode)
}
}
}

return err
}

func ConnectWithRetryConfig(ctx context.Context, databaseInstanceName string, retryConfig RetryConfig, extraArgs ...string) error {
cmdio.LogString(ctx, fmt.Sprintf("Connecting to Databricks Database Instance %s ...", databaseInstanceName))

w := cmdctx.WorkspaceClient(ctx)
Expand Down Expand Up @@ -91,11 +128,61 @@ func Connect(ctx context.Context, databaseInstanceName string, extraArgs ...stri
"PGSSLMODE=require",
)

cmdio.LogString(ctx, fmt.Sprintf("Launching psql with connection to %s...", db.ReadWriteDns))
// If retries are disabled, go directly to interactive session
if retryConfig.MaxRetries <= 0 {
cmdio.LogString(ctx, fmt.Sprintf("Launching psql with connection to %s...", db.ReadWriteDns))
return execlib.Execv(execlib.ExecvOptions{
Args: args,
Env: cmdEnv,
})
}

// Execute psql command inline
return exec.Execv(exec.ExecvOptions{
Args: args,
Env: cmdEnv,
})
// Try launching psql with retry logic
maxRetries := retryConfig.MaxRetries
delay := retryConfig.InitialDelay

var lastErr error
for attempt := range maxRetries {
if attempt > 0 {
cmdio.LogString(ctx, fmt.Sprintf("Connection attempt %d/%d failed, retrying in %v...", attempt, maxRetries, delay))

// Wait with context cancellation support
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(delay):
}

// Exponential backoff
delay = time.Duration(float64(delay) * retryConfig.BackoffFactor)
if delay > retryConfig.MaxDelay {
delay = retryConfig.MaxDelay
}
}

cmdio.LogString(ctx, fmt.Sprintf("Launching psql session to %s (attempt %d/%d)...", db.ReadWriteDns, attempt+1, maxRetries))

// Try to launch psql and capture the exit status
err := attemptConnection(ctx, args, cmdEnv)
if err == nil {
// psql exited normally (user quit)
return nil
}

lastErr = err

// Check if this is a retryable error
// We do not use the Databricks SDK for checking whether the error is retryable because the call in question is not to the API
if !strings.Contains(err.Error(), "connection failed (retryable)") {
// Non-retryable error, fail immediately
return err
}

if attempt < maxRetries {
cmdio.LogString(ctx, fmt.Sprintf("Connection failed with retryable error: %v", err))
}
}

// All retries exhausted
return fmt.Errorf("failed to connect after %d attempts, last error: %w", maxRetries, lastErr)
}
Loading
Loading