diff --git a/.wsignore b/.wsignore index 827ebf4332..d4831cd94e 100644 --- a/.wsignore +++ b/.wsignore @@ -11,6 +11,12 @@ experimental/python/docs/images/databricks-logo.svg **/*.zip **/*.whl +# new lines are recorded differently on windows and unix. +# In unix: "raw_body": "hello, world\n" +# In windows: "raw_body": "hello, world\r\n" +# In order to prevent that difference, hello.txt does not have a trailing newline. +acceptance/selftest/record_cloud/volume-io/hello.txt + # "bundle run" has trailing whitespace: acceptance/bundle/integration_whl/*/output.txt diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index e1a359e7ee..55b189d669 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -18,7 +18,6 @@ import ( "slices" "sort" "strings" - "sync" "testing" "time" "unicode/utf8" @@ -242,7 +241,7 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { if len(expanded[0]) > 0 { t.Logf("Running test with env %v", expanded[0]) } - runTest(t, dir, coverDir, repls.Clone(), config, configPath, expanded[0]) + runTest(t, dir, coverDir, repls.Clone(), config, configPath, expanded[0], inprocessMode) } else { for _, envset := range expanded { envname := strings.Join(envset, "/") @@ -250,7 +249,7 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { if !inprocessMode { t.Parallel() } - runTest(t, dir, coverDir, repls.Clone(), config, configPath, envset) + runTest(t, dir, coverDir, repls.Clone(), config, configPath, envset, inprocessMode) }) } } @@ -342,7 +341,14 @@ func getSkipReason(config *internal.TestConfig, configPath string) string { return "" } -func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsContext, config internal.TestConfig, configPath string, customEnv []string) { +func runTest(t *testing.T, + dir, coverDir string, + repls testdiff.ReplacementsContext, + config internal.TestConfig, + configPath string, + customEnv []string, + inprocessMode 
bool, +) { if LogConfig { configBytes, err := json.MarshalIndent(config, "", " ") require.NoError(t, err) @@ -395,16 +401,12 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont } else if isRunningOnCloud { timeout = max(timeout, config.TimeoutCloud) } - ctx, cancelFunc := context.WithTimeout(context.Background(), timeout) defer cancelFunc() args := []string{"bash", "-euo", "pipefail", EntryPointScript} cmd := exec.CommandContext(ctx, args[0], args[1:]...) - // This mutex is used to synchronize recording requests - var serverMutex sync.Mutex - - cfg, user := internal.PrepareServerAndClient(t, config, LogRequests, tmpDir, &serverMutex) + cfg, user := internal.PrepareServerAndClient(t, config, LogRequests, tmpDir) testdiff.PrepareReplacementsUser(t, &repls, user) testdiff.PrepareReplacementsWorkspaceConfig(t, &repls, cfg) diff --git a/acceptance/internal/prepare_server.go b/acceptance/internal/prepare_server.go index f9cd03216c..bb49fea183 100644 --- a/acceptance/internal/prepare_server.go +++ b/acceptance/internal/prepare_server.go @@ -14,12 +14,12 @@ import ( "time" "unicode/utf8" - sdkconfig "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/service/iam" - "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/testproxy" "github.com/databricks/cli/libs/testserver" "github.com/databricks/databricks-sdk-go" + sdkconfig "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/service/iam" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -40,11 +40,10 @@ func isTruePtr(value *bool) bool { return value != nil && *value } -func PrepareServerAndClient(t *testing.T, config TestConfig, logRequests bool, outputDir string, mu *sync.Mutex) (*sdkconfig.Config, iam.User) { +func PrepareServerAndClient(t *testing.T, config TestConfig, logRequests bool, outputDir string) (*sdkconfig.Config, iam.User) { 
cloudEnv := os.Getenv("CLOUD_ENV") + recordRequests := isTruePtr(config.RecordRequests) - // If we are running on a cloud environment, use the host configured in the - // environment. if cloudEnv != "" { w, err := databricks.NewWorkspaceClient() require.NoError(t, err) @@ -52,69 +51,104 @@ func PrepareServerAndClient(t *testing.T, config TestConfig, logRequests bool, o user, err := w.CurrentUser.Me(context.Background()) require.NoError(t, err, "Failed to get current user") - return w.Config, *user - } + cfg := w.Config - recordRequests := isTruePtr(config.RecordRequests) + // If we are running in a cloud environment AND we are recording requests, + // start a dedicated server to act as a reverse proxy to a real Databricks workspace. + if recordRequests { + host, token := startProxyServer(t, logRequests, config.IncludeRequestHeaders, outputDir) + cfg = &sdkconfig.Config{ + Host: host, + Token: token, + } + } - tokenSuffix := strings.ReplaceAll(uuid.NewString(), "-", "") - token := "dbapi" + tokenSuffix + return cfg, *user + } - // If we are not recording requests, and no custom server server stubs are configured, + // If we are not recording requests, and no custom server stubs are configured, // use the default shared server. if len(config.Server) == 0 && !recordRequests { - return &sdkconfig.Config{ + // Use a unique token for each test. This allows us to maintain + // separate state for each test in fake workspaces. + tokenSuffix := strings.ReplaceAll(uuid.NewString(), "-", "") + token := "dbapi" + tokenSuffix + + cfg := &sdkconfig.Config{ Host: os.Getenv("DATABRICKS_DEFAULT_HOST"), Token: token, - }, TestUser - } + } - host := startDedicatedServer(t, config.Server, recordRequests, logRequests, config.IncludeRequestHeaders, outputDir, mu) + return cfg, TestUser + } - return &sdkconfig.Config{ + // Default case. Start a dedicated local server for the test with the server stubs configured + // as overrides. 
+ host, token := startLocalServer(t, config.Server, recordRequests, logRequests, config.IncludeRequestHeaders, outputDir) + cfg := &sdkconfig.Config{ Host: host, Token: token, - }, TestUser + } + + // For the purposes of replacements, use testUser for local runs. + // Note, users might have overridden /api/2.0/preview/scim/v2/Me but that should not affect the replacement: + return cfg, TestUser } -func startDedicatedServer(t *testing.T, +func recordRequestsCallback(t *testing.T, includeHeaders []string, outputDir string) func(request *testserver.Request) { + mu := sync.Mutex{} + + return func(request *testserver.Request) { + mu.Lock() + defer mu.Unlock() + + req := getLoggedRequest(request, includeHeaders) + reqJson, err := json.MarshalIndent(req, "", " ") + assert.NoErrorf(t, err, "Failed to json-encode: %#v", req) + + requestsPath := filepath.Join(outputDir, "out.requests.txt") + f, err := os.OpenFile(requestsPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + assert.NoError(t, err) + defer f.Close() + + _, err = f.WriteString(string(reqJson) + "\n") + assert.NoError(t, err) + } +} + +func logResponseCallback(t *testing.T) func(request *testserver.Request, response *testserver.EncodedResponse) { + mu := sync.Mutex{} + + return func(request *testserver.Request, response *testserver.EncodedResponse) { + mu.Lock() + defer mu.Unlock() + + t.Logf("%d %s %s\n%s\n%s", + response.StatusCode, request.Method, request.URL, + formatHeadersAndBody("> ", request.Headers, request.Body), + formatHeadersAndBody("# ", response.Headers, response.Body), + ) + } +} + +func startLocalServer(t *testing.T, stubs []ServerStub, recordRequests bool, logRequests bool, includeHeaders []string, outputDir string, - mu *sync.Mutex, -) string { +) (string, string) { s := testserver.New(t) + // Record API requests in out.requests.txt if RecordRequests is true + // in test.toml if recordRequests { - requestsPath := filepath.Join(outputDir, "out.requests.txt") - s.RequestCallback = func(request 
*testserver.Request) { - req := getLoggedRequest(request, includeHeaders) - reqJson, err := json.MarshalIndent(req, "", " ") - - mu.Lock() - defer mu.Unlock() - - assert.NoErrorf(t, err, "Failed to json-encode: %#v", req) - - f, err := os.OpenFile(requestsPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) - assert.NoError(t, err) - defer f.Close() - - _, err = f.WriteString(string(reqJson) + "\n") - assert.NoError(t, err) - } + s.RequestCallback = recordRequestsCallback(t, includeHeaders, outputDir) } + // Log API responses if the -logrequests flag is set. if logRequests { - s.ResponseCallback = func(request *testserver.Request, response *testserver.EncodedResponse) { - t.Logf("%d %s %s\n%s\n%s", - response.StatusCode, request.Method, request.URL, - formatHeadersAndBody("> ", request.Headers, request.Body), - formatHeadersAndBody("# ", response.Headers, response.Body), - ) - } + s.ResponseCallback = logResponseCallback(t) } for ind := range stubs { @@ -132,8 +166,25 @@ func startDedicatedServer(t *testing.T, // The earliest handlers take precedence, add default handlers last addDefaultHandlers(s) + return s.URL, "dbapi123" +} + +func startProxyServer(t *testing.T, + logRequests bool, + includeHeaders []string, + outputDir string, +) (string, string) { + s := testproxy.New(t) + + // Always record requests for a proxy server. + s.RequestCallback = recordRequestsCallback(t, includeHeaders, outputDir) + + // Log API responses if the -logrequests flag is set. 
+ if logRequests { + s.ResponseCallback = logResponseCallback(t) + } - return s.URL + return s.URL, "dbapi1234" } type LoggedRequest struct { diff --git a/acceptance/selftest/record_cloud/basic/out.requests.txt b/acceptance/selftest/record_cloud/basic/out.requests.txt new file mode 100644 index 0000000000..ec41b6c5f6 --- /dev/null +++ b/acceptance/selftest/record_cloud/basic/out.requests.txt @@ -0,0 +1,4 @@ +{ + "method": "GET", + "path": "/api/2.0/preview/scim/v2/Me" +} diff --git a/acceptance/selftest/record_cloud/basic/output.txt b/acceptance/selftest/record_cloud/basic/output.txt new file mode 100644 index 0000000000..7f0b941262 --- /dev/null +++ b/acceptance/selftest/record_cloud/basic/output.txt @@ -0,0 +1,3 @@ + +>>> [CLI] current-user me +"[USERNAME]" diff --git a/acceptance/selftest/record_cloud/basic/script b/acceptance/selftest/record_cloud/basic/script new file mode 100644 index 0000000000..5d8c2eac19 --- /dev/null +++ b/acceptance/selftest/record_cloud/basic/script @@ -0,0 +1,2 @@ +# Proxy server successfully records a request and returns a response. +trace $CLI current-user me | jq .name.givenName diff --git a/acceptance/selftest/record_cloud/error/out.requests.txt b/acceptance/selftest/record_cloud/error/out.requests.txt new file mode 100644 index 0000000000..30a1cb329c --- /dev/null +++ b/acceptance/selftest/record_cloud/error/out.requests.txt @@ -0,0 +1,4 @@ +{ + "method": "GET", + "path": "/api/2.2/jobs/get" +} diff --git a/acceptance/selftest/record_cloud/error/output.txt b/acceptance/selftest/record_cloud/error/output.txt new file mode 100644 index 0000000000..f911ca508f --- /dev/null +++ b/acceptance/selftest/record_cloud/error/output.txt @@ -0,0 +1,5 @@ + +>>> [CLI] jobs get 1234 +Error: Job 1234 does not exist. 
+ +Exit code: 1 diff --git a/acceptance/selftest/record_cloud/error/script b/acceptance/selftest/record_cloud/error/script new file mode 100644 index 0000000000..662a0732cc --- /dev/null +++ b/acceptance/selftest/record_cloud/error/script @@ -0,0 +1,2 @@ +# Proxy server should successfully return non 200 responses. +errcode trace $CLI jobs get 1234 diff --git a/acceptance/selftest/record_cloud/pipeline-crud/output.txt b/acceptance/selftest/record_cloud/pipeline-crud/output.txt new file mode 100644 index 0000000000..2aef802cd9 --- /dev/null +++ b/acceptance/selftest/record_cloud/pipeline-crud/output.txt @@ -0,0 +1,80 @@ + +=== Create a pipeline + +>>> print_requests +{ + "method": "POST", + "path": "/api/2.0/pipelines", + "body": { + "allow_duplicate_names": true, + "libraries": [ + { + "file": { + "path": "/whatever.py" + } + } + ], + "name": "test-pipeline-1" + } +} + +=== Get the pipeline +>>> [CLI] pipelines get [UUID] +"test-pipeline-1" + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/pipelines/[UUID]" +} + +=== Update the pipeline +>>> [CLI] pipelines update [UUID] --json @pipeline2.json + +>>> print_requests +{ + "method": "PUT", + "path": "/api/2.0/pipelines/[UUID]", + "body": { + "allow_duplicate_names": true, + "libraries": [ + { + "file": { + "path": "/whatever.py" + } + } + ], + "name": "test-pipeline-2" + } +} + +=== Verify the update +>>> [CLI] pipelines get [UUID] +"test-pipeline-2" + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/pipelines/[UUID]" +} + +=== Delete the pipeline +>>> [CLI] pipelines delete [UUID] + +>>> print_requests +{ + "method": "DELETE", + "path": "/api/2.0/pipelines/[UUID]" +} + +=== Verify the deletion +>>> [CLI] pipelines get [UUID] +Error: The specified pipeline [UUID] was not found. 
+ +Exit code: 1 + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/pipelines/[UUID]" +} diff --git a/acceptance/selftest/record_cloud/pipeline-crud/pipeline1.json b/acceptance/selftest/record_cloud/pipeline-crud/pipeline1.json new file mode 100644 index 0000000000..dae02d2fd5 --- /dev/null +++ b/acceptance/selftest/record_cloud/pipeline-crud/pipeline1.json @@ -0,0 +1,11 @@ +{ + "name": "test-pipeline-1", + "allow_duplicate_names": true, + "libraries": [ + { + "file": { + "path": "/whatever.py" + } + } + ] +} diff --git a/acceptance/selftest/record_cloud/pipeline-crud/pipeline2.json b/acceptance/selftest/record_cloud/pipeline-crud/pipeline2.json new file mode 100644 index 0000000000..61f87b1f7a --- /dev/null +++ b/acceptance/selftest/record_cloud/pipeline-crud/pipeline2.json @@ -0,0 +1,11 @@ +{ + "name": "test-pipeline-2", + "allow_duplicate_names": true, + "libraries": [ + { + "file": { + "path": "/whatever.py" + } + } + ] +} diff --git a/acceptance/selftest/record_cloud/pipeline-crud/script b/acceptance/selftest/record_cloud/pipeline-crud/script new file mode 100644 index 0000000000..85de4aa322 --- /dev/null +++ b/acceptance/selftest/record_cloud/pipeline-crud/script @@ -0,0 +1,36 @@ +print_requests() { + cat out.requests.txt + rm out.requests.txt +} + +# Verify that the entire crud lifecycle of a pipeline works. 
+title "Create a pipeline" +export pipeline_id=$($CLI pipelines create --json @pipeline1.json | jq -r .pipeline_id) +echo "" + +trace print_requests + +title "Get the pipeline" +trace $CLI pipelines get $pipeline_id | jq .name + +trace print_requests + +title "Update the pipeline" +trace $CLI pipelines update $pipeline_id --json @pipeline2.json + +trace print_requests + +title "Verify the update" +trace $CLI pipelines get $pipeline_id | jq .name + +trace print_requests + +title "Delete the pipeline" +trace $CLI pipelines delete $pipeline_id + +trace print_requests + +title "Verify the deletion" +errcode trace $CLI pipelines get $pipeline_id + +trace print_requests diff --git a/acceptance/selftest/record_cloud/test.toml b/acceptance/selftest/record_cloud/test.toml new file mode 100644 index 0000000000..20d0343612 --- /dev/null +++ b/acceptance/selftest/record_cloud/test.toml @@ -0,0 +1,3 @@ +Cloud = true +Local = false +RecordRequests = true diff --git a/acceptance/selftest/record_cloud/volume-io/hello.txt b/acceptance/selftest/record_cloud/volume-io/hello.txt new file mode 100644 index 0000000000..8c01d89ae0 --- /dev/null +++ b/acceptance/selftest/record_cloud/volume-io/hello.txt @@ -0,0 +1 @@ +hello, world \ No newline at end of file diff --git a/acceptance/selftest/record_cloud/volume-io/output.txt b/acceptance/selftest/record_cloud/volume-io/output.txt new file mode 100644 index 0000000000..8407516403 --- /dev/null +++ b/acceptance/selftest/record_cloud/volume-io/output.txt @@ -0,0 +1,155 @@ + +>>> [CLI] schemas create schema-[UNIQUE_NAME] main +{ + "full_name": "main.schema-[UNIQUE_NAME]", + "owner": "[USERNAME]" +} + +>>> print_requests +{ + "method": "POST", + "path": "/api/2.1/unity-catalog/schemas", + "body": { + "catalog_name": "main", + "name": "schema-[UNIQUE_NAME]" + } +} + +>>> [CLI] schemas get main.schema-[UNIQUE_NAME] +{ + "full_name": "main.schema-[UNIQUE_NAME]", + "owner": "[USERNAME]" +} + +>>> print_requests +{ + "method": "GET", + "path": 
"/api/2.1/unity-catalog/schemas/main.schema-[UNIQUE_NAME]" +} + +>>> [CLI] volumes create main schema-[UNIQUE_NAME] volume-[UNIQUE_NAME] MANAGED +{ + "full_name": "main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME]", + "owner": "[USERNAME]" +} + +>>> print_requests +{ + "method": "POST", + "path": "/api/2.1/unity-catalog/volumes", + "body": { + "catalog_name": "main", + "name": "volume-[UNIQUE_NAME]", + "schema_name": "schema-[UNIQUE_NAME]", + "volume_type": "MANAGED" + } +} + +>>> [CLI] volumes read main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME] +{ + "full_name": "main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME]", + "owner": "[USERNAME]" +} + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.1/unity-catalog/volumes/main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME]" +} + +>>> [CLI] fs cp ./hello.txt dbfs:/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME] +./hello.txt -> dbfs:/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]/hello.txt + +>>> print_requests +{ + "method": "HEAD", + "path": "/api/2.0/fs/directories/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]" +} +{ + "method": "HEAD", + "path": "/api/2.0/fs/directories/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]" +} +{ + "method": "PUT", + "path": "/api/2.0/fs/files/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]/hello.txt", + "raw_body": "hello, world" +} + +>>> [CLI] fs ls dbfs:/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME] +hello.txt + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/fs/directories/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]" +} + +>>> [CLI] fs cat dbfs:/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]/hello.txt +hello, world +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/fs/files/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]/hello.txt" +} + +>>> [CLI] fs rm dbfs:/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]/hello.txt + +>>> print_requests +{ + "method": "HEAD", + "path": 
"/api/2.0/fs/directories/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]/hello.txt" +} +{ + "method": "HEAD", + "path": "/api/2.0/fs/files/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]/hello.txt" +} +{ + "method": "DELETE", + "path": "/api/2.0/fs/files/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]/hello.txt" +} + +>>> [CLI] fs ls dbfs:/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME] + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/fs/directories/Volumes/main/schema-[UNIQUE_NAME]/volume-[UNIQUE_NAME]" +} + +>>> [CLI] volumes delete main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME] + +>>> print_requests +{ + "method": "DELETE", + "path": "/api/2.1/unity-catalog/volumes/main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME]" +} + +>>> [CLI] volumes read main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME] +Error: Volume 'main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME]' does not exist. + +Exit code: 1 + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.1/unity-catalog/volumes/main.schema-[UNIQUE_NAME].volume-[UNIQUE_NAME]" +} + +>>> [CLI] schemas delete main.schema-[UNIQUE_NAME] + +>>> print_requests +{ + "method": "DELETE", + "path": "/api/2.1/unity-catalog/schemas/main.schema-[UNIQUE_NAME]" +} + +>>> [CLI] schemas get main.schema-[UNIQUE_NAME] +Error: Schema 'main.schema-[UNIQUE_NAME]' does not exist. 
+ +Exit code: 1 + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.1/unity-catalog/schemas/main.schema-[UNIQUE_NAME]" +} diff --git a/acceptance/selftest/record_cloud/volume-io/script b/acceptance/selftest/record_cloud/volume-io/script new file mode 100644 index 0000000000..741eec1957 --- /dev/null +++ b/acceptance/selftest/record_cloud/volume-io/script @@ -0,0 +1,56 @@ +print_requests() { + cat out.requests.txt + rm out.requests.txt +} + +trace $CLI schemas create schema-$UNIQUE_NAME main | jq '{full_name, owner}' + +trace print_requests + +trace $CLI schemas get main.schema-$UNIQUE_NAME | jq '{full_name, owner}' + +trace print_requests + +trace $CLI volumes create main schema-$UNIQUE_NAME volume-$UNIQUE_NAME MANAGED | jq '{full_name, owner}' + +trace print_requests + +trace $CLI volumes read main.schema-$UNIQUE_NAME.volume-$UNIQUE_NAME | jq '{full_name, owner}' + +trace print_requests + +trace $CLI fs cp ./hello.txt dbfs:/Volumes/main/schema-$UNIQUE_NAME/volume-$UNIQUE_NAME + +trace print_requests + +trace $CLI fs ls dbfs:/Volumes/main/schema-$UNIQUE_NAME/volume-$UNIQUE_NAME + +trace print_requests + +trace $CLI fs cat dbfs:/Volumes/main/schema-$UNIQUE_NAME/volume-$UNIQUE_NAME/hello.txt + +trace print_requests + +trace $CLI fs rm dbfs:/Volumes/main/schema-$UNIQUE_NAME/volume-$UNIQUE_NAME/hello.txt + +trace print_requests + +trace $CLI fs ls dbfs:/Volumes/main/schema-$UNIQUE_NAME/volume-$UNIQUE_NAME + +trace print_requests + +trace $CLI volumes delete main.schema-$UNIQUE_NAME.volume-$UNIQUE_NAME + +trace print_requests + +errcode trace $CLI volumes read main.schema-$UNIQUE_NAME.volume-$UNIQUE_NAME + +trace print_requests + +trace $CLI schemas delete main.schema-$UNIQUE_NAME + +trace print_requests + +errcode trace $CLI schemas get main.schema-$UNIQUE_NAME + +trace print_requests diff --git a/acceptance/selftest/record_cloud/volume-io/test.toml b/acceptance/selftest/record_cloud/volume-io/test.toml new file mode 100644 index 0000000000..9f65b59bbd --- 
/dev/null +++ b/acceptance/selftest/record_cloud/volume-io/test.toml @@ -0,0 +1 @@ +RequiresUnityCatalog = true diff --git a/acceptance/selftest/record_cloud/workspace-file-io/hello.txt b/acceptance/selftest/record_cloud/workspace-file-io/hello.txt new file mode 100644 index 0000000000..4b5fa63702 --- /dev/null +++ b/acceptance/selftest/record_cloud/workspace-file-io/hello.txt @@ -0,0 +1 @@ +hello, world diff --git a/acceptance/selftest/record_cloud/workspace-file-io/output.txt b/acceptance/selftest/record_cloud/workspace-file-io/output.txt new file mode 100644 index 0000000000..39b5a2cf8d --- /dev/null +++ b/acceptance/selftest/record_cloud/workspace-file-io/output.txt @@ -0,0 +1,124 @@ + +=== create a folder +>>> [CLI] workspace mkdirs /Users/[USERNAME]/[UNIQUE_NAME] + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/preview/scim/v2/Me" +} +{ + "method": "POST", + "path": "/api/2.0/workspace/mkdirs", + "body": { + "path": "/Users/[USERNAME]/[UNIQUE_NAME]" + } +} + +=== upload a file +>>> [CLI] workspace import /Users/[USERNAME]/[UNIQUE_NAME]/hello.txt --format AUTO --file ./hello.txt + +>>> print_requests +{ + "method": "POST", + "path": "/api/2.0/workspace/import", + "body": { + "content": "[HELLO-WORLD]", + "format": "AUTO", + "path": "/Users/[USERNAME]/[UNIQUE_NAME]/hello.txt" + } +} + +=== list the folder +>>> [CLI] workspace list /Users/[USERNAME]/[UNIQUE_NAME] --output json +{ + "path": "/Users/[USERNAME]/[UNIQUE_NAME]/hello.txt", + "object_type": "FILE" +} + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/workspace/list" +} + +=== stat the file +>>> [CLI] workspace get-status /Users/[USERNAME]/[UNIQUE_NAME]/hello.txt +{ + "path": "/Users/[USERNAME]/[UNIQUE_NAME]/hello.txt", + "object_type": "FILE" +} + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/workspace/get-status" +} + +=== download the file +>>> [CLI] workspace export /Users/[USERNAME]/[UNIQUE_NAME]/hello.txt --format AUTO --file ./hello2.txt + +>>> cat 
hello2.txt +hello, world + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/workspace/export" +} + +=== delete the file +>>> [CLI] workspace delete /Users/[USERNAME]/[UNIQUE_NAME]/hello.txt + +>>> print_requests +{ + "method": "POST", + "path": "/api/2.0/workspace/delete", + "body": { + "path": "/Users/[USERNAME]/[UNIQUE_NAME]/hello.txt" + } +} + +=== stat the file again +>>> [CLI] workspace get-status /Users/[USERNAME]/[UNIQUE_NAME]/hello.txt +Error: Path (/Users/[USERNAME]/[UNIQUE_NAME]/hello.txt) doesn't exist. + +Exit code: 1 + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/workspace/get-status" +} + +=== list the folder +>>> [CLI] workspace list /Users/[USERNAME]/[UNIQUE_NAME] +ID Type Language Path + +=== delete the folder +>>> [CLI] workspace delete /Users/[USERNAME]/[UNIQUE_NAME] + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/workspace/list" +} +{ + "method": "POST", + "path": "/api/2.0/workspace/delete", + "body": { + "path": "/Users/[USERNAME]/[UNIQUE_NAME]" + } +} + +=== list the folder again +>>> [CLI] workspace list /Users/[USERNAME]/[UNIQUE_NAME] +Error: Path (/Users/[USERNAME]/[UNIQUE_NAME]) doesn't exist. + +Exit code: 1 + +>>> print_requests +{ + "method": "GET", + "path": "/api/2.0/workspace/list" +} diff --git a/acceptance/selftest/record_cloud/workspace-file-io/script b/acceptance/selftest/record_cloud/workspace-file-io/script new file mode 100644 index 0000000000..990b785691 --- /dev/null +++ b/acceptance/selftest/record_cloud/workspace-file-io/script @@ -0,0 +1,62 @@ +export username=$($CLI current-user me | jq -r .userName) + +# MSYS2 automatically converts absolute paths like /Users/$username/$UNIQUE_NAME to +# C:/Program Files/Git/Users/$username/UNIQUE_NAME before passing it to the CLI +# Setting this environment variable prevents that conversion on windows. 
+export MSYS_NO_PATHCONV=1 + +print_requests() { + cat out.requests.txt + rm out.requests.txt +} + +title "create a folder" +trace $CLI workspace mkdirs /Users/$username/$UNIQUE_NAME + +trace print_requests + +title "upload a file" +trace $CLI workspace import /Users/$username/$UNIQUE_NAME/hello.txt --format AUTO --file ./hello.txt + +trace print_requests + +title "list the folder" +trace $CLI workspace list /Users/$username/$UNIQUE_NAME --output json | jq '.[] | {path, object_type}' + +trace print_requests + +title "stat the file" +trace $CLI workspace get-status /Users/$username/$UNIQUE_NAME/hello.txt | jq '{path, object_type}' + +trace print_requests + +title "download the file" +trace $CLI workspace export /Users/$username/$UNIQUE_NAME/hello.txt --format AUTO --file ./hello2.txt + +trace cat hello2.txt +rm hello2.txt + +trace print_requests + +title "delete the file" +trace $CLI workspace delete /Users/$username/$UNIQUE_NAME/hello.txt + +trace print_requests + +title "stat the file again" +errcode trace $CLI workspace get-status /Users/$username/$UNIQUE_NAME/hello.txt + +trace print_requests + +title "list the folder" +trace $CLI workspace list /Users/$username/$UNIQUE_NAME + +title "delete the folder" +trace $CLI workspace delete /Users/$username/$UNIQUE_NAME + +trace print_requests + +title "list the folder again" +errcode trace $CLI workspace list /Users/$username/$UNIQUE_NAME + +trace print_requests diff --git a/acceptance/selftest/record_cloud/workspace-file-io/test.toml b/acceptance/selftest/record_cloud/workspace-file-io/test.toml new file mode 100644 index 0000000000..fa35335247 --- /dev/null +++ b/acceptance/selftest/record_cloud/workspace-file-io/test.toml @@ -0,0 +1,9 @@ +# hello, world\n base64 encoded in unix +[[Repls]] +Old = "aGVsbG8sIHdvcmxkCg==" +New = "[HELLO-WORLD]" + +# hello, world\n base64 encoded in windows +[[Repls]] +Old = "aGVsbG8sIHdvcmxkDQo=" +New = "[HELLO-WORLD]" diff --git a/libs/testproxy/server.go b/libs/testproxy/server.go new 
file mode 100644 index 0000000000..b5dfbf2f10 --- /dev/null +++ b/libs/testproxy/server.go @@ -0,0 +1,153 @@ +package testproxy + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/testserver" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/client" + "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type ProxyServer struct { + *httptest.Server + + t testutil.TestingT + + apiClient *client.DatabricksClient + RequestCallback func(request *testserver.Request) + ResponseCallback func(request *testserver.Request, response *testserver.EncodedResponse) +} + +// This creates a reverse proxy server that sits in front of a real Databricks +// workspace. This is useful for recording API requests and responses in +// integration tests. +// +// Note: We cannot directly proxy the request from a localhost URL to a real +// workspace as is. This is because auth resolution in the Databricks SDK relies on +// what the URL actually looks like to determine the auth method to use. +// For example, in OAuth flows, the SDK can make requests to different Microsoft +// OAuth endpoints based on the nature of the URL. +// For reference, see: +// https://github.com/databricks/databricks-sdk-go/blob/79e4b3a6e9b0b7dcb1af9ad4025deb447b01d933/common/environment/environments.go#L57 +func New(t testutil.TestingT) *ProxyServer { + s := &ProxyServer{ + t: t, + } + + // Create an API client using the current authentication context. + // In CI test environments this would read the appropriate environment + // variables. + var err error + s.apiClient, err = client.New(&config.Config{}) + require.NoError(t, err) + + // Set up the proxy handler as the default handler for all requests. 
+ server := httptest.NewServer(http.HandlerFunc(s.proxyToCloud)) + t.Cleanup(server.Close) + + s.Server = server + return s +} + +func (s *ProxyServer) reqBody(r testserver.Request) any { + // The SDK expects the query parameters to be specified in the "request body" + // argument for GET, DELETE, and HEAD requests in the .Do method. + if r.Method == "GET" || r.Method == "DELETE" || r.Method == "HEAD" { + queryParams := make(map[string]any) + for k, v := range r.URL.Query() { + queryParams[k] = v[0] + } + return queryParams + } + + // Otherwise, return the raw body. + return r.Body +} + +func (s *ProxyServer) proxyToCloud(w http.ResponseWriter, r *http.Request) { + request := testserver.NewRequest(s.t, r, nil) + if s.RequestCallback != nil { + s.RequestCallback(&request) + } + + headers := make(map[string]string) + for k, v := range r.Header { + // Authorization headers will be set by the SDK. Do not pass them along here. + if k == "Authorization" { + continue + } + // The default HTTP client in Go sets the Accept-Encoding header to + // "gzip". Since it is originally meant to be read by the server and + // will be set again when the SDK makes the request to the workspace, do + // not pass it along here. + if k == "Accept-Encoding" { + continue + } + headers[k] = v[0] + } + + queryParams := make(map[string]any) + for k, v := range r.URL.Query() { + queryParams[k] = v[0] + } + + reqBody := s.reqBody(request) + respBody := bytes.Buffer{} + err := s.apiClient.Do(context.Background(), r.Method, r.URL.Path, headers, queryParams, reqBody, &respBody) + + var encodedResponse *testserver.EncodedResponse + + // API errors from the SDK are expected to be of the type [apierr.APIError]. If we + // get an API error then parse the error and forward it back to the client + // in an appropriate format. 
+ apiErr := &apierr.APIError{} + if errors.As(err, &apiErr) { + body := map[string]string{ + "error_code": apiErr.ErrorCode, + "message": apiErr.Message, + } + + b, err := json.Marshal(body) + assert.NoError(s.t, err) + + encodedResponse = &testserver.EncodedResponse{ + StatusCode: apiErr.StatusCode, + Body: b, + } + } + + // Something else went wrong. + if encodedResponse == nil && err != nil { + encodedResponse = &testserver.EncodedResponse{ + StatusCode: 500, + Body: []byte(err.Error()), + } + } + + // Successful response + if encodedResponse == nil { + encodedResponse = &testserver.EncodedResponse{ + StatusCode: 200, + Body: respBody.Bytes(), + } + } + + // Send response to client. + w.WriteHeader(encodedResponse.StatusCode) + + _, err = w.Write(encodedResponse.Body) + assert.NoError(s.t, err) + + if s.ResponseCallback != nil { + s.ResponseCallback(&request, encodedResponse) + } +}