diff --git a/third_party/terraform/tests/resource_dataflow_flex_template_job_test.go.erb b/third_party/terraform/tests/resource_dataflow_flex_template_job_test.go.erb index d1775c73f1ed..962ff13393bd 100644 --- a/third_party/terraform/tests/resource_dataflow_flex_template_job_test.go.erb +++ b/third_party/terraform/tests/resource_dataflow_flex_template_job_test.go.erb @@ -10,9 +10,9 @@ import ( ) func TestAccDataflowFlexTemplateJob_basic(t *testing.T) { - // This resource uses custom retry logic that cannot be sped up without - // modifying the actual resource - skipIfVcr(t) + // This resource uses custom retry logic that cannot be sped up without + // modifying the actual resource + skipIfVcr(t) t.Parallel() randStr := randString(t, 10) @@ -25,7 +25,7 @@ func TestAccDataflowFlexTemplateJob_basic(t *testing.T) { CheckDestroy: testAccCheckDataflowJobDestroyProducer(t), Steps: []resource.TestStep{ { - Config: testAccDataflowFlowFlexTemplateJob_basic(bucket, job), + Config: testAccDataflowFlexTemplateJob_basic(bucket, job), Check: resource.ComposeTestCheckFunc( testAccDataflowJobExists(t, "google_dataflow_flex_template_job.big_data"), ), @@ -34,8 +34,95 @@ func TestAccDataflowFlexTemplateJob_basic(t *testing.T) { }) } +func TestAccDataflowFlexTemplateJob_withServiceAccount(t *testing.T) { + // Dataflow responses include serialized java classes and bash commands + // This makes body comparison infeasible + skipIfVcr(t) + t.Parallel() + + randStr := randString(t, 10) + bucket := "tf-test-dataflow-gcs-" + randStr + job := "tf-test-dataflow-job-" + randStr + accountId := "tf-test-dataflow-sa" + randStr + zone := "us-central1-b" + + vcrTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckDataflowJobDestroyProducer(t), + Steps: []resource.TestStep{ + { + Config: testAccDataflowFlexTemplateJob_serviceAccount(bucket, job, accountId, zone), + Check: resource.ComposeTestCheckFunc( + 
testAccDataflowJobExists(t, "google_dataflow_flex_template_job.big_data"),
+					testAccDataflowFlexTemplateJobHasServiceAccount(t, "google_dataflow_flex_template_job.big_data", accountId, zone),
+				),
+			},
+		},
+	})
+}
+
+// testAccDataflowFlexTemplateJobHasServiceAccount returns a check that looks up
+// the compute instance found for the job resource res in zone (via
+// testAccDataflowFlexTemplateJobGetGeneratedInstance), requires that instance
+// to carry exactly one service account, and compares the part of that
+// account's email before the "@" with expectedId.
+func testAccDataflowFlexTemplateJobHasServiceAccount(t *testing.T, res, expectedId, zone string) resource.TestCheckFunc {
+	return func(s *terraform.State) error {
+		instance, err := testAccDataflowFlexTemplateJobGetGeneratedInstance(t, s, res, zone)
+		if err != nil {
+			return fmt.Errorf("Error getting dataflow job instance: %s", err)
+		}
+		accounts := instance.ServiceAccounts
+		// The guard also fires for zero accounts, so the message reports the
+		// actual count instead of claiming "multiple".
+		if len(accounts) != 1 {
+			return fmt.Errorf("expected exactly 1 service account for dataflow job %q, found %d", res, len(accounts))
+		}
+		// Only the account ID (local part of the email) is compared.
+		actualId := strings.Split(accounts[0].Email, "@")[0]
+		if expectedId != actualId {
+			return fmt.Errorf("service account mismatch, expected account ID = %q, actual email = %q", expectedId, accounts[0].Email)
+		}
+		return nil
+	}
+}
+
+func testAccDataflowFlexTemplateJobGetGeneratedInstance(t *testing.T, s *terraform.State, res, zone string) (*compute.Instance, error) {
+	rs, ok := s.RootModule().Resources[res]
+	if !ok {
+		return nil, fmt.Errorf("resource %q not in state", res)
+	}
+	if rs.Primary.ID == "" {
+		return nil, fmt.Errorf("resource %q does not have an ID set", res)
+	}
+	filter := fmt.Sprintf("labels.goog-dataflow-job-id = %s", rs.Primary.ID)
+
+	config := googleProviderConfig(t)
+
+	var instance *compute.Instance
+
+	err := resource.Retry(1*time.Minute, func() *resource.RetryError {
+		instances, rerr := config.NewComputeClient(config.userAgent).Instances.
+			List(config.Project, zone).
+			Filter(filter).
+			MaxResults(2).
+ Do() + if rerr != nil { + return resource.NonRetryableError(rerr) + } + if len(instances.Items) == 0 { + return resource.RetryableError(fmt.Errorf("no instance found for dataflow job %q", rs.Primary.ID)) + } + if len(instances.Items) > 1 { + return resource.NonRetryableError(fmt.Errorf("Wrong number of matching instances for dataflow job: %s, %d", rs.Primary.ID, len(instances.Items))) + } + instance = instances.Items[0] + if instance == nil { + return resource.NonRetryableError(fmt.Errorf("invalid instance")) + } + return nil + }) + if err != nil { + return nil, err + } + return instance, nil +} + // note: this config creates a job that doesn't actually do anything -func testAccDataflowFlowFlexTemplateJob_basic(bucket, job string) string { +func testAccDataflowFlexTemplateJob_basic(bucket, job string) string { return fmt.Sprintf(` resource "google_storage_bucket" "temp" { name = "%s" @@ -43,9 +130,9 @@ resource "google_storage_bucket" "temp" { } resource "google_storage_bucket_object" "flex_template" { - name = "flex_template.json" - bucket = google_storage_bucket.temp.name - content = < diff --git a/third_party/terraform/website/docs/r/dataflow_flex_template_job.html.markdown b/third_party/terraform/website/docs/r/dataflow_flex_template_job.html.markdown index f209114c3ec9..598dda663d38 100644 --- a/third_party/terraform/website/docs/r/dataflow_flex_template_job.html.markdown +++ b/third_party/terraform/website/docs/r/dataflow_flex_template_job.html.markdown @@ -29,14 +29,14 @@ resource "google_dataflow_flex_template_job" "big_data_job" { ## Note on "destroy" / "apply" There are many types of Dataflow jobs. Some Dataflow jobs run constantly, -getting new data from (e.g.) a GCS bucket, and outputting data continuously. +getting new data from (e.g.) a GCS bucket, and outputting data continuously. Some jobs process a set amount of data then terminate. All jobs can fail while running due to programming errors or other issues. 
In this way, Dataflow jobs are different from most other Terraform / Google resources. The Dataflow resource is considered 'existing' while it is in a nonterminal state. If it reaches a terminal state (e.g. 'FAILED', 'COMPLETE', -'CANCELLED'), it will be recreated on the next 'apply'. This is as expected for +'CANCELLED'), it will be recreated on the next 'apply'. This is as expected for jobs which run continuously, but may surprise users who use this resource for other kinds of Dataflow jobs. @@ -60,15 +60,16 @@ Template. - - - * `parameters` - (Optional) Key/Value pairs to be passed to the Dataflow job (as -used in the template). +used in the template). Additional [pipeline options](https://cloud.google.com/dataflow/docs/guides/specifying-exec-params#setting-other-cloud-dataflow-pipeline-options) +such as `serviceAccount`, `workerMachineType`, etc. can be specified here. * `labels` - (Optional) User labels to be specified for the job. Keys and values should follow the restrictions specified in the [labeling restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions) page. **Note**: This field is marked as deprecated in Terraform as the API does not currently -support adding labels. +support adding labels. **NOTE**: Google-provided Dataflow templates often provide default labels that begin with `goog-dataflow-provided`. Unless explicitly set in config, these -labels will be ignored to prevent diffs on re-apply. +labels will be ignored to prevent diffs on re-apply. * `on_delete` - (Optional) One of "drain" or "cancel". Specifies behavior of deletion during `terraform destroy`. See above note. @@ -76,6 +77,8 @@ deletion during `terraform destroy`. See above note. * `project` - (Optional) The project in which the resource belongs. If it is not provided, the provider project is used. +* `region` - (Optional) The region in which the created job should run.
+ ## Attributes Reference In addition to the arguments listed above, the following computed attributes are exported: