From 139e3670cbba196a3d41af74993c78c5887a2e92 Mon Sep 17 00:00:00 2001 From: qaziashikin Date: Wed, 3 Jun 2026 20:38:57 -0400 Subject: [PATCH 1/5] Fix IDC domain S3 path resolution --- .../sagemaker_unified_studio_notebook.py | 43 ++++++++++++------- .../test_sagemaker_unified_studio_notebook.py | 36 ++++++++++++++-- 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py b/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py index 20ed57b3a2be3..90b14d5c400a0 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py @@ -247,21 +247,26 @@ def _handle_status( error_message = execution_message raise RuntimeError(error_message) - def get_project_s3_path(self, domain_identifier: str, project_id: str) -> str: + def get_project_s3_path(self, domain_identifier: str, project_id: str) -> tuple[str, str]: """ - Look up the S3 bucket path for a SageMaker Unified Studio project. + Look up the S3 location for a SageMaker Unified Studio project. - The bucket path is read from the ``s3BucketPath`` provisioned resource of - the project's default ("Tooling") environment via the DataZone APIs: - ``GetEnvironment(GetProjectDefaultEnvironment(...))``. This mirrors how - SageMaker Unified Studio resolves the project bucket, and accommodates projects - whose bucket name does not follow the - ``amazon-sagemaker-{account_id}-{region}-{project_id}`` template (for - example, BYOR-bucket projects). + The bucket and key prefix are read from the ``s3BucketPath`` provisioned + resource of the project's default ("Tooling") environment via the + DataZone APIs. 
This mirrors how SageMaker Unified Studio resolves the + project bucket and accommodates projects whose bucket name does not + follow the ``amazon-sagemaker-{account_id}-{region}-{project_id}`` + template (for example, BYOR-bucket projects). :param domain_identifier: The ID of the DataZone domain. :param project_id: The ID of the DataZone project. - :return: The S3 bucket name for the project. + :return: A ``(bucket, prefix)`` tuple. ``bucket`` is the S3 bucket name. + ``prefix`` is the path component of the project's + ``s3BucketPath`` (with no leading or trailing ``/``) — typically + ``""`` for IAM domains and + ``"<domain_id>/<project_id>/<env>"`` for IDC domains. + Callers should prepend ``prefix`` to any S3 key they construct so + writes/reads stay within the IAM scope of the project's role. :raises RuntimeError: If the default tooling environment or the ``s3BucketPath`` provisioned resource cannot be found. """ @@ -277,7 +282,9 @@ def get_project_s3_path(self, domain_identifier: str, project_id: str) -> str: f"environment {environment_id} for project {project_id} in domain " f"{domain_identifier}" ) - # value looks like "s3://<bucket>/<prefix>"; return the bucket name only. + # value looks like "s3://<bucket>" (IAM) or + # "s3://<bucket>/<domain_id>/<project_id>/<env>" (IDC). Return both + # parts so callers can construct project-scoped keys. 
parts = urlparse(value, allow_fragments=False) bucket = parts.netloc if not bucket: @@ -286,7 +293,8 @@ def get_project_s3_path(self, domain_identifier: str, project_id: str) -> str: f"'{value}' in default tooling environment {environment_id} for " f"project {project_id} in domain {domain_identifier}" ) - return bucket + prefix = parts.path.strip("/") + return bucket, prefix raise RuntimeError( f"s3BucketPath provisioned resource not found in default tooling environment " @@ -419,10 +427,10 @@ def get_notebook_outputs( """ log = logging.getLogger(__name__) try: - bucket = self.get_project_s3_path(domain_identifier, owning_project_identifier) + bucket, prefix = self.get_project_s3_path(domain_identifier, owning_project_identifier) except Exception: log.warning( - "Failed to resolve project S3 bucket for project %s in domain %s, " + "Failed to resolve project S3 location for project %s in domain %s, " "skipping notebook outputs read.", owning_project_identifier, domain_identifier, @@ -430,7 +438,12 @@ def get_notebook_outputs( ) return {} - key = f"sys/notebooks/{notebook_identifier}/runs/{notebook_run_id}/notebook_outputs.json" + # IDC domains have a non-empty prefix (e.g. "<domain_id>/<project_id>/<env>") + # and the project role's IAM policy only allows S3 reads under that prefix. + # IAM domains have prefix == "" and the key is unchanged from the + # legacy bucket-root layout. 
+ run_key = f"sys/notebooks/{notebook_identifier}/runs/{notebook_run_id}/notebook_outputs.json" + key = f"{prefix}/{run_key}" if prefix else run_key log.info("Reading notebook outputs from s3://%s/%s", bucket, key) diff --git a/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py b/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py index 9202343116153..6fa2d57695ffe 100644 --- a/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py +++ b/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py @@ -334,7 +334,7 @@ def test_get_project_s3_path_uses_default_tooling_environment(self): result = self.hook.get_project_s3_path(DOMAIN_ID, PROJECT_ID) - assert result == bucket + assert result == (bucket, f"dzd_x/{PROJECT_ID}/dev") self.mock_client.list_environment_blueprints.assert_any_call( domainIdentifier=DOMAIN_ID, managed=True, @@ -368,7 +368,7 @@ def test_get_project_s3_path_picks_lowest_deployment_order(self): result = self.hook.get_project_s3_path(DOMAIN_ID, PROJECT_ID) - assert result == bucket + assert result == (bucket, "p") self.mock_client.get_environment.assert_called_once_with( domainIdentifier=DOMAIN_ID, identifier=env_id, @@ -389,7 +389,7 @@ def test_get_project_s3_path_falls_back_to_tooling_lite(self): result = self.hook.get_project_s3_path(DOMAIN_ID, PROJECT_ID) - assert result == bucket + assert result == (bucket, "p") # Both blueprint lookups happened. 
assert self.mock_client.list_environment_blueprints.call_count == 2 self.mock_client.get_environment.assert_called_once_with( @@ -414,7 +414,7 @@ def test_get_project_s3_path_falls_back_to_first_when_no_deployment_order(self): result = self.hook.get_project_s3_path(DOMAIN_ID, PROJECT_ID) - assert result == bucket + assert result == (bucket, "p") self.mock_client.get_environment.assert_called_once_with( domainIdentifier=DOMAIN_ID, identifier=env_id, @@ -499,6 +499,34 @@ def test_get_notebook_outputs_success(self): bucket = "test-bucket" self._stub_project_bucket(bucket) + with patch(f"{HOOK_MODULE}.S3Hook") as mock_s3_hook_cls: + mock_s3_hook_cls.return_value.read_key.return_value = json.dumps(outputs) + result = self.hook.get_notebook_outputs( + notebook_identifier=NOTEBOOK_ID, + notebook_run_id=NOTEBOOK_RUN_ID, + domain_identifier=DOMAIN_ID, + owning_project_identifier=PROJECT_ID, + ) + + assert result == outputs + expected_key = ( + f"dzd_x/{PROJECT_ID}/dev/sys/notebooks/{NOTEBOOK_ID}/runs/{NOTEBOOK_RUN_ID}/notebook_outputs.json" + ) + mock_s3_hook_cls.return_value.read_key.assert_called_once_with(key=expected_key, bucket_name=bucket) + + def test_get_notebook_outputs_iam_mode_no_prefix(self): + """IAM-mode projects (s3BucketPath is bucket-only) read from the bucket root.""" + outputs = {"name": "Alice"} + bucket = "iam-mode-bucket" + # Tooling env returns s3BucketPath without a path component. 
+ self._stub_tooling_blueprint_lookup( + environments=[{"id": "env-1", "name": "Tooling", "deploymentOrder": 1}] + ) + self.mock_client.get_environment.return_value = { + "id": "env-1", + "provisionedResources": [{"name": "s3BucketPath", "value": f"s3://{bucket}"}], + } + with patch(f"{HOOK_MODULE}.S3Hook") as mock_s3_hook_cls: mock_s3_hook_cls.return_value.read_key.return_value = json.dumps(outputs) result = self.hook.get_notebook_outputs( From e3fe045b0729dccd4425c1bab6bbf0e13e960833 Mon Sep 17 00:00:00 2001 From: qaziashikin Date: Wed, 3 Jun 2026 22:05:10 -0400 Subject: [PATCH 2/5] Change S3 path to use .sys --- .../sagemaker_unified_studio_notebook.py | 2 +- ...ample_sagemaker_unified_studio_notebook.py | 72 +++++++++---------- .../test_sagemaker_unified_studio_notebook.py | 4 +- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py b/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py index 90b14d5c400a0..8c782ba450471 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py @@ -442,7 +442,7 @@ def get_notebook_outputs( # and the project role's IAM policy only allows S3 reads under that prefix. # IAM domains have prefix == "" and the key is unchanged from the # legacy bucket-root layout. 
- run_key = f"sys/notebooks/{notebook_identifier}/runs/{notebook_run_id}/notebook_outputs.json" + run_key = f".sys/notebooks/{notebook_identifier}/runs/{notebook_run_id}/notebook_outputs.json" key = f"{prefix}/{run_key}" if prefix else run_key log.info("Reading notebook outputs from s3://%s/%s", bucket, key) diff --git a/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio_notebook.py b/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio_notebook.py index 3521d1d986297..c50c27c456770 100644 --- a/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio_notebook.py +++ b/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio_notebook.py @@ -57,7 +57,7 @@ PROJECT_ID_KEY = "PROJECT_ID" NOTEBOOK_ID_KEY = "NOTEBOOK_ID" NOTEBOOK_B_ID_KEY = "NOTEBOOK_B_ID" -DATAZONE_ROLE_ARN_KEY = "DATAZONE_ROLE_ARN" +# DATAZONE_ROLE_ARN_KEY = "DATAZONE_ROLE_ARN" sys_test_context_task = ( SystemTestContextBuilder() @@ -65,11 +65,11 @@ .add_variable(PROJECT_ID_KEY) .add_variable(NOTEBOOK_ID_KEY) .add_variable(NOTEBOOK_B_ID_KEY) - .add_variable(DATAZONE_ROLE_ARN_KEY) + # .add_variable(DATAZONE_ROLE_ARN_KEY) .build() ) -DATAZONE_CONN_ID = "aws_datazone_notebook" +# DATAZONE_CONN_ID = "aws_datazone_notebook" @task @@ -101,39 +101,39 @@ def setup_datazone_connection(role_arn: str): project_id = test_context[PROJECT_ID_KEY] notebook_id = test_context[NOTEBOOK_ID_KEY] notebook_b_id = test_context[NOTEBOOK_B_ID_KEY] - configure_conn = setup_datazone_connection(test_context[DATAZONE_ROLE_ARN_KEY]) + # configure_conn = setup_datazone_connection(test_context[DATAZONE_ROLE_ARN_KEY]) # [START howto_operator_sagemaker_unified_studio_notebook] client_token = f"idempotency-token-{int(time.time())}" - run_notebook = SageMakerUnifiedStudioNotebookOperator( - task_id="notebook-task", - aws_conn_id=DATAZONE_CONN_ID, - notebook_identifier=notebook_id, - domain_identifier=domain_id, - owning_project_identifier=project_id, - 
client_token=client_token, # optional - notebook_parameters={ - "param1": "value1", - "param2": "value2", - }, # optional - compute_configuration={"instanceType": "sc.m5.large"}, # optional - timeout_configuration={"runTimeoutInMinutes": 1440}, # optional - wait_for_completion=True, # optional - waiter_delay=30, # optional - deferrable=False, # optional - ) + # run_notebook = SageMakerUnifiedStudioNotebookOperator( + # task_id="notebook-task", + # aws_conn_id=DATAZONE_CONN_ID, + # notebook_identifier=notebook_id, + # domain_identifier=domain_id, + # owning_project_identifier=project_id, + # client_token=client_token, # optional + # notebook_parameters={ + # "param1": "value1", + # "param2": "value2", + # }, # optional + # compute_configuration={"instanceType": "sc.m5.large"}, # optional + # timeout_configuration={"runTimeoutInMinutes": 1440}, # optional + # wait_for_completion=True, # optional + # waiter_delay=30, # optional + # deferrable=False, # optional + # ) # [END howto_operator_sagemaker_unified_studio_notebook] # [START howto_sensor_sagemaker_unified_studio_notebook] - run_sensor = SageMakerUnifiedStudioNotebookSensor( - task_id="notebook-sensor-task", - aws_conn_id=DATAZONE_CONN_ID, - domain_identifier=domain_id, - owning_project_identifier=project_id, - notebook_identifier=notebook_id, - notebook_run_id=run_notebook.output["notebook_run_id"], - ) + # run_sensor = SageMakerUnifiedStudioNotebookSensor( + # task_id="notebook-sensor-task", + # aws_conn_id=DATAZONE_CONN_ID, + # domain_identifier=domain_id, + # owning_project_identifier=project_id, + # notebook_identifier=notebook_id, + # notebook_run_id=run_notebook.output["notebook_run_id"], + # ) # [END howto_sensor_sagemaker_unified_studio_notebook] # [START howto_operator_sagemaker_unified_studio_notebook_pass_outputs] @@ -141,7 +141,7 @@ def setup_datazone_connection(role_arn: str): # Notebook B consumes those outputs via Jinja templating in notebook_parameters. 
run_notebook_a = SageMakerUnifiedStudioNotebookOperator( task_id="notebook-a-task", - aws_conn_id=DATAZONE_CONN_ID, + # aws_conn_id=DATAZONE_CONN_ID, notebook_identifier=notebook_id, domain_identifier=domain_id, owning_project_identifier=project_id, @@ -151,13 +151,13 @@ def setup_datazone_connection(role_arn: str): run_notebook_b = SageMakerUnifiedStudioNotebookOperator( task_id="notebook-b-task", - aws_conn_id=DATAZONE_CONN_ID, + # aws_conn_id=DATAZONE_CONN_ID, notebook_identifier=notebook_b_id, domain_identifier=domain_id, owning_project_identifier=project_id, notebook_parameters={ - "employee_name": "{{ task_instance.xcom_pull(task_ids='notebook-a-task', key='NOTEBOOK_OUTPUT.name') }}", - "employee_age": "{{ task_instance.xcom_pull(task_ids='notebook-a-task', key='NOTEBOOK_OUTPUT.age') }}", + "name": "{{ task_instance.xcom_pull(task_ids='notebook-a-task', key='NOTEBOOK_OUTPUT.name') }}", + "age": "{{ task_instance.xcom_pull(task_ids='notebook-a-task', key='NOTEBOOK_OUTPUT.age') }}", }, wait_for_completion=True, deferrable=False, @@ -166,9 +166,9 @@ def setup_datazone_connection(role_arn: str): chain( test_context, - configure_conn, - run_notebook, - run_sensor, + # configure_conn, + # run_notebook, + # run_sensor, run_notebook_a, run_notebook_b, ) diff --git a/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py b/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py index 6fa2d57695ffe..79e37fa801af8 100644 --- a/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py +++ b/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py @@ -510,7 +510,7 @@ def test_get_notebook_outputs_success(self): assert result == outputs expected_key = ( - f"dzd_x/{PROJECT_ID}/dev/sys/notebooks/{NOTEBOOK_ID}/runs/{NOTEBOOK_RUN_ID}/notebook_outputs.json" + f"dzd_x/{PROJECT_ID}/dev/.sys/notebooks/{NOTEBOOK_ID}/runs/{NOTEBOOK_RUN_ID}/notebook_outputs.json" 
) mock_s3_hook_cls.return_value.read_key.assert_called_once_with(key=expected_key, bucket_name=bucket) @@ -537,7 +537,7 @@ def test_get_notebook_outputs_iam_mode_no_prefix(self): ) assert result == outputs - expected_key = f"sys/notebooks/{NOTEBOOK_ID}/runs/{NOTEBOOK_RUN_ID}/notebook_outputs.json" + expected_key = f".sys/notebooks/{NOTEBOOK_ID}/runs/{NOTEBOOK_RUN_ID}/notebook_outputs.json" mock_s3_hook_cls.return_value.read_key.assert_called_once_with(key=expected_key, bucket_name=bucket) def test_get_notebook_outputs_no_such_key(self): From 41640f5e71158f4ad4cff55d28876e647ca9a8de Mon Sep 17 00:00:00 2001 From: qaziashikin Date: Wed, 3 Jun 2026 22:06:34 -0400 Subject: [PATCH 3/5] Revert system test updates --- ...ample_sagemaker_unified_studio_notebook.py | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio_notebook.py b/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio_notebook.py index c50c27c456770..3521d1d986297 100644 --- a/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio_notebook.py +++ b/providers/amazon/tests/system/amazon/aws/example_sagemaker_unified_studio_notebook.py @@ -57,7 +57,7 @@ PROJECT_ID_KEY = "PROJECT_ID" NOTEBOOK_ID_KEY = "NOTEBOOK_ID" NOTEBOOK_B_ID_KEY = "NOTEBOOK_B_ID" -# DATAZONE_ROLE_ARN_KEY = "DATAZONE_ROLE_ARN" +DATAZONE_ROLE_ARN_KEY = "DATAZONE_ROLE_ARN" sys_test_context_task = ( SystemTestContextBuilder() @@ -65,11 +65,11 @@ .add_variable(PROJECT_ID_KEY) .add_variable(NOTEBOOK_ID_KEY) .add_variable(NOTEBOOK_B_ID_KEY) - # .add_variable(DATAZONE_ROLE_ARN_KEY) + .add_variable(DATAZONE_ROLE_ARN_KEY) .build() ) -# DATAZONE_CONN_ID = "aws_datazone_notebook" +DATAZONE_CONN_ID = "aws_datazone_notebook" @task @@ -101,39 +101,39 @@ def setup_datazone_connection(role_arn: str): project_id = test_context[PROJECT_ID_KEY] notebook_id = test_context[NOTEBOOK_ID_KEY] notebook_b_id = 
test_context[NOTEBOOK_B_ID_KEY] - # configure_conn = setup_datazone_connection(test_context[DATAZONE_ROLE_ARN_KEY]) + configure_conn = setup_datazone_connection(test_context[DATAZONE_ROLE_ARN_KEY]) # [START howto_operator_sagemaker_unified_studio_notebook] client_token = f"idempotency-token-{int(time.time())}" - # run_notebook = SageMakerUnifiedStudioNotebookOperator( - # task_id="notebook-task", - # aws_conn_id=DATAZONE_CONN_ID, - # notebook_identifier=notebook_id, - # domain_identifier=domain_id, - # owning_project_identifier=project_id, - # client_token=client_token, # optional - # notebook_parameters={ - # "param1": "value1", - # "param2": "value2", - # }, # optional - # compute_configuration={"instanceType": "sc.m5.large"}, # optional - # timeout_configuration={"runTimeoutInMinutes": 1440}, # optional - # wait_for_completion=True, # optional - # waiter_delay=30, # optional - # deferrable=False, # optional - # ) + run_notebook = SageMakerUnifiedStudioNotebookOperator( + task_id="notebook-task", + aws_conn_id=DATAZONE_CONN_ID, + notebook_identifier=notebook_id, + domain_identifier=domain_id, + owning_project_identifier=project_id, + client_token=client_token, # optional + notebook_parameters={ + "param1": "value1", + "param2": "value2", + }, # optional + compute_configuration={"instanceType": "sc.m5.large"}, # optional + timeout_configuration={"runTimeoutInMinutes": 1440}, # optional + wait_for_completion=True, # optional + waiter_delay=30, # optional + deferrable=False, # optional + ) # [END howto_operator_sagemaker_unified_studio_notebook] # [START howto_sensor_sagemaker_unified_studio_notebook] - # run_sensor = SageMakerUnifiedStudioNotebookSensor( - # task_id="notebook-sensor-task", - # aws_conn_id=DATAZONE_CONN_ID, - # domain_identifier=domain_id, - # owning_project_identifier=project_id, - # notebook_identifier=notebook_id, - # notebook_run_id=run_notebook.output["notebook_run_id"], - # ) + run_sensor = SageMakerUnifiedStudioNotebookSensor( + 
task_id="notebook-sensor-task", + aws_conn_id=DATAZONE_CONN_ID, + domain_identifier=domain_id, + owning_project_identifier=project_id, + notebook_identifier=notebook_id, + notebook_run_id=run_notebook.output["notebook_run_id"], + ) # [END howto_sensor_sagemaker_unified_studio_notebook] # [START howto_operator_sagemaker_unified_studio_notebook_pass_outputs] @@ -141,7 +141,7 @@ def setup_datazone_connection(role_arn: str): # Notebook B consumes those outputs via Jinja templating in notebook_parameters. run_notebook_a = SageMakerUnifiedStudioNotebookOperator( task_id="notebook-a-task", - # aws_conn_id=DATAZONE_CONN_ID, + aws_conn_id=DATAZONE_CONN_ID, notebook_identifier=notebook_id, domain_identifier=domain_id, owning_project_identifier=project_id, @@ -151,13 +151,13 @@ def setup_datazone_connection(role_arn: str): run_notebook_b = SageMakerUnifiedStudioNotebookOperator( task_id="notebook-b-task", - # aws_conn_id=DATAZONE_CONN_ID, + aws_conn_id=DATAZONE_CONN_ID, notebook_identifier=notebook_b_id, domain_identifier=domain_id, owning_project_identifier=project_id, notebook_parameters={ - "name": "{{ task_instance.xcom_pull(task_ids='notebook-a-task', key='NOTEBOOK_OUTPUT.name') }}", - "age": "{{ task_instance.xcom_pull(task_ids='notebook-a-task', key='NOTEBOOK_OUTPUT.age') }}", + "employee_name": "{{ task_instance.xcom_pull(task_ids='notebook-a-task', key='NOTEBOOK_OUTPUT.name') }}", + "employee_age": "{{ task_instance.xcom_pull(task_ids='notebook-a-task', key='NOTEBOOK_OUTPUT.age') }}", }, wait_for_completion=True, deferrable=False, @@ -166,9 +166,9 @@ def setup_datazone_connection(role_arn: str): chain( test_context, - # configure_conn, - # run_notebook, - # run_sensor, + configure_conn, + run_notebook, + run_sensor, run_notebook_a, run_notebook_b, ) From d6a7a282d6ea589f13263b52713acc56bf0422db Mon Sep 17 00:00:00 2001 From: qaziashikin Date: Wed, 3 Jun 2026 22:10:11 -0400 Subject: [PATCH 4/5] Update comments to clarify paths --- 
.../aws/hooks/sagemaker_unified_studio_notebook.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py b/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py index 8c782ba450471..2c548a5c45cf3 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/sagemaker_unified_studio_notebook.py @@ -262,11 +262,7 @@ def get_project_s3_path(self, domain_identifier: str, project_id: str) -> tuple[ :param project_id: The ID of the DataZone project. :return: A ``(bucket, prefix)`` tuple. ``bucket`` is the S3 bucket name. ``prefix`` is the path component of the project's - ``s3BucketPath`` (with no leading or trailing ``/``) — typically - ``""`` for IAM domains and - ``"<domain_id>/<project_id>/<env>"`` for IDC domains. - Callers should prepend ``prefix`` to any S3 key they construct so - writes/reads stay within the IAM scope of the project's role. + ``s3BucketPath`` (with no leading or trailing ``/``). :raises RuntimeError: If the default tooling environment or the ``s3BucketPath`` provisioned resource cannot be found. """ @@ -282,8 +278,8 @@ def get_project_s3_path(self, domain_identifier: str, project_id: str) -> tuple[ f"environment {environment_id} for project {project_id} in domain " f"{domain_identifier}" ) - # value looks like "s3://<bucket>" (IAM) or - # "s3://<bucket>/<domain_id>/<project_id>/<env>" (IDC). Return both + # value looks like "s3://<bucket>/shared/" (IAM) or + # "s3://<bucket>/<domain_id>/<project_id>/dev/" (IDC). Return both # parts so callers can construct project-scoped keys. 
parts = urlparse(value, allow_fragments=False) bucket = parts.netloc From b36a6ebb501102fb2d5d72742dbd54914b40441c Mon Sep 17 00:00:00 2001 From: qaziashikin Date: Thu, 4 Jun 2026 00:46:37 -0400 Subject: [PATCH 5/5] Fix CI checks for ruff formatting --- .../aws/hooks/test_sagemaker_unified_studio_notebook.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py b/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py index 79e37fa801af8..728ec55ca02c1 100644 --- a/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py +++ b/providers/amazon/tests/unit/amazon/aws/hooks/test_sagemaker_unified_studio_notebook.py @@ -509,9 +509,7 @@ def test_get_notebook_outputs_success(self): ) assert result == outputs - expected_key = ( - f"dzd_x/{PROJECT_ID}/dev/.sys/notebooks/{NOTEBOOK_ID}/runs/{NOTEBOOK_RUN_ID}/notebook_outputs.json" - ) + expected_key = f"dzd_x/{PROJECT_ID}/dev/.sys/notebooks/{NOTEBOOK_ID}/runs/{NOTEBOOK_RUN_ID}/notebook_outputs.json" mock_s3_hook_cls.return_value.read_key.assert_called_once_with(key=expected_key, bucket_name=bucket) def test_get_notebook_outputs_iam_mode_no_prefix(self):