From e86d47d719aaac6d12e531aca6e6b94312d0e30d Mon Sep 17 00:00:00 2001 From: Niko Oliveira Date: Thu, 28 Apr 2022 15:12:58 -0700 Subject: [PATCH] Update docs Amazon Glacier Docs --- .../example_dags/example_glacier_to_gcs.py | 6 +- .../operators/glacier.rst | 59 ++++++++----------- .../operators/transfer/glacier_to_gcs.rst | 30 +++++----- 3 files changed, 43 insertions(+), 52 deletions(-) diff --git a/airflow/providers/amazon/aws/example_dags/example_glacier_to_gcs.py b/airflow/providers/amazon/aws/example_dags/example_glacier_to_gcs.py index 274c5737c9cac..2c9f732caf444 100644 --- a/airflow/providers/amazon/aws/example_dags/example_glacier_to_gcs.py +++ b/airflow/providers/amazon/aws/example_dags/example_glacier_to_gcs.py @@ -17,7 +17,7 @@ import os from datetime import datetime -from airflow import models +from airflow import DAG from airflow.providers.amazon.aws.operators.glacier import GlacierCreateJobOperator from airflow.providers.amazon.aws.sensors.glacier import GlacierJobOperationSensor from airflow.providers.amazon.aws.transfers.glacier_to_gcs import GlacierToGCSOperator @@ -26,7 +26,7 @@ BUCKET_NAME = os.environ.get("GLACIER_GCS_BUCKET_NAME", "gs://INVALID BUCKET NAME") OBJECT_NAME = os.environ.get("GLACIER_OBJECT", "example-text.txt") -with models.DAG( +with DAG( "example_glacier_to_gcs", schedule_interval=None, start_date=datetime(2021, 1, 1), # Override to match your needs @@ -56,8 +56,6 @@ # If chunk size is bigger than actual file size # then whole file will be downloaded chunk_size=1024, - delegate_to=None, - google_impersonation_chain=None, ) # [END howto_glacier_transfer_data_to_gcs] diff --git a/docs/apache-airflow-providers-amazon/operators/glacier.rst b/docs/apache-airflow-providers-amazon/operators/glacier.rst index 046e7d1a6f908..747b46d46a024 100644 --- a/docs/apache-airflow-providers-amazon/operators/glacier.rst +++ b/docs/apache-airflow-providers-amazon/operators/glacier.rst @@ -16,24 +16,25 @@ under the License. 
-Amazon Glacier Operator -======================= +Amazon S3 Glacier Operators +=========================== -Amazon Glacier is a secure, durable, and extremely low-cost Amazon S3 cloud storage classes for data archiving and long-term backup. -For more information about the service visit `Amazon Glacier API documentation `_ +`Amazon Glacier <https://docs.aws.amazon.com/amazonglacier/latest/dev/introduction.html>`_ is a secure, durable, and extremely low-cost Amazon S3 cloud storage class for data archiving and long-term backup. -.. _howto/operator:GlacierCreateJobOperator: +Prerequisite Tasks +------------------ + +.. include:: _partials/prerequisite_tasks.rst -GlacierCreateJobOperator -^^^^^^^^^^^^^^^^^^^^^^^^ +.. _howto/operator:GlacierCreateJobOperator: -Operator task is to initiate an Amazon Glacier inventory-retrieval job. -The operation returns dictionary of information related to the initiated job like *jobId* what is required for subsequent tasks. +Amazon Glacier Create Job Operator +"""""""""""""""""""""""""""""""""" -To get more information about operator visit: -:class:`~airflow.providers.amazon.aws.transfers.glacier_to_gcs.GlacierCreateJobOperator` +To initiate an Amazon Glacier inventory retrieval job +use :class:`~airflow.providers.amazon.aws.operators.glacier.GlacierCreateJobOperator` -Example usage: +This Operator returns a dictionary of information related to the initiated job such as *jobId*, which is required for subsequent tasks. .. exampleinclude:: /../../airflow/providers/amazon/aws/example_dags/example_glacier_to_gcs.py :language: python @@ -43,30 +44,22 @@ Example usage: .. _howto/operator:GlacierJobOperationSensor: -GlacierJobOperationSensor -^^^^^^^^^^^^^^^^^^^^^^^^^ +Amazon Glacier Job Sensor +""""""""""""""""""""""""" -Operator task is to wait until task *create_glacier_job* will be completed. -When sensor returns *true* then subsequent tasks can be executed. -In this case subsequent tasks are: *GlacierDownloadArchive* and *GlacierTransferDataToGCS*. 
+To wait for an Amazon Glacier job to reach a terminal state +use :class:`~airflow.providers.amazon.aws.sensors.glacier.GlacierJobOperationSensor` -Job states: - -* *Succeeded* – job is finished and for example archives from the vault can be downloaded -* *InProgress* – job is in progress and you have to wait until it's done (*Succeeded*) +.. exampleinclude:: /../../airflow/providers/amazon/aws/example_dags/example_glacier_to_gcs.py + :language: python + :dedent: 4 + :start-after: [START howto_glacier_job_operation_sensor] + :end-before: [END howto_glacier_job_operation_sensor] -GlacierJobOperationSensor checks the job status. -If response status code is *succeeded* then sensor returns *true* and subsequent tasks will be executed. -If response code is *InProgress* then sensor returns *false* and reschedule task with *poke_interval=60 * 20*. -Which means that every next request will be sent every 20 minutes. -To get more information about operator visit: -:class:`~airflow.providers.amazon.aws.sensors.glacier.GlacierJobOperationSensor` +References +---------- -Example usage: +For further information, look at: -.. exampleinclude:: /../../airflow/providers/amazon/aws/example_dags/example_glacier_to_gcs.py - :language: python - :dedent: 4 - :start-after: [START howto_glacier_transfer_data_to_gcs] - :end-before: [END howto_glacier_transfer_data_to_gcs] +* `Boto3 Library Documentation for Amazon Glacier <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/glacier.html>`__ diff --git a/docs/apache-airflow-providers-amazon/operators/transfer/glacier_to_gcs.rst b/docs/apache-airflow-providers-amazon/operators/transfer/glacier_to_gcs.rst index a8f2b1e008768..b23e0cef92e6c 100644 --- a/docs/apache-airflow-providers-amazon/operators/transfer/glacier_to_gcs.rst +++ b/docs/apache-airflow-providers-amazon/operators/transfer/glacier_to_gcs.rst @@ -16,30 +16,30 @@ under the License. 
-Amazon Glacier Transfer Operator -================================ +Amazon S3 Glacier to GCS Transfer Operator +========================================== -Amazon Glacier is a secure, durable, and extremely low-cost Amazon S3 cloud storage classes for data archiving and long-term backup. -For more information about the service visit `Amazon Glacier API documentation `_ +`Amazon Glacier <https://docs.aws.amazon.com/amazonglacier/latest/dev/introduction.html>`_ is a secure, durable, and extremely low-cost Amazon S3 cloud storage class for data archiving and long-term backup. -.. _howto/operator:GlacierToGCSOperator: - -GlacierToGCSOperator -^^^^^^^^^^^^^^^^^^^^ +Prerequisite Tasks +------------------ -Operator task is transfer data from Glacier vault to Google Cloud Storage. +.. include:: ../_partials/prerequisite_tasks.rst -.. note:: - Please be aware that GlacierToGCSOperator may depend on memory usage. - Transferring big files may not work well. +.. _howto/operator:GlacierToGCSOperator: -To get more information about operator visit: -:class:`~airflow.providers.amazon.aws.transfers.glacier_to_gcs.GlacierToGCSOperator` +Glacier To GCS +"""""""""""""" -Example usage: +To transfer data from an Amazon Glacier vault to Google Cloud Storage +use :class:`~airflow.providers.amazon.aws.transfers.glacier_to_gcs.GlacierToGCSOperator` .. exampleinclude:: /../../airflow/providers/amazon/aws/example_dags/example_glacier_to_gcs.py :language: python :dedent: 4 :start-after: [START howto_glacier_transfer_data_to_gcs] :end-before: [END howto_glacier_transfer_data_to_gcs] + +.. note:: + Please be aware that GlacierToGCSOperator depends on available memory. + Transferring large files may exhaust memory on the worker host.