diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index a108634d229db..145c091e60054 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -1148,6 +1148,8 @@ ok oklch Okta okta +OL +ol Ollama onboarded onboarding diff --git a/generated/provider_dependencies.json.sha256sum b/generated/provider_dependencies.json.sha256sum index 413fc17032478..b7f44443a3ab6 100644 --- a/generated/provider_dependencies.json.sha256sum +++ b/generated/provider_dependencies.json.sha256sum @@ -1 +1 @@ -8609061b1d7c65722ca143c6e54bf569c2b3bb2bfeac9ecc85c97a114a5d83ac +bb7437125421517dcc83ca840e1c068e25179eff8aab93b87766ec29d0dfa3b0 diff --git a/providers/openlineage/src/airflow/providers/openlineage/api/datasets.py b/providers/openlineage/src/airflow/providers/openlineage/api/datasets.py index 81b119f40c1f5..6e83e66fb6f66 100644 --- a/providers/openlineage/src/airflow/providers/openlineage/api/datasets.py +++ b/providers/openlineage/src/airflow/providers/openlineage/api/datasets.py @@ -48,6 +48,7 @@ from airflow.models.taskinstance import TaskInstance from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance + from airflow.sdk.types import RuntimeTaskInstanceProtocol log = logging.getLogger(__name__) @@ -56,9 +57,9 @@ def emit_dataset_lineage( *, - inputs: list[InputDataset] | None = None, - outputs: list[OutputDataset] | None = None, - task_instance: RuntimeTaskInstance | TaskInstance | None = None, + inputs: list[InputDataset | Dataset] | None = None, + outputs: list[OutputDataset | Dataset] | None = None, + task_instance: RuntimeTaskInstanceProtocol | RuntimeTaskInstance | TaskInstance | None = None, additional_run_facets: dict[str, RunFacet] | None = None, additional_job_facets: dict[str, JobFacet] | None = None, raise_on_error: bool = False, @@ -159,8 +160,8 @@ def my_task(): name=lineage_job_name(task_instance), facets=job_facets, ), - inputs=inputs, - outputs=outputs, + inputs=inputs, # type: ignore[arg-type] + outputs=outputs, # 
type: ignore[arg-type] producer=_PRODUCER, ) diff --git a/providers/openlineage/src/airflow/providers/openlineage/api/sql.py b/providers/openlineage/src/airflow/providers/openlineage/api/sql.py index ea009f920100e..b59b1f2056f6a 100644 --- a/providers/openlineage/src/airflow/providers/openlineage/api/sql.py +++ b/providers/openlineage/src/airflow/providers/openlineage/api/sql.py @@ -38,11 +38,12 @@ if TYPE_CHECKING: from datetime import datetime - from openlineage.client.event_v2 import Dataset + from openlineage.client.event_v2 import Dataset, InputDataset, OutputDataset from openlineage.client.facet_v2 import JobFacet, RunFacet from airflow.models.taskinstance import TaskInstance from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance + from airflow.sdk.types import RuntimeTaskInstanceProtocol log = logging.getLogger(__name__) @@ -54,8 +55,8 @@ def emit_query_lineage( query_id: str | None = None, query_source_namespace: str | None = None, query_text: str | None = None, - inputs: list[Dataset] | None = None, - outputs: list[Dataset] | None = None, + inputs: list[InputDataset | Dataset] | None = None, + outputs: list[OutputDataset | Dataset] | None = None, start_time: datetime | None = None, end_time: datetime | None = None, is_successful: bool = True, @@ -63,7 +64,7 @@ def emit_query_lineage( default_database: str | None = None, default_schema: str | None = None, job_name: str | None = None, - task_instance: TaskInstance | RuntimeTaskInstance | None = None, + task_instance: RuntimeTaskInstanceProtocol | RuntimeTaskInstance | TaskInstance | None = None, additional_run_facets: dict[str, RunFacet] | None = None, additional_job_facets: dict[str, JobFacet] | None = None, raise_on_error: bool = False, diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py new file mode 100644 index 0000000000000..dce37a1d91448 --- /dev/null +++ 
b/providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py @@ -0,0 +1,508 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG with a custom operator that emits all importable openlineage.client.facet_v2 facets that OL does not overwrite. 
+ +It checks: + - all run facets + - all job facets + - all input dataset facets + - all output dataset facets + - one custom facet per entity + - all optional parameters are populated for comprehensive serialization coverage +""" + +from __future__ import annotations + +from datetime import datetime + +from openlineage.client.event_v2 import InputDataset, OutputDataset +from openlineage.client.facet_v2 import ( + base_subset_dataset, + catalog_dataset, + column_lineage_dataset, + data_quality_assertions_dataset, + data_quality_metrics_dataset, + data_quality_metrics_input_dataset, + dataset_type_dataset, + dataset_version_dataset, + datasource_dataset, + documentation_dataset, + documentation_job, + environment_variables_run, + error_message_run, + execution_parameters_run, + external_query_run, + extraction_error_run, + hierarchy_dataset, + input_statistics_input_dataset, + job_dependencies_run, + lifecycle_state_change_dataset, + output_statistics_output_dataset, + ownership_dataset, + ownership_job, + schema_dataset, + source_code_job, + source_code_location_job, + sql_job, + storage_dataset, + symlinks_dataset, + tags_dataset, + tags_job, + tags_run, + test_run as test_run_facet_module, +) + +from airflow import DAG +from airflow.providers.common.compat.sdk import BaseOperator +from airflow.providers.openlineage.extractors.base import OperatorLineage + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + + +class AllFacetsOperator(BaseOperator): + def execute(self, context): + pass + + def get_openlineage_facets_on_start(self) -> OperatorLineage: + input_ds = InputDataset( + namespace="s3://all-facets-bucket", + name="input/data.csv", + facets={ + "custom_input_ds_facet": {"key": "value"}, # type: ignore[dict-item] + "schema": schema_dataset.SchemaDatasetFacet( + fields=[ + schema_dataset.SchemaDatasetFacetFields( + name="id", + type="INTEGER", + description="Unique row 
identifier", + ordinal_position=1, + ), + schema_dataset.SchemaDatasetFacetFields( + name="name", + type="VARCHAR", + description="Full name of the entity", + ordinal_position=2, + ), + schema_dataset.SchemaDatasetFacetFields( + name="address", + type="STRUCT", + description="Nested address record", + ordinal_position=3, + fields=[ + schema_dataset.SchemaDatasetFacetFields( + name="street", + type="VARCHAR", + description="Street address line", + ordinal_position=1, + ), + schema_dataset.SchemaDatasetFacetFields( + name="city", + type="VARCHAR", + description="City name", + ordinal_position=2, + ), + ], + ), + ] + ), + "dataSource": datasource_dataset.DatasourceDatasetFacet( + name="all-facets-source", + uri="s3://all-facets-bucket", + ), + "columnLineage": column_lineage_dataset.ColumnLineageDatasetFacet( + fields={ + "name": column_lineage_dataset.Fields( + inputFields=[ + column_lineage_dataset.InputField( + namespace="s3://upstream", + name="upstream/data.csv", + field="full_name", + transformations=[ + column_lineage_dataset.Transformation( + type="DIRECT", + subtype="IDENTITY", + description="Direct copy of the full_name field", + masking=False, + ) + ], + ) + ], + transformationDescription="Column passed through without modification", + transformationType="IDENTITY", + ), + }, + ), + "documentation": documentation_dataset.DocumentationDatasetFacet( + description="Input dataset for all-facets comprehensive serialization test", + contentType="text/plain", + ), + "inputStatistics": input_statistics_input_dataset.InputStatisticsInputDatasetFacet( # type: ignore[dict-item] + rowCount=1000, + size=8192, + fileCount=1, + ), + "dataQualityAssertions": data_quality_assertions_dataset.DataQualityAssertionsDatasetFacet( # type: ignore[dict-item] + assertions=[ + data_quality_assertions_dataset.Assertion( + assertion="not_null", + success=True, + column="id", + severity="ERROR", + name="id_not_null_check", + description="Checks that id column has no null values", + 
expected="0 nulls", + actual="0 nulls", + content='{"nullCount": 0}', + contentType="application/json", + params={"sample_size": "1000"}, + ), + data_quality_assertions_dataset.Assertion( + assertion="row_count_above_threshold", + success=True, + severity="WARNING", + name="row_count_check", + description="Checks that row count is above the minimum threshold", + expected=">= 100", + actual="1000", + ), + ], + ), + "ownership": ownership_dataset.OwnershipDatasetFacet( + owners=[ + ownership_dataset.Owner(name="team:data-engineering", type="team"), + ownership_dataset.Owner(name="user:jane.smith@example.com", type="user"), + ], + ), + "tags": tags_dataset.TagsDatasetFacet( + tags=[ + tags_dataset.TagsDatasetFacetFields( + key="env", + value="test", + source="airflow-system-test", + ), + tags_dataset.TagsDatasetFacetFields( + key="source", + value="s3", + source="airflow-system-test", + ), + tags_dataset.TagsDatasetFacetFields( + key="format", + value="csv", + source="airflow-system-test", + field="id", + ), + ], + ), + "catalog": catalog_dataset.CatalogDatasetFacet( + framework="iceberg", + type="TABLE", + name="all_facets_input", + metadataUri="s3://metastore/all_facets.json", + warehouseUri="s3://all-facets-bucket/warehouse", + source="s3://all-facets-bucket/catalog.json", + catalogProperties={"location": "s3://all-facets-bucket/warehouse/all_facets_input"}, + ), + "dataQualityMetrics": data_quality_metrics_dataset.DataQualityMetricsDatasetFacet( + columnMetrics={ + "id": data_quality_metrics_dataset.ColumnMetrics( + nullCount=0, + distinctCount=1000, + sum=500500.0, + count=1000, + min=1.0, + max=1000.0, + quantiles={"0.25": 250.0, "0.5": 500.0, "0.75": 750.0}, + ), + "name": data_quality_metrics_dataset.ColumnMetrics( + nullCount=5, + distinctCount=980, + count=1000, + ), + }, + rowCount=1000, + bytes=8192, + fileCount=1, + ), + "dataQualityMetricsInput": data_quality_metrics_input_dataset.DataQualityMetricsInputDatasetFacet( # type: ignore[dict-item] + 
columnMetrics={ + "id": data_quality_metrics_input_dataset.ColumnMetrics( + nullCount=0, + distinctCount=1000, + sum=500500.0, + count=1000, + min=1.0, + max=1000.0, + quantiles={"0.25": 250.0, "0.5": 500.0, "0.75": 750.0}, + ), + }, + rowCount=1000, + bytes=8192, + fileCount=1, + ), + "datasetType": dataset_type_dataset.DatasetTypeDatasetFacet( + datasetType="TABLE", + subType="ICEBERG_TABLE", + ), + "hierarchy": hierarchy_dataset.HierarchyDatasetFacet( + hierarchy=[ + hierarchy_dataset.HierarchyDatasetFacetLevel(type="catalog", name="aws-glue"), + hierarchy_dataset.HierarchyDatasetFacetLevel(type="database", name="analytics"), + hierarchy_dataset.HierarchyDatasetFacetLevel(type="schema", name="public"), + ], + ), + "inputSubset": base_subset_dataset.InputSubsetInputDatasetFacet( # type: ignore[dict-item] + inputCondition=base_subset_dataset.PartitionSubsetCondition( + partitions=[ + base_subset_dataset.Partition( + dimensions={"business_date": "2024-10-15", "country": "PL"}, + identifier="2024-01-01/us-east-1", + ) + ], + type="partition", + ), + ), + }, + ) + + output_ds = OutputDataset( + namespace="snowflake://account", + name="analytics.public.all_facets_output", + facets={ + "custom_output_ds_facet": {"key": "value"}, # type: ignore[dict-item] + "outputStatistics": output_statistics_output_dataset.OutputStatisticsOutputDatasetFacet( # type: ignore[dict-item] + rowCount=500, + size=4096, + fileCount=1, + ), + "storage": storage_dataset.StorageDatasetFacet( + storageLayer="snowflake", + fileFormat="table", + ), + "symlinks": symlinks_dataset.SymlinksDatasetFacet( + identifiers=[ + symlinks_dataset.Identifier( + namespace="snowflake://account", + name="analytics.public.all_facets_output_alias", + type="TABLE", + ), + ], + ), + "version": dataset_version_dataset.DatasetVersionDatasetFacet( + datasetVersion="v1.0.0", + ), + "lifecycleStateChange": lifecycle_state_change_dataset.LifecycleStateChangeDatasetFacet( + 
lifecycleStateChange=lifecycle_state_change_dataset.LifecycleStateChange.CREATE, + previousIdentifier=lifecycle_state_change_dataset.PreviousIdentifier( + name="analytics.public.all_facets_output_v0", + namespace="snowflake://account", + ), + ), + "outputSubset": base_subset_dataset.OutputSubsetOutputDatasetFacet( # type: ignore[dict-item] + outputCondition=base_subset_dataset.PartitionSubsetCondition( + partitions=[ + base_subset_dataset.Partition( + dimensions={"business_date": "2024-10-15", "country": "PL"}, + identifier="2024-01-01", + ) + ], + type="partition", + ), + ), + }, + ) + + return OperatorLineage( + inputs=[input_ds], + outputs=[output_ds], + run_facets={ + "custom_run_facet": {"key": "value"}, + "tags": tags_run.TagsRunFacet( + tags=[ + tags_run.TagsRunFacetFields( + key="test_type", + value="all_facets", + source="airflow-system-test", + ), + ], + ), + "externalQuery": external_query_run.ExternalQueryRunFacet( + externalQueryId="all-facets-query-id-001", + source="snowflake://account", + ), + "testRun": test_run_facet_module.TestRunFacet( + tests=[ + test_run_facet_module.TestExecution( + name="all_facets_test", + status="success", + severity="WARNING", + type="integration", + description="Checks all facets are emitted and serialized correctly", + expected="all_facets_present", + actual="all_facets_present", + content='{"facetCount": 37}', + contentType="application/json", + params={"batch": "1"}, + ), + ], + ), + "environmentVariables": environment_variables_run.EnvironmentVariablesRunFacet( + environmentVariables=[ + environment_variables_run.EnvironmentVariable( + name="SPARK_MASTER", + value="yarn", + ), + environment_variables_run.EnvironmentVariable( + name="JAVA_HOME", + value="/usr/lib/jvm/java-11-openjdk", + ), + ], + ), + "errorMessage": error_message_run.ErrorMessageRunFacet( + message="Non-fatal warning detected during extraction phase", + programmingLanguage="python", + stackTrace=( + "Traceback (most recent call last):\n" + ' File 
"extract.py", line 42, in run\n' + ' raise ValueError("Partial extraction failure")\n' + "ValueError: Partial extraction failure" + ), + ), + "executionParameters": execution_parameters_run.ExecutionParametersRunFacet( + parameters=[ + execution_parameters_run.ExecutionParameter( + key="executor-cores", + name="executor-cores", + description="Number of CPU cores per executor", + value="4", + ), + execution_parameters_run.ExecutionParameter( + key="executor-memory", + name="executor-memory", + description="Memory allocated per executor in gigabytes", + value="8g", + ), + ], + ), + "extractionError": extraction_error_run.ExtractionErrorRunFacet( + totalTasks=10, + failedTasks=1, + errors=[ + extraction_error_run.Error( + errorMessage="Unable to parse column 'event_ts' as TIMESTAMP", + stackTrace=( + "Traceback (most recent call last):\n" + ' File "schema.py", line 18, in parse_column\n' + ' raise TypeError("Cannot cast to TIMESTAMP")\n' + "TypeError: Cannot cast to TIMESTAMP" + ), + task="parse_schema", + taskNumber=3, + ), + ], + ), + "jobDependencies": job_dependencies_run.JobDependenciesRunFacet( + upstream=[ + job_dependencies_run.JobDependency( + job=job_dependencies_run.JobIdentifier( + namespace="default", + name="upstream_etl_job", + ), + run=job_dependencies_run.RunIdentifier( + runId="00000000-0000-0000-0000-000000000001", + ), + dependency_type="WAIT", + sequence_trigger_rule="ALL_DONE", + status_trigger_rule="ALL_SUCCESS", + ), + ], + downstream=[], + trigger_rule="ALL_SUCCESS", + ), + }, + job_facets={ + "custom_job_facet": {"key": "value"}, + "tags": tags_job.TagsJobFacet( + tags=[ + tags_job.TagsJobFacetFields( + key="domain", + value="data-platform", + source="airflow-system-test", + ), + tags_job.TagsJobFacetFields( + key="team", + value="data-engineering", + source="airflow-system-test", + ), + ], + ), + "sourceCodeLocation": source_code_location_job.SourceCodeLocationJobFacet( + type="git", + 
url="https://github.com/apache/airflow/blob/main/providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py", + repoUrl="https://github.com/apache/airflow", + path="providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py", + version="main", + tag="v2.10.0", + branch="main", + pullRequestNumber="12345", + ), + "documentation": documentation_job.DocumentationJobFacet( + description="All-facets test operator emitting every available OL facet", + ), + "sql": sql_job.SQLJobFacet( + query="SELECT id, name FROM analytics.public.all_facets_output WHERE id IS NOT NULL", + dialect="snowflake_sql", + ), + "ownership": ownership_job.OwnershipJobFacet( + owners=[ + ownership_job.Owner(name="data-engineering-team", type="team"), + ownership_job.Owner(name="user:john.doe@example.com", type="user"), + ], + ), + "sourceCode": source_code_job.SourceCodeJobFacet( + language="python", + sourceCode=( + "def transform(df):\n return df.select('id', 'name').filter(df.id.isNotNull())\n" + ), + ), + }, + ) + + +DAG_ID = "openlineage_all_facets_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=None, + catchup=False, + default_args={"retries": 0}, +) as dag: + all_facets_task = AllFacetsOperator(task_id="all_facets_task") + + check_events = OpenLineageTestOperator( + task_id="check_events", file_path=get_expected_event_file_path(DAG_ID) + ) + + all_facets_task >> check_events + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_custom_operator_failure_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_custom_operator_failure_dag.py new file mode 100644 index 0000000000000..6767985c47aff --- /dev/null +++ 
b/providers/openlineage/tests/system/openlineage/example_openlineage_custom_operator_failure_dag.py @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG with a custom operator that intentionally fails and implements get_openlineage_facets_on_failure. 
+ +It checks: + - START event carries inputs from get_openlineage_facets_on_start + - FAIL event carries all expected facets + run facets from get_openlineage_facets_on_failure +""" + +from __future__ import annotations + +from datetime import datetime + +from openlineage.client.event_v2 import InputDataset + +from airflow import DAG +from airflow.providers.common.compat.sdk import BaseOperator +from airflow.providers.openlineage.extractors.base import OperatorLineage +from airflow.providers.standard.operators.empty import EmptyOperator +from airflow.utils.trigger_rule import TriggerRule + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + + +class FailingOLOperator(BaseOperator): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.my_test_attr = None + + def execute(self, context): + self.my_test_attr = "123" + raise ValueError("Intentional failure for OpenLineage on_failure testing") + + def get_openlineage_facets_on_start(self) -> OperatorLineage: + return OperatorLineage( + run_facets={"custom_facet": {"random_facet_key": self.my_test_attr}}, + inputs=[InputDataset(namespace="s3://failure-test", name="before_fail.csv")], + ) + + def get_openlineage_facets_on_failure(self, task_instance) -> OperatorLineage: + return OperatorLineage( + run_facets={"custom_facet": {"random_facet_key": self.my_test_attr}}, + ) + + +DAG_ID = "openlineage_custom_operator_failure_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=None, + catchup=False, + default_args={"retries": 0}, + description="custom_description", + tags=["first", "second"], +) as dag: + failing_task = FailingOLOperator(task_id="failing_task", owner="some_owner1", doc_rst="RST doc") + + empty_task = EmptyOperator(task_id="empty_success", trigger_rule=TriggerRule.ONE_FAILED) + + check_events = OpenLineageTestOperator( + task_id="check_events", + 
file_path=get_expected_event_file_path(DAG_ID), + ) + + failing_task >> empty_task >> check_events + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_custom_operator_ol_methods_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_custom_operator_ol_methods_dag.py new file mode 100644 index 0000000000000..846c8a8e7b98a --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_custom_operator_ol_methods_dag.py @@ -0,0 +1,354 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG with custom operators implementing every non-empty subset of OpenLineage lifecycle hooks. 
+ +It checks: + - on_start only: COMPLETE event reuses on_start result (DefaultExtractor fallback) + - on_complete only: START event is empty; COMPLETE carries custom facets + - on_failure only: START and COMPLETE are empty on the happy path + - on_start + on_complete: each event carries its own independent facets + - on_start + on_failure: COMPLETE falls back to on_start (no on_complete) + - on_complete + on_failure: START is empty; COMPLETE uses on_complete + - all three: START uses on_start, COMPLETE uses on_complete (on_failure ignored on success) + - run_facets, job_facets, input dataset facets, and output dataset facets are preserved + for each method and appear only in the events driven by that method +""" + +from __future__ import annotations + +from datetime import datetime + +from openlineage.client.event_v2 import InputDataset, OutputDataset + +from airflow import DAG +from airflow.providers.common.compat.sdk import BaseOperator +from airflow.providers.openlineage.extractors.base import OperatorLineage + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + + +class OnlyStartOp(BaseOperator): + def execute(self, context): + pass + + def get_openlineage_facets_on_start(self) -> OperatorLineage: + return OperatorLineage( + run_facets={"only-start-run": {"key": "value"}}, + job_facets={"only-start-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://only-start", + name="input.csv", + facets={"only-start-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://only-start", + name="output.csv", + facets={"only-start-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + +class OnlyCompleteOp(BaseOperator): + def execute(self, context): + pass + + def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage: + return OperatorLineage( + 
run_facets={"only-complete-run": {"key": "value"}}, + job_facets={"only-complete-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://only-complete", + name="input.csv", + facets={"only-complete-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://only-complete", + name="output.csv", + facets={"only-complete-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + +class OnlyFailureOp(BaseOperator): + def execute(self, context): + pass + + def get_openlineage_facets_on_failure(self, task_instance) -> OperatorLineage: + return OperatorLineage( + run_facets={"only-failure-run": {"key": "value"}}, + job_facets={"only-failure-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://only-failure", + name="failure_input.csv", + facets={"only-failure-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://only-failure", + name="output.csv", + facets={"only-failure-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + +class StartCompleteOp(BaseOperator): + def execute(self, context): + pass + + def get_openlineage_facets_on_start(self) -> OperatorLineage: + return OperatorLineage( + run_facets={"sc-start-run": {"key": "value"}}, + job_facets={"sc-start-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://start-complete", + name="start_input.csv", + facets={"sc-start-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://start-complete", + name="start_output.csv", + facets={"sc-start-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage: + return OperatorLineage( + run_facets={"sc-complete-run": {"key": "value"}}, + job_facets={"sc-complete-job": {"key": "value"}}, + inputs=[ + InputDataset( + 
namespace="s3://start-complete", + name="complete_input.csv", + facets={"sc-complete-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://start-complete", + name="complete_output.csv", + facets={"sc-complete-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + +class StartFailureOp(BaseOperator): + def execute(self, context): + pass + + def get_openlineage_facets_on_start(self) -> OperatorLineage: + return OperatorLineage( + run_facets={"sf-start-run": {"key": "value"}}, + job_facets={"sf-start-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://start-failure", + name="start_input.csv", + facets={"sf-start-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://start-failure", + name="start_output.csv", + facets={"sf-start-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + def get_openlineage_facets_on_failure(self, task_instance) -> OperatorLineage: + return OperatorLineage( + run_facets={"sf-failure-run": {"key": "value"}}, + job_facets={"sf-failure-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://start-failure", + name="failure_input.csv", + facets={"sf-failure-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://start-failure", + name="failure_output.csv", + facets={"sf-failure-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + +class CompleteFailureOp(BaseOperator): + def execute(self, context): + pass + + def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage: + return OperatorLineage( + run_facets={"cf-complete-run": {"key": "value"}}, + job_facets={"cf-complete-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://complete-failure", + name="complete_input.csv", + facets={"cf-complete-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) 
+ ], + outputs=[ + OutputDataset( + namespace="s3://complete-failure", + name="complete_output.csv", + facets={"cf-complete-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + def get_openlineage_facets_on_failure(self, task_instance) -> OperatorLineage: + return OperatorLineage( + run_facets={"cf-failure-run": {"key": "value"}}, + job_facets={"cf-failure-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://complete-failure", + name="failure_input.csv", + facets={"cf-failure-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://complete-failure", + name="failure_output.csv", + facets={"cf-failure-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + +class AllThreeOp(BaseOperator): + def execute(self, context): + pass + + def get_openlineage_facets_on_start(self) -> OperatorLineage: + return OperatorLineage( + run_facets={"at-start-run": {"key": "value"}}, + job_facets={"at-start-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://all-three", + name="start_input.csv", + facets={"at-start-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://all-three", + name="start_output.csv", + facets={"at-start-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage: + return OperatorLineage( + run_facets={"at-complete-run": {"key": "value"}}, + job_facets={"at-complete-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://all-three", + name="complete_input.csv", + facets={"at-complete-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://all-three", + name="complete_output.csv", + facets={"at-complete-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + def get_openlineage_facets_on_failure(self, 
task_instance) -> OperatorLineage: + return OperatorLineage( + run_facets={"at-failure-run": {"key": "value"}}, + job_facets={"at-failure-job": {"key": "value"}}, + inputs=[ + InputDataset( + namespace="s3://all-three", + name="failure_input.csv", + facets={"at-failure-input-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + outputs=[ + OutputDataset( + namespace="s3://all-three", + name="failure_output.csv", + facets={"at-failure-output-ds": {"key": "value"}}, # type: ignore[dict-item] + ) + ], + ) + + +DAG_ID = "openlineage_custom_operator_ol_methods_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=None, + catchup=False, + default_args={"retries": 0}, +) as dag: + only_start_task = OnlyStartOp(task_id="only_start_task") + only_complete_task = OnlyCompleteOp(task_id="only_complete_task") + only_failure_task = OnlyFailureOp(task_id="only_failure_task") + start_complete_task = StartCompleteOp(task_id="start_complete_task") + start_failure_task = StartFailureOp(task_id="start_failure_task") + complete_failure_task = CompleteFailureOp(task_id="complete_failure_task") + all_three_task = AllThreeOp(task_id="all_three_task") + + check_events = OpenLineageTestOperator( + task_id="check_events", file_path=get_expected_event_file_path(DAG_ID) + ) + + only_start_task >> only_complete_task + only_complete_task >> only_failure_task + only_failure_task >> start_complete_task + start_complete_task >> start_failure_task + start_failure_task >> complete_failure_task + complete_failure_task >> all_three_task + all_three_task >> check_events + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_defer_simple_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_defer_simple_dag.py index 
28dfc81840a43..1925d0209373e 100644 --- a/providers/openlineage/tests/system/openlineage/example_openlineage_defer_simple_dag.py +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_defer_simple_dag.py @@ -42,8 +42,8 @@ default_args={"retries": 0}, ) as dag: # Timedelta is compared to the DAGRun start timestamp, which can occur long before a worker picks up the - # task. We need to ensure the sensor gets deferred at least once, so setting 120s. - wait = TimeDeltaSensor(task_id="wait", delta=timedelta(seconds=120), poke_interval=10, deferrable=True) + # task. We need to ensure the sensor gets deferred at least once, so setting 180s. + wait = TimeDeltaSensor(task_id="wait", delta=timedelta(seconds=180), poke_interval=10, deferrable=True) check_events = OpenLineageTestOperator( task_id="check_events", diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_edge_labels_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_edge_labels_dag.py new file mode 100644 index 0000000000000..d869005bbb7ec --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_edge_labels_dag.py @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+""" +DAG with edge labels on task dependencies. + +It checks: + - edge labels appear in AirflowJobFacet.tasks[task_id].downstream_task_edges + - labeled edges are serialized as {target_id: {"label": "..."}} + - unlabeled edges are serialized as {target_id: {}} (no label key) + - multiple labeled edges from a single task are all captured + - a task with mixed labeled and unlabeled outgoing edges captures both correctly + - a task with no labels at all serializes all its edges as {} +""" + +from __future__ import annotations + +from datetime import datetime + +from airflow import DAG +from airflow.providers.standard.operators.bash import BashOperator +from airflow.utils.edgemodifier import Label + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + +DAG_ID = "openlineage_edge_labels_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=None, + catchup=False, + default_args={"retries": 0}, +) as dag: + # task_1: all outgoing edges are labeled (fan-out with labels) + task_1 = BashOperator(task_id="task_1", bash_command="exit 0") + task_2 = BashOperator(task_id="task_2", bash_command="exit 0") + task_3 = BashOperator(task_id="task_3", bash_command="exit 0") + # task_2: mixed — one labeled edge to task_4 and one unlabeled edge to task_5 + task_4 = BashOperator(task_id="task_4", bash_command="exit 0") + task_5 = BashOperator(task_id="task_5", bash_command="exit 0") + # task_5: no labels at all — both incoming and outgoing edges are unlabeled + task_6 = BashOperator(task_id="task_6", bash_command="exit 0") + + check_events = OpenLineageTestOperator( + task_id="check_events", file_path=get_expected_event_file_path(DAG_ID) + ) + + task_1 >> Label("success path") >> task_2 + task_1 >> Label("alternate path") >> task_3 + task_2 >> Label("follow-up") >> task_4 + task_2 >> task_5 + task_5 >> task_6 + [task_3, task_4, task_6] >> check_events + + +from 
tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_hitl_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_hitl_dag.py new file mode 100644 index 0000000000000..6227c12fed861 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_hitl_dag.py @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG with all four HITL operator variants (requires Airflow 3.1+). 
+ +It checks: + - AirflowJobFacet.tasks[task_id] includes hitl_summary with subject, options, defaults + - task START event airflow run facet includes hitl_summary attribute + - task COMPLETE event is emitted after response_timeout auto-applies defaults + - ApprovalOperator COMPLETE event has hitl_summary.approved + - HITLBranchOperator COMPLETE event has hitl_summary.branches_to_execute + - HITLEntryOperator and base HITLOperator emit correct events +""" + +from __future__ import annotations + +from datetime import datetime, timedelta + +from airflow import DAG +from airflow.providers.standard.operators.bash import BashOperator +from airflow.providers.standard.operators.hitl import ( + ApprovalOperator, + HITLBranchOperator, + HITLEntryOperator, + HITLOperator, +) +from airflow.sdk.definitions.param import Param +from airflow.utils.trigger_rule import TriggerRule + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + +DAG_ID = "openlineage_hitl_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=None, + catchup=False, + default_args={"retries": 0}, +) as dag: + # Base HITLOperator — auto-approves "Yes" after timeout; + # assigned_users exercises non-null serialization of that field + base_hitl = HITLOperator( + task_id="base_hitl", + subject="Base HITL subject", + body="Base HITL body for OpenLineage system test.", + options=["Yes", "No"], + defaults=["Yes"], + response_timeout=timedelta(seconds=2), + assigned_users=[{"id": "user-001", "name": "Test User"}], + ) + + # ApprovalOperator — auto-approves after timeout; + # assigned_users tests serialization on ApprovalOperator specifically + approval = ApprovalOperator( + task_id="approval", + subject="Approval subject", + body="Approval body for OpenLineage system test.", + defaults=["Approve"], + response_timeout=timedelta(seconds=2), + assigned_users=[{"id": "user-002", "name": "Approver"}], + ) + 
+ # HITLBranchOperator — routes to path_a_task after timeout via default; + # options_mapping exercises non-empty mapping serialization + branch_hitl = HITLBranchOperator( + task_id="branch_hitl", + subject="Branch HITL subject", + body="Branch HITL body — routes to path_a.", + options=["Some task", "Another task"], + defaults=["Some task"], + response_timeout=timedelta(seconds=2), + assigned_users=[{"id": "user-003", "name": "Branch User"}], + options_mapping={"Some task": "path_a_task", "Another task": "path_b_task"}, + ) + path_a_task = BashOperator(task_id="path_a_task", bash_command="exit 0;") + path_b_task = BashOperator(task_id="path_b_task", bash_command="exit 0;") + + # HITLEntryOperator — accepts user input form, auto-completes after timeout; + # multiple=True exercises the non-default value of that field; + # params exercises serialized_params serialization (source="task" required for filter) + entry_hitl = HITLEntryOperator( + task_id="entry_hitl", + subject="Entry HITL subject", + body="Entry HITL body for OpenLineage system test.", + defaults=["OK"], + multiple=True, + response_timeout=timedelta(seconds=2), + trigger_rule=TriggerRule.ALL_DONE, + assigned_users=[{"id": "user-004", "name": "Entry User"}], + params={ + "note": Param( + "default note", + description="Optional feedback note", + source="task", + type="string", + ) + }, + ) + + check_events = OpenLineageTestOperator( + task_id="check_events", + file_path=get_expected_event_file_path(DAG_ID), + # HITL operators defer at least once, causing two START events (initial run + resume). 
+ event_count_assertions={ + f"{DAG_ID}.base_hitl.event.start": ">=2", + f"{DAG_ID}.approval.event.start": ">=2", + f"{DAG_ID}.branch_hitl.event.start": ">=2", + f"{DAG_ID}.entry_hitl.event.start": ">=2", + }, + ) + + base_hitl >> approval >> branch_hitl >> [path_a_task, path_b_task] + [path_a_task, path_b_task] >> entry_hitl >> check_events + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_manual_lineage_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_manual_lineage_dag.py new file mode 100644 index 0000000000000..a829184c989d9 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_manual_lineage_dag.py @@ -0,0 +1,255 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG exercising the public OpenLineage manual lineage API: emit_dataset_lineage and emit_query_lineage. 
+ +It checks: + - emit_dataset_lineage produces a RUNNING event with inputs/outputs + - emit_dataset_lineage accepts additional_run_facets and additional_job_facets + - datasets can carry dataset-level facets (DataQualityAssertionsDatasetFacet) + - emit_query_lineage produces a START + COMPLETE event pair + - query_text attaches a sql job facet + - query_id + query_source_namespace attaches external query run facet + - multiple emit_query_lineage calls increment the job name counter (.1, .2, .3) + - failed query (is_successful=False) produces a FAIL event with error message run facet + - counter resets to .1 in a new task (no cross-task spill) + - explicit job_name= bypasses the counter + - explicit task_instance= bypasses context resolution inside the helper +""" + +from __future__ import annotations + +import datetime as dt + +from openlineage.client.event_v2 import Dataset +from openlineage.client.facet_v2 import data_quality_assertions_dataset, source_code_location_job + +from airflow.providers.openlineage.api import emit_dataset_lineage, emit_query_lineage + +try: + from airflow.sdk import dag, get_current_context, task +except ImportError: + from airflow.decorators import dag, task # type: ignore[no-redef, attr-defined] + from airflow.operators.python import get_current_context # type: ignore[no-redef] + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + + +def _dataset_with_assertions(namespace: str, name: str) -> Dataset: + return Dataset( + namespace=namespace, + name=name, + facets={ + "dataQualityAssertions": data_quality_assertions_dataset.DataQualityAssertionsDatasetFacet( # type: ignore[dict-item] + assertions=[ + data_quality_assertions_dataset.Assertion( + assertion="not_null", success=True, column="id" + ), + data_quality_assertions_dataset.Assertion(assertion="unique", success=True, column="id"), + ], + ), + }, + ) + + +DAG_ID = 
"openlineage_manual_lineage_dag" + + +@dag( + dag_id=DAG_ID, + start_date=dt.datetime(2024, 1, 1), + schedule=None, + catchup=False, + default_args={"retries": 0}, +) +def openlineage_manual_lineage_dag(): + @task + def datasets_minimal() -> None: + """Minimal emit_dataset_lineage — just one input dataset.""" + emit_dataset_lineage( + inputs=[Dataset(namespace="s3://example-bucket", name="raw/orders.csv")], + ) + + @task + def datasets_maximal() -> None: + """Maximal emit_dataset_lineage — every kwarg set, datasets with facets.""" + ctx = get_current_context() + ti = ctx["task_instance"] + + emit_dataset_lineage( + inputs=[_dataset_with_assertions("s3://example-bucket", "raw/2024/01/01/orders.csv")], + outputs=[ + _dataset_with_assertions("snowflake://example-acct", "analytics.public.orders_enriched") + ], + task_instance=ti, + additional_run_facets={"my_custom_run_facet": {"key": "value"}}, # type: ignore[dict-item] + additional_job_facets={ + "sourceCodeLocation": source_code_location_job.SourceCodeLocationJobFacet( + type="git", + url="https://github.com/apache/airflow", + repoUrl="https://github.com/apache/airflow", + path="providers/openlineage/tests/system/openlineage/example_openlineage_manual_lineage_dag.py", + version="main", + branch="main", + ), + }, + raise_on_error=True, + ) + + @task + def query_minimal() -> None: + """Minimal emit_query_lineage — query_id + source only.""" + emit_query_lineage( + query_id="qid-min-1", + query_source_namespace="snowflake://example-acct", + ) + + @task + def query_maximal() -> None: + """Maximal emit_query_lineage — every kwarg set.""" + ctx = get_current_context() + ti = ctx["task_instance"] + + start = dt.datetime(2024, 5, 1, 10, 0, 0, tzinfo=dt.timezone.utc) + end = dt.datetime(2024, 5, 1, 10, 0, 5, tzinfo=dt.timezone.utc) + + emit_query_lineage( + query_id="qid-max-1", + query_source_namespace="snowflake://example-acct", + query_text=( + "INSERT INTO analytics.public.user_events_summary " + "SELECT user_id, 
COUNT(*) FROM analytics.public.user_events GROUP BY user_id" + ), + inputs=[ + _dataset_with_assertions("snowflake://example-acct", "analytics.public.user_events_extra") + ], + outputs=[ + _dataset_with_assertions("snowflake://example-acct", "analytics.public.user_events_summary") + ], + start_time=start, + end_time=end, + is_successful=True, + default_database="ANALYTICS", + default_schema="public", + task_instance=ti, + raise_on_error=True, + ) + + @task + def query_multiple_in_one_task() -> None: + """Three emit_query_lineage calls — counter goes .1, .2, .3; third call is FAIL.""" + emit_query_lineage( + query_id="qid-multi-1", + query_source_namespace="snowflake://example-acct", + query_text="SELECT id, email FROM analytics.public.users WHERE active = true", + ) + emit_query_lineage( + query_id="qid-multi-2", + query_source_namespace="snowflake://example-acct", + query_text="SELECT * FROM analytics.public.orders WHERE created_at > '2024-01-01'", + ) + emit_query_lineage( + query_id="qid-multi-3", + query_source_namespace="snowflake://example-acct", + query_text="SELECT broken(", + is_successful=False, + error_message="syntax error at or near 'broken'", + ) + + @task + def query_isolated_task() -> None: + """Single call in a new task — counter resets to .1, proving no cross-task spill.""" + emit_query_lineage( + query_id="qid-isolated-1", + query_source_namespace="snowflake://example-acct", + query_text="SELECT 1", + ) + + @task + def query_with_explicit_job_name() -> None: + """Explicit job_name= bypasses the counter; counter stays unset after the call.""" + emit_query_lineage( + query_id="qid-explicit-name", + query_source_namespace="snowflake://example-acct", + query_text="SELECT version()", + job_name="custom_job_name_set_by_caller", + ) + + @task + def query_with_explicit_task_instance() -> None: + """Explicit task_instance= bypasses context resolution inside the helper.""" + ti = get_current_context()["task_instance"] + emit_query_lineage( + 
query_id="qid-explicit-ti", + query_source_namespace="snowflake://example-acct", + query_text="SELECT now()", + task_instance=ti, + ) + + check_events = OpenLineageTestOperator( + task_id="check_events", + file_path=get_expected_event_file_path(DAG_ID), + event_count_assertions={ + # emit_dataset_lineage produces a RUNNING event + f"{DAG_ID}.datasets_minimal.event.running": "==1", + f"{DAG_ID}.datasets_maximal.event.running": "==1", + # minimal query: 1 start + 1 complete + f"{DAG_ID}.query_minimal.manual_query.1.event.start": "==1", + f"{DAG_ID}.query_minimal.manual_query.1.event.complete": "==1", + # maximal query + f"{DAG_ID}.query_maximal.manual_query.1.event.start": "==1", + f"{DAG_ID}.query_maximal.manual_query.1.event.complete": "==1", + # three calls in one task: .1 and .2 complete, .3 fail + f"{DAG_ID}.query_multiple_in_one_task.manual_query.1.event.start": "==1", + f"{DAG_ID}.query_multiple_in_one_task.manual_query.1.event.complete": "==1", + f"{DAG_ID}.query_multiple_in_one_task.manual_query.2.event.start": "==1", + f"{DAG_ID}.query_multiple_in_one_task.manual_query.2.event.complete": "==1", + f"{DAG_ID}.query_multiple_in_one_task.manual_query.3.event.start": "==1", + f"{DAG_ID}.query_multiple_in_one_task.manual_query.3.event.fail": "==1", + # isolated task resets counter to .1 + f"{DAG_ID}.query_isolated_task.manual_query.1.event.start": "==1", + f"{DAG_ID}.query_isolated_task.manual_query.1.event.complete": "==1", + # explicit job_name bypasses counter + "custom_job_name_set_by_caller.event.start": "==1", + "custom_job_name_set_by_caller.event.complete": "==1", + # explicit task_instance + f"{DAG_ID}.query_with_explicit_task_instance.manual_query.1.event.start": "==1", + f"{DAG_ID}.query_with_explicit_task_instance.manual_query.1.event.complete": "==1", + }, + ) + + ( + datasets_minimal() + >> datasets_maximal() + >> query_minimal() + >> query_maximal() + >> query_multiple_in_one_task() + >> query_isolated_task() + >> query_with_explicit_job_name() + 
>> query_with_explicit_task_instance() + >> check_events + ) + + +manual_lineage_dag = openlineage_manual_lineage_dag() + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(manual_lineage_dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_cron_trigger_timetable_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_cron_trigger_timetable_dag.py new file mode 100644 index 0000000000000..2a0979c2d8e7a --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_cron_trigger_timetable_dag.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG with CronTriggerTimetable as a standalone schedule (not embedded in AssetOrTimeSchedule). 
+ +It checks: + - CronTriggerTimetable serialization in the airflow dag run facet + - timetable_summary shows the cron expression + - timetable dict contains cron expression and timezone +""" + +from __future__ import annotations + +from datetime import datetime + +from airflow import DAG +from airflow.providers.standard.operators.bash import BashOperator +from airflow.timetables.trigger import CronTriggerTimetable + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + +DAG_ID = "openlineage_schedule_cron_trigger_timetable_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=CronTriggerTimetable("21 13 29 2 4", timezone="UTC"), + catchup=False, + default_args={"retries": 0}, +) as dag: + do_nothing_task = BashOperator(task_id="do_nothing_task", bash_command="exit 0;") + + check_events = OpenLineageTestOperator( + task_id="check_events", file_path=get_expected_event_file_path(DAG_ID) + ) + + do_nothing_task >> check_events + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_delta_trigger_timetable_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_delta_trigger_timetable_dag.py new file mode 100644 index 0000000000000..7ed88d38e6811 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_delta_trigger_timetable_dag.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG with DeltaTriggerTimetable schedule. + +It checks: + - DeltaTriggerTimetable serialization in the airflow dag run facet + - timetable dict contains the delta (timedelta) representation + - timetable_summary reflects the delta value +""" + +from __future__ import annotations + +from datetime import datetime, timedelta + +from airflow import DAG +from airflow.providers.standard.operators.bash import BashOperator +from airflow.timetables.trigger import DeltaTriggerTimetable + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + +DAG_ID = "openlineage_schedule_delta_trigger_timetable_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=DeltaTriggerTimetable(timedelta(days=987)), + catchup=False, + default_args={"retries": 0}, +) as dag: + do_nothing_task = BashOperator(task_id="do_nothing_task", bash_command="exit 0;") + + check_events = OpenLineageTestOperator( + task_id="check_events", file_path=get_expected_event_file_path(DAG_ID) + ) + + do_nothing_task >> check_events + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git 
a/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_multiple_cron_trigger_timetable_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_multiple_cron_trigger_timetable_dag.py new file mode 100644 index 0000000000000..7fe5e06d48b34 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_multiple_cron_trigger_timetable_dag.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG with MultipleCronTriggerTimetable schedule. 
+ +It checks: + - MultipleCronTriggerTimetable serialization in the airflow dag run facet + - timetable dict contains all cron expressions + - timetable_summary reflects multiple cron expressions +""" + +from __future__ import annotations + +from datetime import datetime + +from airflow import DAG +from airflow.providers.standard.operators.bash import BashOperator +from airflow.timetables.trigger import MultipleCronTriggerTimetable + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + +DAG_ID = "openlineage_schedule_multiple_cron_trigger_timetable_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=MultipleCronTriggerTimetable("21 13 29 2 4", "9 27 29 2 4", timezone="UTC"), + catchup=False, + default_args={"retries": 0}, +) as dag: + do_nothing_task = BashOperator(task_id="do_nothing_task", bash_command="exit 0;") + + check_events = OpenLineageTestOperator( + task_id="check_events", file_path=get_expected_event_file_path(DAG_ID) + ) + + do_nothing_task >> check_events + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_timedelta_dag.py b/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_timedelta_dag.py new file mode 100644 index 0000000000000..db2370e855108 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_schedule_timedelta_dag.py @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +DAG with a timedelta shorthand schedule (produces DeltaDataIntervalTimetable). + +It checks: + - timedelta schedule serialization in the airflow dag run facet + - timetable dict contains the delta representation + - timetable_summary reflects the timedelta value +""" + +from __future__ import annotations + +from datetime import datetime, timedelta + +from airflow import DAG +from airflow.providers.standard.operators.bash import BashOperator + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + +DAG_ID = "openlineage_schedule_timedelta_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=timedelta(days=987), + catchup=False, + default_args={"retries": 0}, +) as dag: + do_nothing_task = BashOperator(task_id="do_nothing_task", bash_command="exit 0;") + + check_events = OpenLineageTestOperator( + task_id="check_events", file_path=get_expected_event_file_path(DAG_ID) + ) + + do_nothing_task >> check_events + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/example_openlineage_trigger_failed_dag.py 
b/providers/openlineage/tests/system/openlineage/example_openlineage_trigger_failed_dag.py new file mode 100644 index 0000000000000..6f25749a87f26 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/example_openlineage_trigger_failed_dag.py @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Simple DAG that triggers another simple DAG that fails. 
+ +It checks: + - task's trigger_dag_id + - DAGRun START and FAIL events, for the triggered DAG + - propagation of OL parent and root info from DAGRun conf + - error message run facet on DAG FAIL event + - airflow state run facet on DAG FAIL event showing failed state +""" + +from __future__ import annotations + +from datetime import datetime + +from airflow import DAG +from airflow.providers.standard.operators.bash import BashOperator +from airflow.providers.standard.operators.empty import EmptyOperator +from airflow.providers.standard.operators.trigger_dagrun import TriggerDagRunOperator +from airflow.utils.trigger_rule import TriggerRule + +from system.openlineage.expected_events import get_expected_event_file_path +from system.openlineage.operator import OpenLineageTestOperator + +DAG_ID = "openlineage_trigger_failed_dag" + +with DAG( + dag_id=DAG_ID, + start_date=datetime(2021, 1, 1), + schedule=None, + catchup=False, + default_args={"retries": 0}, +) as dag: + trigger_dagrun = TriggerDagRunOperator( + task_id="trigger_dagrun", + trigger_dag_id="openlineage_trigger_failed_dag_child__notrigger", + trigger_run_id=f"openlineage_trigger_failed_dag_triggering_child_{datetime.now().isoformat()}", + wait_for_completion=True, + conf={ + "some_config": "value1", + "openlineage": { + "parentRunId": "3bb703d1-09c1-4a42-8da5-35a0b3216072", + "parentJobNamespace": "prod_biz", + "parentJobName": "get_files", + "rootParentRunId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e", + "rootParentJobNamespace": "prod_analytics", + "rootParentJobName": "generate_report_sales_e2e", + }, + }, + poke_interval=10, + ) + + empty_task_on_fail = EmptyOperator( + task_id="empty_task_on_fail", + trigger_rule=TriggerRule.ONE_FAILED, + ) + + check_events = OpenLineageTestOperator( + task_id="check_events", + file_path=get_expected_event_file_path(DAG_ID), + ) + + trigger_dagrun >> empty_task_on_fail >> check_events + + +with DAG( + dag_id="openlineage_trigger_failed_dag_child__notrigger", + 
start_date=datetime(2021, 1, 1), + schedule=None, + catchup=False, + tags=["first", "second@", "with'quote", 'z"e'], + doc_md="MD DAG doc", + description="DAG description", + default_args={"retries": 0}, +) as child_dag: + failing_task = BashOperator(task_id="failing_task", bash_command="exit 1;") + + +from tests_common.test_utils.system_tests import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: contributing-docs/testing/system_tests.rst) +test_run = get_test_run(dag) diff --git a/providers/openlineage/tests/system/openlineage/expected_events/__init__.py b/providers/openlineage/tests/system/openlineage/expected_events/__init__.py index 6b65722cb5372..74b1ba13a174b 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/__init__.py +++ b/providers/openlineage/tests/system/openlineage/expected_events/__init__.py @@ -17,6 +17,7 @@ from __future__ import annotations import os +import re from pathlib import Path from packaging.version import Version @@ -37,9 +38,12 @@ def get_expected_event_file_path(dag_id: str) -> str: 1. A file named `{dag_id}__af{major_version}_{minor_version}.json` (e.g., `example_dag__af2_10.json` for Airflow 2.10.x) - 2. A file named `{dag_id}__af{major_version}.json` + 2. A file named `{dag_id}__af{major_version}_{N}_plus.json` where N <= current minor version + (e.g., `example_dag__af3_3_plus.json` matches Airflow 3.3, 3.4, 3.5, …) + When multiple such files exist, the one with the highest N wins. + 3. A file named `{dag_id}__af{major_version}.json` (e.g., `example_dag__af3.json` for any Airflow 3.x version) - 3. A generic file named `{dag_id}.json` without version suffix + 4. A generic file named `{dag_id}.json` without version suffix (e.g., `example_dag.json` for any Airflow version) The function returns the path to the first existing file found in this order. 
@@ -59,17 +63,38 @@ def get_expected_event_file_path(dag_id: str) -> str: """ base_path = Path(__file__).parent - paths_to_check = ( - str(base_path / f"{dag_id}__af{AIRFLOW_VERSION.major}_{AIRFLOW_VERSION.minor}.json"), + # 1. Exact minor version match + exact_minor = str(base_path / f"{dag_id}__af{AIRFLOW_VERSION.major}_{AIRFLOW_VERSION.minor}.json") + if os.path.exists(exact_minor): + return exact_minor + + # 2. Minimum-version files: {dag_id}__af{major}_{N}_plus.json where N <= current minor + plus_candidates = [] + for f in base_path.glob(f"{dag_id}__af{AIRFLOW_VERSION.major}_*_plus.json"): + m = re.fullmatch( + rf"{re.escape(dag_id)}__af{AIRFLOW_VERSION.major}_(\d+)_plus\.json", + f.name, + ) + if m: + file_minor = int(m.group(1)) + if file_minor <= AIRFLOW_VERSION.minor: + plus_candidates.append((file_minor, str(f))) + if plus_candidates: + plus_candidates.sort(key=lambda x: x[0], reverse=True) + return plus_candidates[0][1] + + # 3. Major-only and 4. generic fallback + for path in ( str(base_path / f"{dag_id}__af{AIRFLOW_VERSION.major}.json"), str(base_path / f"{dag_id}.json"), - ) - - for path in paths_to_check: + ): if os.path.exists(path): return path raise ValueError( - f"Could not locate expected event files for dag_id {dag_id}. " - f"Expected one of the following files: `{paths_to_check}`" + f"Could not locate expected event files for dag_id {dag_id} " + f"(Airflow {AIRFLOW_VERSION.major}.{AIRFLOW_VERSION.minor}). 
" + f"Checked: {dag_id}__af{AIRFLOW_VERSION.major}_{AIRFLOW_VERSION.minor}.json, " + f"{dag_id}__af{AIRFLOW_VERSION.major}_N_plus.json (N<={AIRFLOW_VERSION.minor}), " + f"{dag_id}__af{AIRFLOW_VERSION.major}.json, {dag_id}.json" ) diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_all_facets_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_all_facets_dag.json new file mode 100644 index 0000000000000..77ed7dff4c27e --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_all_facets_dag.json @@ -0,0 +1,842 @@ +[ + { + "eventType": "START", + "inputs": [ + { + "namespace": "s3://all-facets-bucket", + "name": "input/data.csv", + "facets": { + "custom_input_ds_facet": { + "key": "value" + }, + "schema": { + "fields": [ + { + "name": "id", + "type": "INTEGER", + "description": "Unique row identifier", + "ordinal_position": 1 + }, + { + "name": "name", + "type": "VARCHAR", + "description": "Full name of the entity", + "ordinal_position": 2 + }, + { + "name": "address", + "type": "STRUCT", + "description": "Nested address record", + "ordinal_position": 3, + "fields": [ + { + "name": "street", + "type": "VARCHAR", + "description": "Street address line", + "ordinal_position": 1 + }, + { + "name": "city", + "type": "VARCHAR", + "description": "City name", + "ordinal_position": 2 + } + ] + } + ] + }, + "dataSource": { + "name": "all-facets-source", + "uri": "s3://all-facets-bucket" + }, + "columnLineage": { + "fields": { + "name": { + "inputFields": [ + { + "namespace": "s3://upstream", + "name": "upstream/data.csv", + "field": "full_name", + "transformations": [ + { + "type": "DIRECT", + "subtype": "IDENTITY", + "description": "Direct copy of the full_name field", + "masking": false + } + ] + } + ], + "transformationDescription": "Column passed through without modification", + "transformationType": "IDENTITY" + } + } + }, + "documentation": { + "description": "Input 
dataset for all-facets comprehensive serialization test", + "contentType": "text/plain" + }, + "inputStatistics": { + "rowCount": 1000, + "size": 8192, + "fileCount": 1 + }, + "dataQualityAssertions": { + "assertions": [ + { + "assertion": "not_null", + "success": true, + "column": "id", + "severity": "ERROR", + "name": "id_not_null_check", + "description": "Checks that id column has no null values", + "expected": "0 nulls", + "actual": "0 nulls", + "content": "{\"nullCount\": 0}", + "contentType": "application/json", + "params": { + "sample_size": "1000" + } + }, + { + "assertion": "row_count_above_threshold", + "success": true, + "severity": "WARNING", + "name": "row_count_check", + "description": "Checks that row count is above the minimum threshold", + "expected": ">= 100", + "actual": "1000" + } + ] + }, + "ownership": { + "owners": [ + { + "name": "team:data-engineering", + "type": "team" + }, + { + "name": "user:jane.smith@example.com", + "type": "user" + } + ] + }, + "tags": { + "tags": [ + { + "key": "env", + "value": "test", + "source": "airflow-system-test" + }, + { + "key": "source", + "value": "s3", + "source": "airflow-system-test" + }, + { + "key": "format", + "value": "csv", + "source": "airflow-system-test", + "field": "id" + } + ] + }, + "catalog": { + "framework": "iceberg", + "type": "TABLE", + "name": "all_facets_input", + "metadataUri": "s3://metastore/all_facets.json", + "warehouseUri": "s3://all-facets-bucket/warehouse", + "source": "s3://all-facets-bucket/catalog.json", + "catalogProperties": { + "location": "s3://all-facets-bucket/warehouse/all_facets_input" + } + }, + "dataQualityMetrics": { + "columnMetrics": { + "id": { + "nullCount": 0, + "distinctCount": 1000, + "sum": 500500.0, + "count": 1000, + "min": 1.0, + "max": 1000.0, + "quantiles": { + "0.25": 250.0, + "0.5": 500.0, + "0.75": 750.0 + } + }, + "name": { + "nullCount": 5, + "distinctCount": 980, + "count": 1000 + } + }, + "rowCount": 1000, + "bytes": 8192, + "fileCount": 1 + }, 
+ "dataQualityMetricsInput": { + "columnMetrics": { + "id": { + "nullCount": 0, + "distinctCount": 1000, + "sum": 500500.0, + "count": 1000, + "min": 1.0, + "max": 1000.0, + "quantiles": { + "0.25": 250.0, + "0.5": 500.0, + "0.75": 750.0 + } + } + }, + "rowCount": 1000, + "bytes": 8192, + "fileCount": 1 + }, + "datasetType": { + "datasetType": "TABLE", + "subType": "ICEBERG_TABLE" + }, + "hierarchy": { + "hierarchy": [ + { + "type": "catalog", + "name": "aws-glue" + }, + { + "type": "database", + "name": "analytics" + }, + { + "type": "schema", + "name": "public" + } + ] + }, + "inputSubset": { + "inputCondition": { + "partitions": [ + { + "dimensions": { + "business_date": "2024-10-15", + "country": "PL" + }, + "identifier": "2024-01-01/us-east-1" + } + ], + "type": "partition" + } + } + } + } + ], + "outputs": [ + { + "namespace": "snowflake://account", + "name": "analytics.public.all_facets_output", + "facets": { + "custom_output_ds_facet": { + "key": "value" + }, + "outputStatistics": { + "rowCount": 500, + "size": 4096, + "fileCount": 1 + }, + "storage": { + "storageLayer": "snowflake", + "fileFormat": "table" + }, + "symlinks": { + "identifiers": [ + { + "namespace": "snowflake://account", + "name": "analytics.public.all_facets_output_alias", + "type": "TABLE" + } + ] + }, + "version": { + "datasetVersion": "v1.0.0" + }, + "lifecycleStateChange": { + "lifecycleStateChange": "CREATE", + "previousIdentifier": { + "name": "analytics.public.all_facets_output_v0", + "namespace": "snowflake://account" + } + }, + "outputSubset": { + "outputCondition": { + "partitions": [ + { + "dimensions": { + "business_date": "2024-10-15", + "country": "PL" + }, + "identifier": "2024-01-01" + } + ], + "type": "partition" + } + } + } + } + ], + "run": { + "facets": { + "custom_run_facet": { + "key": "value" + }, + "tags": { + "tags": [ + { + "key": "test_type", + "value": "all_facets", + "source": "airflow-system-test" + }, + { + "key": "openlineage_client_version", + "source": 
"OPENLINEAGE_CLIENT", + "value": "{{ result is string }}" + } + ] + }, + "externalQuery": { + "externalQueryId": "all-facets-query-id-001", + "source": "snowflake://account" + }, + "testRun": { + "tests": [ + { + "name": "all_facets_test", + "status": "success", + "severity": "WARNING", + "type": "integration", + "description": "Checks all facets are emitted and serialized correctly", + "expected": "all_facets_present", + "actual": "all_facets_present", + "content": "{\"facetCount\": 37}", + "contentType": "application/json", + "params": { + "batch": "1" + } + } + ] + }, + "errorMessage": { + "message": "Non-fatal warning detected during extraction phase", + "programmingLanguage": "python", + "stackTrace": "{{ result is string }}" + }, + "executionParameters": { + "parameters": [ + { + "key": "executor-cores", + "name": "executor-cores", + "description": "Number of CPU cores per executor", + "value": "4" + }, + { + "key": "executor-memory", + "name": "executor-memory", + "description": "Memory allocated per executor in gigabytes", + "value": "8g" + } + ] + }, + "extractionError": { + "totalTasks": 10, + "failedTasks": 1, + "errors": [ + { + "errorMessage": "Unable to parse column 'event_ts' as TIMESTAMP", + "stackTrace": "{{ result is string }}", + "task": "parse_schema", + "taskNumber": 3 + } + ] + }, + "jobDependencies": { + "upstream": [ + { + "job": { + "namespace": "default", + "name": "upstream_etl_job" + }, + "run": { + "runId": "00000000-0000-0000-0000-000000000001" + }, + "dependency_type": "WAIT", + "sequence_trigger_rule": "ALL_DONE", + "status_trigger_rule": "ALL_SUCCESS" + } + ], + "downstream": [], + "trigger_rule": "ALL_SUCCESS" + } + } + }, + "job": { + "name": "openlineage_all_facets_dag.all_facets_task", + "facets": { + "custom_job_facet": { + "key": "value" + }, + "tags": { + "tags": [ + { + "key": "domain", + "value": "data-platform", + "source": "airflow-system-test" + }, + { + "key": "team", + "value": "data-engineering", + "source": 
"airflow-system-test" + } + ] + }, + "sourceCodeLocation": { + "type": "git", + "url": "https://github.com/apache/airflow/blob/main/providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py", + "repoUrl": "https://github.com/apache/airflow", + "path": "providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py", + "version": "main", + "tag": "v2.10.0", + "branch": "main", + "pullRequestNumber": "12345" + }, + "documentation": { + "description": "All-facets test operator emitting every available OL facet" + }, + "sql": { + "query": "SELECT id, name FROM analytics.public.all_facets_output WHERE id IS NOT NULL", + "dialect": "snowflake_sql" + }, + "sourceCode": { + "language": "python", + "sourceCode": "{{ result is string }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "inputs": [ + { + "namespace": "s3://all-facets-bucket", + "name": "input/data.csv", + "facets": { + "schema": { + "fields": [ + { + "name": "id", + "type": "INTEGER", + "description": "Unique row identifier", + "ordinal_position": 1 + }, + { + "name": "name", + "type": "VARCHAR", + "description": "Full name of the entity", + "ordinal_position": 2 + }, + { + "name": "address", + "type": "STRUCT", + "description": "Nested address record", + "ordinal_position": 3, + "fields": [ + { + "name": "street", + "type": "VARCHAR", + "description": "Street address line", + "ordinal_position": 1 + }, + { + "name": "city", + "type": "VARCHAR", + "description": "City name", + "ordinal_position": 2 + } + ] + } + ] + }, + "dataSource": { + "name": "all-facets-source", + "uri": "s3://all-facets-bucket" + }, + "columnLineage": { + "fields": { + "name": { + "inputFields": [ + { + "namespace": "s3://upstream", + "name": "upstream/data.csv", + "field": "full_name", + "transformations": [ + { + "type": "DIRECT", + "subtype": "IDENTITY", + "description": "Direct copy of the full_name field", + "masking": false + } + ] + } + ], + "transformationDescription": "Column 
passed through without modification", + "transformationType": "IDENTITY" + } + } + }, + "documentation": { + "description": "Input dataset for all-facets comprehensive serialization test", + "contentType": "text/plain" + }, + "inputStatistics": { + "rowCount": 1000, + "size": 8192, + "fileCount": 1 + }, + "dataQualityAssertions": { + "assertions": [ + { + "assertion": "not_null", + "success": true, + "column": "id", + "severity": "ERROR", + "name": "id_not_null_check", + "description": "Checks that id column has no null values", + "expected": "0 nulls", + "actual": "0 nulls", + "content": "{\"nullCount\": 0}", + "contentType": "application/json", + "params": { + "sample_size": "1000" + } + }, + { + "assertion": "row_count_above_threshold", + "success": true, + "severity": "WARNING", + "name": "row_count_check", + "description": "Checks that row count is above the minimum threshold", + "expected": ">= 100", + "actual": "1000" + } + ] + }, + "ownership": { + "owners": [ + { + "name": "team:data-engineering", + "type": "team" + }, + { + "name": "user:jane.smith@example.com", + "type": "user" + } + ] + }, + "tags": { + "tags": [ + { + "key": "env", + "value": "test", + "source": "airflow-system-test" + }, + { + "key": "source", + "value": "s3", + "source": "airflow-system-test" + }, + { + "key": "format", + "value": "csv", + "source": "airflow-system-test", + "field": "id" + } + ] + }, + "catalog": { + "framework": "iceberg", + "type": "TABLE", + "name": "all_facets_input", + "metadataUri": "s3://metastore/all_facets.json", + "warehouseUri": "s3://all-facets-bucket/warehouse", + "source": "s3://all-facets-bucket/catalog.json", + "catalogProperties": { + "location": "s3://all-facets-bucket/warehouse/all_facets_input" + } + }, + "dataQualityMetrics": { + "columnMetrics": { + "id": { + "nullCount": 0, + "distinctCount": 1000, + "sum": 500500.0, + "count": 1000, + "min": 1.0, + "max": 1000.0, + "quantiles": { + "0.25": 250.0, + "0.5": 500.0, + "0.75": 750.0 + } + }, + 
"name": { + "nullCount": 5, + "distinctCount": 980, + "count": 1000 + } + }, + "rowCount": 1000, + "bytes": 8192, + "fileCount": 1 + }, + "dataQualityMetricsInput": { + "columnMetrics": { + "id": { + "nullCount": 0, + "distinctCount": 1000, + "sum": 500500.0, + "count": 1000, + "min": 1.0, + "max": 1000.0, + "quantiles": { + "0.25": 250.0, + "0.5": 500.0, + "0.75": 750.0 + } + } + }, + "rowCount": 1000, + "bytes": 8192, + "fileCount": 1 + }, + "datasetType": { + "datasetType": "TABLE", + "subType": "ICEBERG_TABLE" + }, + "hierarchy": { + "hierarchy": [ + { + "type": "catalog", + "name": "aws-glue" + }, + { + "type": "database", + "name": "analytics" + }, + { + "type": "schema", + "name": "public" + } + ] + }, + "inputSubset": { + "inputCondition": { + "partitions": [ + { + "dimensions": { + "business_date": "2024-10-15", + "country": "PL" + }, + "identifier": "2024-01-01/us-east-1" + } + ], + "type": "partition" + } + } + } + } + ], + "outputs": [ + { + "namespace": "snowflake://account", + "name": "analytics.public.all_facets_output", + "facets": { + "outputStatistics": { + "rowCount": 500, + "size": 4096, + "fileCount": 1 + }, + "storage": { + "storageLayer": "snowflake", + "fileFormat": "table" + }, + "symlinks": { + "identifiers": [ + { + "namespace": "snowflake://account", + "name": "analytics.public.all_facets_output_alias", + "type": "TABLE" + } + ] + }, + "version": { + "datasetVersion": "v1.0.0" + }, + "lifecycleStateChange": { + "lifecycleStateChange": "CREATE", + "previousIdentifier": { + "name": "analytics.public.all_facets_output_v0", + "namespace": "snowflake://account" + } + }, + "outputSubset": { + "outputCondition": { + "partitions": [ + { + "dimensions": { + "business_date": "2024-10-15", + "country": "PL" + }, + "identifier": "2024-01-01" + } + ], + "type": "partition" + } + } + } + } + ], + "run": { + "facets": { + "tags": { + "tags": [ + { + "key": "test_type", + "value": "all_facets", + "source": "airflow-system-test" + }, + { + "key": 
"openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ result is string }}" + } + ] + }, + "externalQuery": { + "externalQueryId": "all-facets-query-id-001", + "source": "snowflake://account" + }, + "testRun": { + "tests": [ + { + "name": "all_facets_test", + "status": "success", + "severity": "WARNING", + "type": "integration", + "description": "Checks all facets are emitted and serialized correctly", + "expected": "all_facets_present", + "actual": "all_facets_present", + "content": "{\"facetCount\": 37}", + "contentType": "application/json", + "params": { + "batch": "1" + } + } + ] + }, + "errorMessage": { + "message": "Non-fatal warning detected during extraction phase", + "programmingLanguage": "python", + "stackTrace": "{{ result is string }}" + }, + "executionParameters": { + "parameters": [ + { + "key": "executor-cores", + "name": "executor-cores", + "description": "Number of CPU cores per executor", + "value": "4" + }, + { + "key": "executor-memory", + "name": "executor-memory", + "description": "Memory allocated per executor in gigabytes", + "value": "8g" + } + ] + }, + "extractionError": { + "totalTasks": 10, + "failedTasks": 1, + "errors": [ + { + "errorMessage": "Unable to parse column 'event_ts' as TIMESTAMP", + "stackTrace": "{{ result is string }}", + "task": "parse_schema", + "taskNumber": 3 + } + ] + }, + "jobDependencies": { + "upstream": [ + { + "job": { + "namespace": "default", + "name": "upstream_etl_job" + }, + "run": { + "runId": "00000000-0000-0000-0000-000000000001" + }, + "dependency_type": "WAIT", + "sequence_trigger_rule": "ALL_DONE", + "status_trigger_rule": "ALL_SUCCESS" + } + ], + "downstream": [], + "trigger_rule": "ALL_SUCCESS" + } + } + }, + "job": { + "name": "openlineage_all_facets_dag.all_facets_task", + "facets": { + "tags": { + "tags": [ + { + "key": "domain", + "value": "data-platform", + "source": "airflow-system-test" + }, + { + "key": "team", + "value": "data-engineering", + "source": 
"airflow-system-test" + } + ] + }, + "sourceCodeLocation": { + "type": "git", + "url": "https://github.com/apache/airflow/blob/main/providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py", + "repoUrl": "https://github.com/apache/airflow", + "path": "providers/openlineage/tests/system/openlineage/example_openlineage_all_facets_dag.py", + "version": "main", + "tag": "v2.10.0", + "branch": "main", + "pullRequestNumber": "12345" + }, + "documentation": { + "description": "All-facets test operator emitting every available OL facet" + }, + "sql": { + "query": "SELECT id, name FROM analytics.public.all_facets_output WHERE id IS NOT NULL", + "dialect": "snowflake_sql" + }, + "sourceCode": { + "language": "python", + "sourceCode": "{{ result is string }}" + } + } + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_custom_operator_failure_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_custom_operator_failure_dag.json new file mode 100644 index 0000000000000..448e53e09b9a9 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_custom_operator_failure_dag.json @@ -0,0 +1,170 @@ +[ + { + "eventType": "START", + "inputs": [ + { + "namespace": "s3://failure-test", + "name": "before_fail.csv" + } + ], + "run": { + "facets": { + "custom_facet": { + "random_facet_key": null + } + } + }, + "outputs": [], + "job": { + "name": "openlineage_custom_operator_failure_dag.failing_task" + } + }, + { + "eventType": "FAIL", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + 
"custom_facet": { + "random_facet_key": "123" + }, + "errorMessage": { + "message": "Intentional failure for OpenLineage on_failure testing", + "programmingLanguage": "python", + "stackTrace": "{{ result.startswith('Traceback (most recent call last):\n') and result.endswith('example_openlineage_custom_operator_failure_dag.py\", line 48, in execute\n raise ValueError(\"Intentional failure for OpenLineage on_failure testing\")\nValueError: Intentional failure for OpenLineage on_failure testing\n') }}" + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_custom_operator_failure_dag" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_custom_operator_failure_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "airflow": { + "dag": { + "dag_id": "openlineage_custom_operator_failure_dag", + "description": "custom_description", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_custom_operator_failure_dag.py') }}", + "owner": "{{ result.split(', ') | sort == ['airflow', 'some_owner1'] }}", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "tags": "{{ result[1:-1].split(', ') | sort == [\"'first'\", \"'second'\"] }}", + "timetable": {}, + "timetable_summary": "None" + }, + "dagRun": { + "conf": {}, + "dag_id": "openlineage_custom_operator_failure_dag", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result is string }}", + "run_type": "manual", + "start_date": "{{ 
is_datetime(result) }}" + }, + "taskInstance": { + "try_number": "{{ result is number }}" + }, + "task": { + "depends_on_past": false, + "downstream_task_ids": "['empty_success']", + "executor_config": {}, + "inlets": "[]", + "mapped": false, + "multiple_outputs": false, + "operator_class": "FailingOLOperator", + "operator_class_path": "{{ result.endswith('.FailingOLOperator') }}", + "outlets": "[]", + "owner": "some_owner1", + "priority_weight": 1, + "queue": "{{ result is string }}", + "retries": "{{ result is number }}", + "retry_exponential_backoff": false, + "task_id": "failing_task", + "trigger_rule": "all_success", + "upstream_task_ids": "[]", + "wait_for_downstream": false, + "wait_for_past_depends_before_skipping": false + }, + "taskUuid": "{{ is_uuid(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^(2|3)\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_custom_operator_failure_dag.failing_task", + "facets": { + "documentation": { + "description": "RST doc", + "contentType": "text/x-rst", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "some_owner1" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "tags": { + "tags": [ + { + "key": "first", + "value": "first", + "source": "AIRFLOW" + }, + { + "key": "second", + "value": "second", + "source": "AIRFLOW" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsJobFacet.json\\#\\/\\$defs\\/TagsJobFacet\") }}" + }, + "jobType": { + "integration": "AIRFLOW", + "jobType": "TASK", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + } + } + } + } +] diff --git 
a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_custom_operator_ol_methods_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_custom_operator_ol_methods_dag.json new file mode 100644 index 0000000000000..cd0da82faf917 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_custom_operator_ol_methods_dag.json @@ -0,0 +1,434 @@ +[ + { + "eventType": "START", + "run": { + "facets": { + "only-start-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.only_start_task", + "facets": { + "only-start-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": "s3://only-start", + "name": "input.csv", + "facets": { + "only-start-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://only-start", + "name": "output.csv", + "facets": { + "only-start-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "only-start-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.only_start_task", + "facets": { + "only-start-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": "s3://only-start", + "name": "input.csv", + "facets": { + "only-start-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://only-start", + "name": "output.csv", + "facets": { + "only-start-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "START", + "inputs": [], + "outputs": [], + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.only_complete_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "only-complete-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.only_complete_task", + "facets": { + "only-complete-job": { + "key": "value" + } + } + }, + "inputs": [ + { + 
"namespace": "s3://only-complete", + "name": "input.csv", + "facets": { + "only-complete-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://only-complete", + "name": "output.csv", + "facets": { + "only-complete-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "START", + "inputs": [], + "outputs": [], + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.only_failure_task" + } + }, + { + "eventType": "COMPLETE", + "inputs": [], + "outputs": [], + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.only_failure_task" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "sc-start-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.start_complete_task", + "facets": { + "sc-start-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": "s3://start-complete", + "name": "start_input.csv", + "facets": { + "sc-start-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://start-complete", + "name": "start_output.csv", + "facets": { + "sc-start-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "sc-complete-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.start_complete_task", + "facets": { + "sc-complete-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": "s3://start-complete", + "name": "complete_input.csv", + "facets": { + "sc-complete-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://start-complete", + "name": "complete_output.csv", + "facets": { + "sc-complete-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "START", + "run": { + "facets": { + "sf-start-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.start_failure_task", + "facets": { + 
"sf-start-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": "s3://start-failure", + "name": "start_input.csv", + "facets": { + "sf-start-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://start-failure", + "name": "start_output.csv", + "facets": { + "sf-start-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "sf-start-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.start_failure_task", + "facets": { + "sf-start-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": "s3://start-failure", + "name": "start_input.csv", + "facets": { + "sf-start-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://start-failure", + "name": "start_output.csv", + "facets": { + "sf-start-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "START", + "inputs": [], + "outputs": [], + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.complete_failure_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "cf-complete-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.complete_failure_task", + "facets": { + "cf-complete-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": "s3://complete-failure", + "name": "complete_input.csv", + "facets": { + "cf-complete-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://complete-failure", + "name": "complete_output.csv", + "facets": { + "cf-complete-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "START", + "run": { + "facets": { + "at-start-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.all_three_task", + "facets": { + "at-start-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": 
"s3://all-three", + "name": "start_input.csv", + "facets": { + "at-start-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://all-three", + "name": "start_output.csv", + "facets": { + "at-start-output-ds": { + "key": "value" + } + } + } + ] + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "at-complete-run": { + "key": "value" + } + } + }, + "job": { + "name": "openlineage_custom_operator_ol_methods_dag.all_three_task", + "facets": { + "at-complete-job": { + "key": "value" + } + } + }, + "inputs": [ + { + "namespace": "s3://all-three", + "name": "complete_input.csv", + "facets": { + "at-complete-input-ds": { + "key": "value" + } + } + } + ], + "outputs": [ + { + "namespace": "s3://all-three", + "name": "complete_output.csv", + "facets": { + "at-complete-output-ds": { + "key": "value" + } + } + } + ] + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_docs_file_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_docs_file_dag.json index dcf29892ce4e1..1e46c36581cf6 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_docs_file_dag.json +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_docs_file_dag.json @@ -5,10 +5,10 @@ "name": "openlineage_docs_file_dag", "facets": { "documentation": { - "description": "# MD doc file", - "contentType": "text/markdown", - "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", - "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" + "description": "# MD doc file", + "contentType": "text/markdown", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + 
"_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" } } } @@ -19,10 +19,10 @@ "name": "openlineage_docs_file_dag.do_nothing_task", "facets": { "documentation": { - "description": "# MD doc file", - "contentType": "text/markdown", - "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", - "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" + "description": "# MD doc file", + "contentType": "text/markdown", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" } } } @@ -33,10 +33,10 @@ "name": "openlineage_docs_file_dag.do_nothing_task", "facets": { "documentation": { - "description": "# MD doc file", - "contentType": "text/markdown", - "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", - "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" + "description": "# MD doc file", + "contentType": "text/markdown", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" } } } diff --git 
a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_edge_labels_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_edge_labels_dag.json new file mode 100644 index 0000000000000..b2f84dfbf363a --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_edge_labels_dag.json @@ -0,0 +1,72 @@ +[ + { + "eventType": "START", + "job": { + "name": "openlineage_edge_labels_dag", + "facets": { + "airflow": { + "tasks": { + "task_1": { + "downstream_task_ids": [ + "task_2", + "task_3" + ], + "downstream_task_edges": { + "task_2": { + "label": "success path" + }, + "task_3": { + "label": "alternate path" + } + } + }, + "task_2": { + "downstream_task_ids": [ + "task_4", + "task_5" + ], + "downstream_task_edges": { + "task_4": { + "label": "follow-up" + }, + "task_5": null + } + }, + "task_3": { + "downstream_task_ids": [ + "check_events" + ], + "downstream_task_edges": { + "check_events": null + } + }, + "task_4": { + "downstream_task_ids": [ + "check_events" + ], + "downstream_task_edges": { + "check_events": null + } + }, + "task_5": { + "downstream_task_ids": [ + "task_6" + ], + "downstream_task_edges": { + "task_6": null + } + }, + "task_6": { + "downstream_task_ids": [ + "check_events" + ], + "downstream_task_edges": { + "check_events": null + } + } + } + } + } + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_hitl_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_hitl_dag.json new file mode 100644 index 0000000000000..ea82221347053 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_hitl_dag.json @@ -0,0 +1,353 @@ +[ + { + "$absent": "Skipped operator should not emit OL events.", + "eventType": "START", + "job": { + "name": "openlineage_hitl_dag.path_b_task" + } + }, + { + "$absent": "Skipped operator should not emit OL events.", + "eventType": 
"COMPLETE", + "job": { + "name": "openlineage_hitl_dag.path_b_task" + } + }, + { + "$absent": "Skipped operator should not emit OL events.", + "eventType": "FAIL", + "job": { + "name": "openlineage_hitl_dag.path_b_task" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "task": { + "hitl_summary": { + "subject": "Base HITL subject", + "body": "Base HITL body for OpenLineage system test.", + "options": [ + "Yes", + "No" + ], + "defaults": [ + "Yes" + ], + "multiple": false, + "assigned_users": [ + { + "id": "user-001", + "name": "Test User" + } + ], + "serialized_params": null + } + } + } + } + }, + "job": { + "name": "openlineage_hitl_dag.base_hitl" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "task": { + "hitl_summary": { + "subject": "Base HITL subject", + "body": "Base HITL body for OpenLineage system test.", + "options": [ + "Yes", + "No" + ], + "defaults": [ + "Yes" + ], + "multiple": false, + "assigned_users": [ + { + "id": "user-001", + "name": "Test User" + } + ], + "serialized_params": null, + "chosen_options": [ + "Yes" + ], + "params_input": {}, + "responded_at": "{{ is_datetime(result) }}", + "responded_by_user": null + } + } + } + } + }, + "job": { + "name": "openlineage_hitl_dag.base_hitl" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "task": { + "hitl_summary": { + "subject": "Approval subject", + "body": "Approval body for OpenLineage system test.", + "options": [ + "Approve", + "Reject" + ], + "defaults": [ + "Approve" + ], + "multiple": false, + "assigned_users": [ + { + "id": "user-002", + "name": "Approver" + } + ], + "serialized_params": null, + "ignore_downstream_trigger_rules": false, + "fail_on_reject": false + } + } + } + } + }, + "job": { + "name": "openlineage_hitl_dag.approval" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "task": { + "hitl_summary": { + "subject": "Approval subject", + "body": "Approval 
body for OpenLineage system test.", + "options": [ + "Approve", + "Reject" + ], + "defaults": [ + "Approve" + ], + "multiple": false, + "assigned_users": [ + { + "id": "user-002", + "name": "Approver" + } + ], + "serialized_params": null, + "ignore_downstream_trigger_rules": false, + "fail_on_reject": false, + "chosen_options": [ + "Approve" + ], + "params_input": {}, + "responded_at": "{{ is_datetime(result) }}", + "responded_by_user": null, + "approved": true + } + } + } + } + }, + "job": { + "name": "openlineage_hitl_dag.approval" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "task": { + "hitl_summary": { + "subject": "Branch HITL subject", + "body": "Branch HITL body — routes to path_a.", + "options": [ + "Some task", + "Another task" + ], + "defaults": [ + "Some task" + ], + "multiple": false, + "assigned_users": [ + { + "id": "user-003", + "name": "Branch User" + } + ], + "serialized_params": null, + "options_mapping": { + "Some task": "path_a_task", + "Another task": "path_b_task" + } + } + } + } + } + }, + "job": { + "name": "openlineage_hitl_dag.branch_hitl" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "task": { + "hitl_summary": { + "subject": "Branch HITL subject", + "body": "Branch HITL body — routes to path_a.", + "options": [ + "Some task", + "Another task" + ], + "defaults": [ + "Some task" + ], + "multiple": false, + "assigned_users": [ + { + "id": "user-003", + "name": "Branch User" + } + ], + "serialized_params": null, + "options_mapping": { + "Some task": "path_a_task", + "Another task": "path_b_task" + }, + "chosen_options": [ + "Some task" + ], + "params_input": {}, + "responded_at": "{{ is_datetime(result) }}", + "responded_by_user": null, + "branches_to_execute": [ + "path_a_task" + ] + } + } + } + } + }, + "job": { + "name": "openlineage_hitl_dag.branch_hitl" + } + }, + { + "eventType": "START", + "job": { + "name": "openlineage_hitl_dag.path_a_task" + } + }, + { + 
"eventType": "COMPLETE", + "job": { + "name": "openlineage_hitl_dag.path_a_task" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "task": { + "hitl_summary": { + "subject": "Entry HITL subject", + "body": "Entry HITL body for OpenLineage system test.", + "options": [ + "OK" + ], + "defaults": [ + "OK" + ], + "multiple": true, + "assigned_users": [ + { + "id": "user-004", + "name": "Entry User" + } + ], + "serialized_params": { + "note": { + "value": "default note", + "description": "Optional feedback note", + "schema": { + "type": "string" + } + } + } + } + } + } + } + }, + "job": { + "name": "openlineage_hitl_dag.entry_hitl" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "task": { + "hitl_summary": { + "subject": "Entry HITL subject", + "body": "Entry HITL body for OpenLineage system test.", + "options": [ + "OK" + ], + "defaults": [ + "OK" + ], + "multiple": true, + "assigned_users": [ + { + "id": "user-004", + "name": "Entry User" + } + ], + "serialized_params": { + "note": { + "value": "default note", + "description": "Optional feedback note", + "schema": { + "type": "string" + } + } + }, + "chosen_options": [ + "OK" + ], + "params_input": {}, + "responded_at": "{{ is_datetime(result) }}", + "responded_by_user": null + } + } + } + } + }, + "job": { + "name": "openlineage_hitl_dag.entry_hitl" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_manual_lineage_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_manual_lineage_dag.json new file mode 100644 index 0000000000000..0a0af4aabb817 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_manual_lineage_dag.json @@ -0,0 +1,1498 @@ +[ + { + "eventType": "RUNNING", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [ + { + "namespace": "s3://example-bucket", + "name": "raw/orders.csv" + } + ], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "airflow": { + "dag": { + "dag_id": "openlineage_manual_lineage_dag", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_manual_lineage_dag.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "tags": "[]", + "timetable": {}, + "timetable_summary": "None" + }, + "dagRun": { + "conf": {}, + "dag_id": "openlineage_manual_lineage_dag", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result is string }}", + "run_type": "manual", + "start_date": "{{ is_datetime(result) }}" + }, + "taskInstance": { + "try_number": "{{ result is number }}" + }, + "task": { + "depends_on_past": false, + "downstream_task_ids": "['datasets_maximal']", + "executor_config": {}, + "inlets": "[]", + "multiple_outputs": false, + "operator_class": "{{ result is string }}", + 
"operator_class_path": "{{ result is string }}", + "outlets": "[]", + "owner": "airflow", + "priority_weight": 1, + "queue": "{{ result is string }}", + "retries": 0, + "retry_exponential_backoff": false, + "task_id": "datasets_minimal", + "trigger_rule": "all_success", + "upstream_task_ids": "[]", + "wait_for_downstream": false, + "wait_for_past_depends_before_skipping": false + }, + "taskUuid": "{{ is_uuid(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "nominalTime": { + "nominalStartTime": "{{ is_datetime(result) }}", + "nominalEndTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.datasets_minimal", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "TASK", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + } + } + } + }, + { + "eventType": "RUNNING", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [ + { + "namespace": "s3://example-bucket", + "name": "raw/2024/01/01/orders.csv", + "facets": { + "dataQualityAssertions": { + "assertions": [ + { + "assertion": "not_null", + "success": true, + "column": "id" + }, + { + "assertion": "unique", + "success": true, + "column": "id" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + 
"_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DataQualityAssertionsDatasetFacet.json\\#\\/\\$defs\\/DataQualityAssertionsDatasetFacet$\") }}" + } + } + } + ], + "outputs": [ + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.orders_enriched", + "facets": { + "dataQualityAssertions": { + "assertions": [ + { + "assertion": "not_null", + "success": true, + "column": "id" + }, + { + "assertion": "unique", + "success": true, + "column": "id" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DataQualityAssertionsDatasetFacet.json\\#\\/\\$defs\\/DataQualityAssertionsDatasetFacet$\") }}" + } + } + } + ], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "airflow": { + "dag": { + "dag_id": "openlineage_manual_lineage_dag", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_manual_lineage_dag.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "tags": "[]", + "timetable": {}, + "timetable_summary": "None" + }, + "dagRun": { + "conf": {}, + "dag_id": "openlineage_manual_lineage_dag", + 
"data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result is string }}", + "run_type": "manual", + "start_date": "{{ is_datetime(result) }}" + }, + "taskInstance": { + "try_number": "{{ result is number }}" + }, + "task": { + "depends_on_past": false, + "downstream_task_ids": "['query_minimal']", + "executor_config": {}, + "inlets": "[]", + "multiple_outputs": false, + "operator_class": "{{ result is string }}", + "operator_class_path": "{{ result is string }}", + "outlets": "[]", + "owner": "airflow", + "priority_weight": 1, + "queue": "{{ result is string }}", + "retries": 0, + "retry_exponential_backoff": false, + "task_id": "datasets_maximal", + "trigger_rule": "all_success", + "upstream_task_ids": "['datasets_minimal']", + "wait_for_downstream": false, + "wait_for_past_depends_before_skipping": false + }, + "taskUuid": "{{ is_uuid(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "nominalTime": { + "nominalStartTime": "{{ is_datetime(result) }}", + "nominalEndTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + }, + "my_custom_run_facet": { + "key": "value" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.datasets_maximal", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "TASK", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "sourceCodeLocation": { + "type": "git", + "url": "https://github.com/apache/airflow", + "repoUrl": "https://github.com/apache/airflow", + "path": "providers/openlineage/tests/system/openlineage/example_openlineage_manual_lineage_dag.py", + "version": "main", + 
"branch": "main", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SourceCodeLocationJobFacet.json\\#\\/\\$defs\\/SourceCodeLocationJobFacet$\") }}" + } + } + } + }, + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_minimal" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-min-1", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_minimal.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_minimal" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-min-1", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": 
"OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_minimal.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + } + } + } + }, + { + "eventType": "START", + "eventTime": "2024-05-01T10:00:00+00:00", + "inputs": [ + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.user_events_extra", + "facets": { + "dataQualityAssertions": { + "assertions": [ + { + "assertion": "not_null", + "success": true, + "column": "id" + }, + { + "assertion": "unique", + "success": true, + "column": "id" + } + ] + } + } + }, + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.user_events" + } + ], + "outputs": [ + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.user_events_summary", + "facets": { + "dataQualityAssertions": { + "assertions": [ + { + "assertion": "not_null", + "success": true, + "column": "id" + }, + { + "assertion": "unique", + "success": true, + "column": "id" + } + ] + } + } + }, + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.user_events_summary" + } + ], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": 
"{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_maximal" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-max-1", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_maximal.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "INSERT INTO analytics.public.user_events_summary SELECT user_id, COUNT(*) FROM analytics.public.user_events GROUP BY user_id", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "2024-05-01T10:00:05+00:00", + "inputs": [ + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.user_events_extra", + "facets": { + "dataQualityAssertions": { + "assertions": [ + { + "assertion": "not_null", + "success": true, + "column": "id" + }, + { + "assertion": "unique", + "success": true, + "column": "id" + } + ] + } + } + }, + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.user_events" + } + ], + "outputs": [ + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.user_events_summary", + "facets": { + "dataQualityAssertions": { + "assertions": [ + { + "assertion": "not_null", + "success": true, + "column": "id" + }, + { + "assertion": "unique", + "success": true, + "column": "id" + } + ] + } + } + }, + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.user_events_summary" + } + ], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_maximal" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-max-1", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_maximal.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "INSERT INTO analytics.public.user_events_summary SELECT user_id, COUNT(*) FROM analytics.public.user_events GROUP BY user_id", + "_producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [ + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.users" + } + ], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-multi-1", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT id, email FROM analytics.public.users WHERE active = true", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [ + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.users" + } + ], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + 
}, + "externalQuery": { + "externalQueryId": "qid-multi-1", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT id, email FROM analytics.public.users WHERE active = true", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [ + { + "namespace": "snowflake://example-acct", 
+ "name": "analytics.public.orders" + } + ], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-multi-2", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task.manual_query.2", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT * FROM analytics.public.orders WHERE created_at > '2024-01-01'", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [ + { + "namespace": "snowflake://example-acct", + "name": "analytics.public.orders" + } + ], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-multi-2", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task.manual_query.2", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT * FROM analytics.public.orders WHERE created_at > '2024-01-01'", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": 
"openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-multi-3", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "errorMessage": { + "message": "syntax error at or near 'broken'", + "programmingLanguage": "SQL", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ErrorMessageRunFacet.json\\#\\/\\$defs\\/ErrorMessageRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task.manual_query.3", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + 
"processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT broken(", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "FAIL", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-multi-3", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "errorMessage": { + "message": "syntax error at or near 
'broken'", + "programmingLanguage": "SQL", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ErrorMessageRunFacet.json\\#\\/\\$defs\\/ErrorMessageRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_multiple_in_one_task.manual_query.3", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT broken(", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": 
"openlineage_manual_lineage_dag.query_isolated_task" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-isolated-1", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_isolated_task.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT 1", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_isolated_task" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-isolated-1", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_isolated_task.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT 1", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_with_explicit_job_name" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + 
"externalQuery": { + "externalQueryId": "qid-explicit-name", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "custom_job_name_set_by_caller", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT version()", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is 
string }}", + "name": "openlineage_manual_lineage_dag.query_with_explicit_job_name" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-explicit-name", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "custom_job_name_set_by_caller", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT version()", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_with_explicit_task_instance" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-explicit-ti", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_with_explicit_task_instance.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT now()", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_with_explicit_task_instance" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_manual_lineage_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "externalQuery": { + "externalQueryId": "qid-explicit-ti", + "source": "snowflake://example-acct", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ExternalQueryRunFacet.json\\#\\/\\$defs\\/ExternalQueryRunFacet$\") }}" + }, + "tags": { + "tags": [ + { + "key": "openlineage_client_version", + "source": "OPENLINEAGE_CLIENT", + "value": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsRunFacet.json\\#\\/\\$defs\\/TagsRunFacet\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_manual_lineage_dag.query_with_explicit_task_instance.manual_query.1", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "QUERY", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "sql": { + "query": "SELECT now()", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/SQLJobFacet.json\\#\\/\\$defs\\/SQLJobFacet$\") }}" + } + } + } + } +] diff --git 
a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_asset_or_time_dag__af3.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_asset_or_time_dag__af3.json index 509cc731e52d1..16b48be4bc16d 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_asset_or_time_dag__af3.json +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_asset_or_time_dag__af3.json @@ -54,7 +54,9 @@ "uri": "s3://bucket4/file.txt", "name": "s3://bucket4/file.txt", "group": "asset", - "extra": {"b": 2} + "extra": { + "b": 2 + } } ] } @@ -125,7 +127,9 @@ "uri": "s3://bucket4/file.txt", "name": "s3://bucket4/file.txt", "group": "asset", - "extra": {"b": 2} + "extra": { + "b": 2 + } } ] } @@ -196,7 +200,9 @@ "uri": "s3://bucket4/file.txt", "name": "s3://bucket4/file.txt", "group": "asset", - "extra": {"b": 2} + "extra": { + "b": 2 + } } ] } diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_cron_trigger_timetable_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_cron_trigger_timetable_dag.json new file mode 100644 index 0000000000000..e1743f662a435 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_cron_trigger_timetable_dag.json @@ -0,0 +1,65 @@ +[ + { + "eventType": "START", + "run": { + "facets": { + "airflowDagRun": { + "dag": { + "timetable": { + "expression": "21 13 29 2 4", + "timezone": "UTC", + "interval": 0.0, + "run_immediately": false + }, + "timetable_summary": "21 13 29 2 4" + } + } + } + }, + "job": { + "name": "openlineage_schedule_cron_trigger_timetable_dag" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "expression": "21 13 29 2 4", + "timezone": "UTC", + "interval": 0.0, + "run_immediately": false + }, + "timetable_summary": "21 13 
29 2 4" + } + } + } + }, + "job": { + "name": "openlineage_schedule_cron_trigger_timetable_dag.do_nothing_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "expression": "21 13 29 2 4", + "timezone": "UTC", + "interval": 0.0, + "run_immediately": false + }, + "timetable_summary": "21 13 29 2 4" + } + } + } + }, + "job": { + "name": "openlineage_schedule_cron_trigger_timetable_dag.do_nothing_task" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_delta_trigger_timetable_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_delta_trigger_timetable_dag.json new file mode 100644 index 0000000000000..c3fb285644225 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_delta_trigger_timetable_dag.json @@ -0,0 +1,59 @@ +[ + { + "eventType": "START", + "run": { + "facets": { + "airflowDagRun": { + "dag": { + "timetable": { + "delta": 85276800.0, + "interval": 0.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": "openlineage_schedule_delta_trigger_timetable_dag" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "delta": 85276800.0, + "interval": 0.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": "openlineage_schedule_delta_trigger_timetable_dag.do_nothing_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "delta": 85276800.0, + "interval": 0.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": "openlineage_schedule_delta_trigger_timetable_dag.do_nothing_task" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_list_complex_assets_dag__af3.json 
b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_list_complex_assets_dag__af3.json index 003e149339dec..2afd6936a5fc8 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_list_complex_assets_dag__af3.json +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_list_complex_assets_dag__af3.json @@ -11,51 +11,52 @@ "objects": [ { "__type": "asset_all", - "objects": [ - { - "__type": "asset_any", - "objects": [ - { - "__type": "asset", - "uri": "s3://bucket/file.txt", - "name": "s3://bucket/file.txt", - "group": "asset", - "extra": { - "a": 1 - } - }, - { - "__type": "asset", - "uri": "s3://bucket2/file.txt", - "name": "s3://bucket2/file.txt", - "group": "asset", - "extra": {} - } - ] - }, - { - "__type": "asset_any", "objects": [ { - "__type": "asset", - "uri": "s3://bucket3/file.txt", - "name": "s3://bucket3/file.txt", - "group": "asset", - "extra": {} + "__type": "asset_any", + "objects": [ + { + "__type": "asset", + "uri": "s3://bucket/file.txt", + "name": "s3://bucket/file.txt", + "group": "asset", + "extra": { + "a": 1 + } + }, + { + "__type": "asset", + "uri": "s3://bucket2/file.txt", + "name": "s3://bucket2/file.txt", + "group": "asset", + "extra": {} + } + ] }, { - "__type": "asset", - "uri": "s3://bucket4/file.txt", - "name": "s3://bucket4/file.txt", - "group": "asset", - "extra": {"b": 2} + "__type": "asset_any", + "objects": [ + { + "__type": "asset", + "uri": "s3://bucket3/file.txt", + "name": "s3://bucket3/file.txt", + "group": "asset", + "extra": {} + }, + { + "__type": "asset", + "uri": "s3://bucket4/file.txt", + "name": "s3://bucket4/file.txt", + "group": "asset", + "extra": { + "b": 2 + } + } + ] } ] } ] - } - ] - } }, "timetable_summary": "Asset" @@ -79,51 +80,52 @@ "objects": [ { "__type": "asset_all", - "objects": [ - { - "__type": "asset_any", "objects": [ { - "__type": "asset", - "uri": "s3://bucket/file.txt", - "name": 
"s3://bucket/file.txt", - "group": "asset", - "extra": { - "a": 1 - } + "__type": "asset_any", + "objects": [ + { + "__type": "asset", + "uri": "s3://bucket/file.txt", + "name": "s3://bucket/file.txt", + "group": "asset", + "extra": { + "a": 1 + } + }, + { + "__type": "asset", + "uri": "s3://bucket2/file.txt", + "name": "s3://bucket2/file.txt", + "group": "asset", + "extra": {} + } + ] }, { - "__type": "asset", - "uri": "s3://bucket2/file.txt", - "name": "s3://bucket2/file.txt", - "group": "asset", - "extra": {} + "__type": "asset_any", + "objects": [ + { + "__type": "asset", + "uri": "s3://bucket3/file.txt", + "name": "s3://bucket3/file.txt", + "group": "asset", + "extra": {} + }, + { + "__type": "asset", + "uri": "s3://bucket4/file.txt", + "name": "s3://bucket4/file.txt", + "group": "asset", + "extra": { + "b": 2 + } + } + ] } ] - }, - { - "__type": "asset_any", - "objects": [ - { - "__type": "asset", - "uri": "s3://bucket3/file.txt", - "name": "s3://bucket3/file.txt", - "group": "asset", - "extra": {} - }, - { - "__type": "asset", - "uri": "s3://bucket4/file.txt", - "name": "s3://bucket4/file.txt", - "group": "asset", - "extra": {"b": 2} - } - ] - } - ] } ] - } }, "timetable_summary": "Asset" @@ -147,51 +149,52 @@ "objects": [ { "__type": "asset_all", - "objects": [ - { - "__type": "asset_any", - "objects": [ - { - "__type": "asset", - "uri": "s3://bucket/file.txt", - "name": "s3://bucket/file.txt", - "group": "asset", - "extra": { - "a": 1 - } - }, - { - "__type": "asset", - "uri": "s3://bucket2/file.txt", - "name": "s3://bucket2/file.txt", - "group": "asset", - "extra": {} - } - ] - }, - { - "__type": "asset_any", "objects": [ { - "__type": "asset", - "uri": "s3://bucket3/file.txt", - "name": "s3://bucket3/file.txt", - "group": "asset", - "extra": {} + "__type": "asset_any", + "objects": [ + { + "__type": "asset", + "uri": "s3://bucket/file.txt", + "name": "s3://bucket/file.txt", + "group": "asset", + "extra": { + "a": 1 + } + }, + { + "__type": "asset", + 
"uri": "s3://bucket2/file.txt", + "name": "s3://bucket2/file.txt", + "group": "asset", + "extra": {} + } + ] }, { - "__type": "asset", - "uri": "s3://bucket4/file.txt", - "name": "s3://bucket4/file.txt", - "group": "asset", - "extra": {"b": 2} + "__type": "asset_any", + "objects": [ + { + "__type": "asset", + "uri": "s3://bucket3/file.txt", + "name": "s3://bucket3/file.txt", + "group": "asset", + "extra": {} + }, + { + "__type": "asset", + "uri": "s3://bucket4/file.txt", + "name": "s3://bucket4/file.txt", + "group": "asset", + "extra": { + "b": 2 + } + } + ] } ] } ] - } - ] - } }, "timetable_summary": "Asset" diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_multiple_assets_dag__af3.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_multiple_assets_dag__af3.json index 40ae29b228e1b..a038561e9671d 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_multiple_assets_dag__af3.json +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_multiple_assets_dag__af3.json @@ -45,7 +45,9 @@ "uri": "s3://bucket4/file.txt", "name": "s3://bucket4/file.txt", "group": "asset", - "extra": {"b": 2} + "extra": { + "b": 2 + } } ] } @@ -107,7 +109,9 @@ "uri": "s3://bucket4/file.txt", "name": "s3://bucket4/file.txt", "group": "asset", - "extra": {"b": 2} + "extra": { + "b": 2 + } } ] } @@ -169,7 +173,9 @@ "uri": "s3://bucket4/file.txt", "name": "s3://bucket4/file.txt", "group": "asset", - "extra": {"b": 2} + "extra": { + "b": 2 + } } ] } diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_multiple_cron_trigger_timetable_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_multiple_cron_trigger_timetable_dag.json new file mode 100644 index 0000000000000..c989e61330f06 --- /dev/null +++ 
b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_multiple_cron_trigger_timetable_dag.json @@ -0,0 +1,74 @@ +[ + { + "eventType": "START", + "run": { + "facets": { + "airflowDagRun": { + "dag": { + "timetable": { + "expressions": [ + "21 13 29 2 4", + "9 27 29 2 4" + ], + "timezone": "UTC", + "interval": 0.0, + "run_immediately": false + }, + "timetable_summary": "21 13 29 2 4, 9 27 29 2 4" + } + } + } + }, + "job": { + "name": "openlineage_schedule_multiple_cron_trigger_timetable_dag" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "expressions": [ + "21 13 29 2 4", + "9 27 29 2 4" + ], + "timezone": "UTC", + "interval": 0.0, + "run_immediately": false + }, + "timetable_summary": "21 13 29 2 4, 9 27 29 2 4" + } + } + } + }, + "job": { + "name": "openlineage_schedule_multiple_cron_trigger_timetable_dag.do_nothing_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "expressions": [ + "21 13 29 2 4", + "9 27 29 2 4" + ], + "timezone": "UTC", + "interval": 0.0, + "run_immediately": false + }, + "timetable_summary": "21 13 29 2 4, 9 27 29 2 4" + } + } + } + }, + "job": { + "name": "openlineage_schedule_multiple_cron_trigger_timetable_dag.do_nothing_task" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_timedelta_dag__af2.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_timedelta_dag__af2.json new file mode 100644 index 0000000000000..b745e1ea14c2c --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_timedelta_dag__af2.json @@ -0,0 +1,56 @@ +[ + { + "eventType": "START", + "run": { + "facets": { + "airflowDagRun": { + "dag": { + "timetable": { + "delta": 85276800.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": 
"openlineage_schedule_timedelta_dag" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "delta": 85276800.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": "openlineage_schedule_timedelta_dag.do_nothing_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "delta": 85276800.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": "openlineage_schedule_timedelta_dag.do_nothing_task" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_timedelta_dag__af3.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_timedelta_dag__af3.json new file mode 100644 index 0000000000000..0c75db8130994 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_schedule_timedelta_dag__af3.json @@ -0,0 +1,59 @@ +[ + { + "eventType": "START", + "run": { + "facets": { + "airflowDagRun": { + "dag": { + "timetable": { + "delta": 85276800.0, + "interval": 0.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": "openlineage_schedule_timedelta_dag" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "delta": 85276800.0, + "interval": 0.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": "openlineage_schedule_timedelta_dag.do_nothing_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "dag": { + "timetable": { + "delta": 85276800.0, + "interval": 0.0 + }, + "timetable_summary": "987 days, 0:00:00" + } + } + } + }, + "job": { + "name": "openlineage_schedule_timedelta_dag.do_nothing_task" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_task_groups_dag.json 
b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_task_groups_dag.json index c1b2124c6b715..93abd7b1cdba3 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_task_groups_dag.json +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_task_groups_dag.json @@ -45,7 +45,7 @@ "name": "openlineage_task_groups_dag.tg1.task_1" } }, - { + { "eventType": "START", "run": { "facets": { @@ -59,7 +59,7 @@ "tooltip": "", "upstream_group_ids": "['tg1']", "upstream_task_ids": "['tg1.task_1']" - } + } } } } diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_dag__af3_3_plus.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_dag__af3_3_plus.json new file mode 100644 index 0000000000000..ec2f37f73f535 --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_dag__af3_3_plus.json @@ -0,0 +1,428 @@ +[ + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "airflowDagRun": { + "dag": { + "dag_id": "openlineage_trigger_dag_child__notrigger", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_trigger_dag.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "tags": "{{ result[1:-1].split(', ') | sort == ['\"with\\'quote\"', \"'first'\", \"'second@'\", '\\'z\"e\\''] }}", + "timetable": {} + }, + "dagRun": { + "conf": { + "some_config": "value1", + "openlineage": { + "parentRunId": "3bb703d1-09c1-4a42-8da5-35a0b3216072", + 
"parentJobNamespace": "prod_biz", + "parentJobName": "get_files", + "rootParentRunId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e", + "rootParentJobNamespace": "prod_analytics", + "rootParentJobName": "generate_report_sales_e2e" + } + }, + "dag_id": "openlineage_trigger_dag_child__notrigger", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result.startswith('openlineage_trigger_dag_triggering_child') }}", + "run_type": "operator_triggered", + "start_date": "{{ is_datetime(result) }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "prod_biz", + "name": "get_files" + }, + "run": { + "runId": "3bb703d1-09c1-4a42-8da5-35a0b3216072" + }, + "root": { + "job": { + "name": "generate_report_sales_e2e", + "namespace": "prod_analytics" + }, + "run": { + "runId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + 
"openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_child__notrigger", + "facets": { + "documentation": { + "description": "MD DAG doc", + "contentType": "text/markdown", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" + }, + "jobType": { + "integration": "AIRFLOW", + "jobType": "DAG", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "tags": { + "tags": [ + { + "key": "first", + "value": "first", + "source": "AIRFLOW" + }, + { + "key": "second@", + "value": "second@", + "source": "AIRFLOW" + }, + { + "key": "with'quote", + 
"value": "with'quote", + "source": "AIRFLOW" + }, + { + "key": "z\"e", + "value": "z\"e", + "source": "AIRFLOW" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsJobFacet.json\\#\\/\\$defs\\/TagsJobFacet\") }}" + }, + "airflow": { + "taskGroups": {}, + "taskTree": {}, + "tasks": { + "do_nothing_task": { + "downstream_task_ids": [], + "emits_ol_events": "{{ result == true }}", + "is_setup": false, + "is_teardown": false, + "operator": "airflow.providers.standard.operators.bash.BashOperator", + "ui_label": "do_nothing_task" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/JobFacet\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "airflowState": { + "dagRunState": "success", + "tasksState": { + "do_nothing_task": "success" + }, + "tasksDuration": { + "do_nothing_task": "{{ result is number }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "airflowDagRun": { + "dag": { + "dag_id": 
"openlineage_trigger_dag_child__notrigger", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_trigger_dag.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "tags": "{{ result[1:-1].split(', ') | sort == ['\"with\\'quote\"', \"'first'\", \"'second@'\", '\\'z\"e\\''] }}", + "timetable": {} + }, + "dagRun": { + "conf": { + "some_config": "value1", + "openlineage": { + "parentRunId": "3bb703d1-09c1-4a42-8da5-35a0b3216072", + "parentJobNamespace": "prod_biz", + "parentJobName": "get_files", + "rootParentRunId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e", + "rootParentJobNamespace": "prod_analytics", + "rootParentJobName": "generate_report_sales_e2e" + } + }, + "dag_id": "openlineage_trigger_dag_child__notrigger", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result.startswith('openlineage_trigger_dag_triggering_child') }}", + "run_type": "operator_triggered", + "start_date": "{{ is_datetime(result) }}", + "duration": "{{ result is number }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "prod_biz", + "name": "get_files" + }, + "run": { + "runId": "3bb703d1-09c1-4a42-8da5-35a0b3216072" + }, + "root": { + "job": { + "name": "generate_report_sales_e2e", + "namespace": "prod_analytics" + }, + "run": { + "runId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_child__notrigger", + "facets": { + "documentation": { + "description": "MD DAG doc", + "contentType": "text/markdown", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" 
+ }, + "tags": { + "tags": [ + { + "key": "first", + "value": "first", + "source": "AIRFLOW" + }, + { + "key": "second@", + "value": "second@", + "source": "AIRFLOW" + }, + { + "key": "with'quote", + "value": "with'quote", + "source": "AIRFLOW" + }, + { + "key": "z\"e", + "value": "z\"e", + "source": "AIRFLOW" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsJobFacet.json\\#\\/\\$defs\\/TagsJobFacet\") }}" + }, + "jobType": { + "integration": "AIRFLOW", + "jobType": "DAG", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + } + } + } + }, + { + "eventType": "START", + "run": { + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_child__notrigger" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "generate_report_sales_e2e", + "namespace": "prod_analytics" + }, + "run": { + "runId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag_child__notrigger.do_nothing_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": 
"openlineage_trigger_dag_child__notrigger" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "generate_report_sales_e2e", + "namespace": "prod_analytics" + }, + "run": { + "runId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag_child__notrigger.do_nothing_task" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "task": { + "trigger_dag_id": "openlineage_trigger_dag_child__notrigger", + "trigger_run_id": "{{ result.startswith('openlineage_trigger_dag_triggering_child_202') }}" + } + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag.trigger_dagrun" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "task": { + "trigger_dag_id": "openlineage_trigger_dag_child__notrigger", + "trigger_run_id": "{{ result.startswith('openlineage_trigger_dag_triggering_child_202') }}" + } + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag" + }, + "run": { + "runId": 
"{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag.trigger_dagrun" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_dag_deferrable__af3_3_plus.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_dag_deferrable__af3_3_plus.json new file mode 100644 index 0000000000000..506da9437521e --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_dag_deferrable__af3_3_plus.json @@ -0,0 +1,660 @@ +[ + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "airflowDagRun": { + "dag": { + "dag_id": "openlineage_trigger_dag_deferrable_child__notrigger", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_trigger_dag_deferrable.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "timetable": {} + }, + "dagRun": { + "conf": { + "some_config": "value1", + "openlineage": { + "parentRunId": "{{ is_uuid(result) }}", + "parentJobNamespace": "{{ result is string }}", + "parentJobName": 
"openlineage_trigger_dag_deferrable.trigger_dagrun", + "rootParentRunId": "{{ is_uuid(result) }}", + "rootParentJobNamespace": "{{ result is string }}", + "rootParentJobName": "openlineage_trigger_dag_deferrable" + } + }, + "dag_id": "openlineage_trigger_dag_deferrable_child__notrigger", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result.startswith('manual__202') }}", + "run_type": "operator_triggered", + "start_date": "{{ is_datetime(result) }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable.trigger_dagrun" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": 
"Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child__notrigger", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "DAG", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "airflow": { + "taskGroups": {}, + "taskTree": {}, + "tasks": { + "trigger_dagrun2": { + "downstream_task_ids": [], + "emits_ol_events": "{{ result == true }}", + "is_setup": false, + "is_teardown": false, + "operator": "airflow.providers.standard.operators.trigger_dagrun.TriggerDagRunOperator", + "ui_label": "trigger_dagrun2" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/JobFacet\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "airflowState": { + "dagRunState": "success", + "tasksState": { + "trigger_dagrun2": "success" + }, + "tasksDuration": { + "trigger_dagrun2": "{{ result is number }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "airflowDagRun": { + "dag": { + "dag_id": "openlineage_trigger_dag_deferrable_child__notrigger", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_trigger_dag_deferrable.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "timetable": {} + }, + "dagRun": { + "conf": { + "some_config": "value1", + "openlineage": { + "parentRunId": "{{ is_uuid(result) }}", + "parentJobNamespace": "{{ result is string }}", + "parentJobName": "openlineage_trigger_dag_deferrable.trigger_dagrun", + "rootParentRunId": "{{ is_uuid(result) }}", + "rootParentJobNamespace": "{{ result is string }}", + "rootParentJobName": "openlineage_trigger_dag_deferrable" + } + }, + "dag_id": "openlineage_trigger_dag_deferrable_child__notrigger", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ 
result.startswith('manual__202') }}", + "run_type": "operator_triggered", + "start_date": "{{ is_datetime(result) }}", + "duration": "{{ result is number }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable.trigger_dagrun" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child__notrigger", + "facets": { + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "jobType": { + "integration": "AIRFLOW", + "jobType": "DAG", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + } + } + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "task": { + "trigger_dag_id": "openlineage_trigger_dag_deferrable_child2__notrigger" + } + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child__notrigger" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag_deferrable_child__notrigger.trigger_dagrun2" + } + 
}, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "task": { + "trigger_dag_id": "openlineage_trigger_dag_deferrable_child2__notrigger", + "trigger_run_id": "{{ result.startswith('manual__202') }}" + } + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child__notrigger" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag_deferrable_child__notrigger.trigger_dagrun2" + } + }, + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "airflowDagRun": { + "dag": { + "dag_id": "openlineage_trigger_dag_deferrable_child2__notrigger", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_trigger_dag_deferrable.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "timetable": {} + }, + "dagRun": { + "conf": { + "openlineage": { + "parentRunId": "{{ is_uuid(result) }}", + "parentJobNamespace": "{{ result is string }}", + "parentJobName": "openlineage_trigger_dag_deferrable_child__notrigger.trigger_dagrun2", + 
"rootParentRunId": "{{ is_uuid(result) }}", + "rootParentJobNamespace": "{{ result is string }}", + "rootParentJobName": "openlineage_trigger_dag_deferrable" + } + }, + "dag_id": "openlineage_trigger_dag_deferrable_child2__notrigger", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result.startswith('manual__202') }}", + "run_type": "operator_triggered", + "start_date": "{{ is_datetime(result) }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child__notrigger.trigger_dagrun2" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + 
"openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child2__notrigger", + "facets": { + "jobType": { + "integration": "AIRFLOW", + "jobType": "DAG", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "airflow": { + "taskGroups": {}, + "taskTree": {}, + "tasks": { + "do_nothing_task": { + "downstream_task_ids": [], + "emits_ol_events": "{{ result == true }}", + "is_setup": false, + "is_teardown": false, + "operator": "airflow.providers.standard.operators.bash.BashOperator", + "ui_label": "do_nothing_task" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/JobFacet\") }}" + } + } + } + }, + { + "eventType": "COMPLETE", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "airflowState": { + "dagRunState": "success", + "tasksState": { + "do_nothing_task": "success" + }, + "tasksDuration": { + "do_nothing_task": "{{ result is number }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "airflowDagRun": { + "dag": { + "dag_id": "openlineage_trigger_dag_deferrable_child2__notrigger", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_trigger_dag_deferrable.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "timetable": {} + }, + "dagRun": { + "conf": { + "openlineage": { + "parentRunId": "{{ is_uuid(result) }}", + "parentJobNamespace": "{{ result is string }}", + "parentJobName": "openlineage_trigger_dag_deferrable_child__notrigger.trigger_dagrun2", + "rootParentRunId": "{{ is_uuid(result) }}", + "rootParentJobNamespace": "{{ result is string }}", + "rootParentJobName": "openlineage_trigger_dag_deferrable" + } + }, + "dag_id": "openlineage_trigger_dag_deferrable_child2__notrigger", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ 
result.startswith('manual__202') }}", + "run_type": "operator_triggered", + "start_date": "{{ is_datetime(result) }}", + "duration": "{{ result is number }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child__notrigger.trigger_dagrun2" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child2__notrigger", + "facets": { + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "jobType": { + "integration": "AIRFLOW", + "jobType": "DAG", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + } + } + } + }, + { + "eventType": "START", + "run": { + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable_child2__notrigger" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag_deferrable_child2__notrigger.do_nothing_task" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "parent": { + "job": { + "namespace": "{{ result 
is string }}", + "name": "openlineage_trigger_dag_deferrable_child2__notrigger" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag_deferrable_child2__notrigger.do_nothing_task" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "task": { + "deferrable": "{{ result == true }}", + "trigger_dag_id": "openlineage_trigger_dag_deferrable_child__notrigger" + } + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_dag_deferrable" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag_deferrable.trigger_dagrun" + } + }, + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "task": { + "deferrable": "{{ result == true }}", + "trigger_dag_id": "openlineage_trigger_dag_deferrable_child__notrigger", + "trigger_run_id": "{{ result.startswith('manual__') }}" + } + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + 
"name": "openlineage_trigger_dag_deferrable" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_dag_deferrable", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_dag_deferrable.trigger_dagrun" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_failed_dag.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_failed_dag.json new file mode 100644 index 0000000000000..77d671db9312a --- /dev/null +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_trigger_failed_dag.json @@ -0,0 +1,441 @@ +[ + { + "eventType": "START", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "airflowDagRun": { + "dag": { + "dag_id": "openlineage_trigger_failed_dag_child__notrigger", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_trigger_failed_dag.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "tags": "{{ result[1:-1].split(', ') | sort == ['\"with\\'quote\"', \"'first'\", \"'second@'\", '\\'z\"e\\''] }}", + "timetable": {} + }, + "dagRun": { + "conf": { + "some_config": "value1", + 
"openlineage": { + "parentRunId": "3bb703d1-09c1-4a42-8da5-35a0b3216072", + "parentJobNamespace": "prod_biz", + "parentJobName": "get_files", + "rootParentRunId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e", + "rootParentJobNamespace": "prod_analytics", + "rootParentJobName": "generate_report_sales_e2e" + } + }, + "dag_id": "openlineage_trigger_failed_dag_child__notrigger", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result.startswith('openlineage_trigger_failed_dag_triggering_child') }}", + "run_type": "manual", + "start_date": "{{ is_datetime(result) }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "prod_biz", + "name": "get_files" + }, + "run": { + "runId": "3bb703d1-09c1-4a42-8da5-35a0b3216072" + }, + "root": { + "job": { + "name": "generate_report_sales_e2e", + "namespace": "prod_analytics" + }, + "run": { + "runId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_failed_dag_child__notrigger", + "facets": { + "documentation": { + "description": "MD DAG doc", + "contentType": "text/markdown", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" + }, + "jobType": { + "integration": "AIRFLOW", + "jobType": "DAG", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "tags": { + 
"tags": [ + { + "key": "first", + "value": "first", + "source": "AIRFLOW" + }, + { + "key": "second@", + "value": "second@", + "source": "AIRFLOW" + }, + { + "key": "with'quote", + "value": "with'quote", + "source": "AIRFLOW" + }, + { + "key": "z\"e", + "value": "z\"e", + "source": "AIRFLOW" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsJobFacet.json\\#\\/\\$defs\\/TagsJobFacet\") }}" + }, + "airflow": { + "taskGroups": {}, + "taskTree": {}, + "tasks": { + "failing_task": { + "downstream_task_ids": [], + "emits_ol_events": "{{ result == true }}", + "is_setup": false, + "is_teardown": false, + "operator": "airflow.providers.standard.operators.bash.BashOperator", + "ui_label": "failing_task" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/JobFacet\") }}" + } + } + } + }, + { + "eventType": "FAIL", + "eventTime": "{{ is_datetime(result) }}", + "producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunEvent$\") }}", + "inputs": [], + "outputs": [], + "run": { + "runId": "{{ is_uuid(result) }}", + "facets": { + "errorMessage": { + "message": "task_failure", + "programmingLanguage": "python", + "stackTrace": null, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ErrorMessageRunFacet.json\\#\\/\\$defs\\/ErrorMessageRunFacet\") }}" + }, + "airflowState": { + "dagRunState": "failed", + "tasksState": { + "failing_task": "failed" + }, + "tasksDuration": { + "failing_task": "{{ result is number }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "airflowDagRun": { + "dag": { + "dag_id": "openlineage_trigger_failed_dag_child__notrigger", + "fileloc": "{{ result.endswith('openlineage/example_openlineage_trigger_failed_dag.py') }}", + "owner": "airflow", + "owner_links": {}, + "start_date": "{{ is_datetime(result) }}", + "tags": "{{ result[1:-1].split(', ') | sort == ['\"with\\'quote\"', \"'first'\", \"'second@'\", '\\'z\"e\\''] }}", + "timetable": {} + }, + "dagRun": { + "conf": { + "some_config": "value1", + "openlineage": { + "parentRunId": "3bb703d1-09c1-4a42-8da5-35a0b3216072", + "parentJobNamespace": "prod_biz", + "parentJobName": "get_files", + "rootParentRunId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e", + "rootParentJobNamespace": "prod_analytics", + "rootParentJobName": "generate_report_sales_e2e" + } + }, + "dag_id": "openlineage_trigger_failed_dag_child__notrigger", + "data_interval_end": "{{ is_datetime(result) }}", + "data_interval_start": "{{ is_datetime(result) }}", + "logical_date": "{{ is_datetime(result) }}", + "run_id": "{{ result.startswith('openlineage_trigger_failed_dag_triggering_child') }}", + "run_type": "manual", + "start_date": "{{ is_datetime(result) }}", + "duration": "{{ result is number }}" + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, 
\"^https:\\/\\/openlineage.io\\/spec\\/[\\d-]+\\/OpenLineage.json\\#\\/\\$defs\\/RunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "prod_biz", + "name": "get_files" + }, + "run": { + "runId": "3bb703d1-09c1-4a42-8da5-35a0b3216072" + }, + "root": { + "job": { + "name": "generate_report_sales_e2e", + "namespace": "prod_analytics" + }, + "run": { + "runId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + }, + "nominalTime": { + "nominalEndTime": "{{ is_datetime(result) }}", + "nominalStartTime": "{{ is_datetime(result) }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/NominalTimeRunFacet.json\\#\\/\\$defs\\/NominalTimeRunFacet$\") }}" + }, + "processing_engine": { + "name": "Airflow", + "openlineageAdapterVersion": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "version": "{{ regex_match(result, \"^[\\d]+\\.[\\d]+\\.[\\d]+.*\") }}", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ProcessingEngineRunFacet.json\\#\\/\\$defs\\/ProcessingEngineRunFacet$\") }}" + } + } + }, + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_failed_dag_child__notrigger", + "facets": { + "documentation": { + "description": "MD DAG doc", + "contentType": "text/markdown", + "_producer": "{{ regex_match(result, 
\"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/DocumentationJobFacet.json\\#\\/\\$defs\\/DocumentationJobFacet\") }}" + }, + "ownership": { + "owners": [ + { + "name": "airflow" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/OwnershipJobFacet.json\\#\\/\\$defs\\/OwnershipJobFacet\") }}" + }, + "tags": { + "tags": [ + { + "key": "first", + "value": "first", + "source": "AIRFLOW" + }, + { + "key": "second@", + "value": "second@", + "source": "AIRFLOW" + }, + { + "key": "with'quote", + "value": "with'quote", + "source": "AIRFLOW" + }, + { + "key": "z\"e", + "value": "z\"e", + "source": "AIRFLOW" + } + ], + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/TagsJobFacet.json\\#\\/\\$defs\\/TagsJobFacet\") }}" + }, + "jobType": { + "integration": "AIRFLOW", + "jobType": "DAG", + "processingType": "BATCH", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/JobTypeJobFacet.json\\#\\/\\$defs\\/JobTypeJobFacet\") }}" + } + } + } + }, + { + "eventType": "START", + "run": { + "facets": { + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_failed_dag_child__notrigger" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "generate_report_sales_e2e", + "namespace": 
"prod_analytics" + }, + "run": { + "runId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_failed_dag_child__notrigger.failing_task" + } + }, + { + "eventType": "FAIL", + "run": { + "facets": { + "errorMessage": { + "message": "Bash command failed. The command returned a non-zero exit code 1.", + "programmingLanguage": "python", + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ErrorMessageRunFacet.json\\#\\/\\$defs\\/ErrorMessageRunFacet\") }}" + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_failed_dag_child__notrigger" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "generate_report_sales_e2e", + "namespace": "prod_analytics" + }, + "run": { + "runId": "9d3b14f7-de91-40b6-aeef-e887e2c7673e" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_failed_dag_child__notrigger.failing_task" + } + }, + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "task": { + "trigger_dag_id": "openlineage_trigger_failed_dag_child__notrigger", + "trigger_run_id": "{{ 
result.startswith('openlineage_trigger_failed_dag_triggering_child_202') }}" + } + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_failed_dag" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_failed_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_failed_dag.trigger_dagrun" + } + }, + { + "eventType": "FAIL", + "run": { + "facets": { + "airflow": { + "task": { + "trigger_dag_id": "openlineage_trigger_failed_dag_child__notrigger", + "trigger_run_id": "{{ result.startswith('openlineage_trigger_failed_dag_triggering_child_202') }}" + } + }, + "parent": { + "job": { + "namespace": "{{ result is string }}", + "name": "openlineage_trigger_failed_dag" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + }, + "root": { + "job": { + "name": "openlineage_trigger_failed_dag", + "namespace": "{{ result is string }}" + }, + "run": { + "runId": "{{ is_uuid(result) }}" + } + }, + "_producer": "{{ regex_match(result, \"^https:\\/\\/github.com/apache/airflow/tree/providers-openlineage\\/[\\d]+\\.[\\d]+\\.[\\d]+.*$\") }}", + "_schemaURL": "{{ regex_match(result, \"^https:\\/\\/openlineage.io\\/spec\\/facets\\/[\\d-]+\\/ParentRunFacet.json\\#\\/\\$defs\\/ParentRunFacet$\") }}" + } + } + }, + "job": { + "name": "openlineage_trigger_failed_dag.trigger_dagrun" + } + } +] diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_versioned_dag__af3.json 
b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_versioned_dag__af3.json index 09ac861c065be..f472d5e6c8705 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_versioned_dag__af3.json +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_versioned_dag__af3.json @@ -1,53 +1,53 @@ [ - { - "eventType": "START", - "run": { - "facets": { - "airflowDagRun": { - "dagRun": { - "dag_id": "openlineage_versioned_dag", - "dag_bundle_name": "{{ result is string }}", - "dag_bundle_version": "{{ result is string }}", - "dag_version_id": "{{ is_uuid(result) }}", - "dag_version_number": "{{ result is number }}" - } + { + "eventType": "START", + "run": { + "facets": { + "airflowDagRun": { + "dagRun": { + "dag_id": "openlineage_versioned_dag", + "dag_bundle_name": "{{ result is string }}", + "dag_bundle_version": "{{ result is string }}", + "dag_version_id": "{{ is_uuid(result) }}", + "dag_version_number": "{{ result is number }}" + } + } + } + }, + "job": { + "name": "openlineage_versioned_dag" } - } }, - "job": { - "name": "openlineage_versioned_dag" - } - }, - { - "eventType": "START", - "run": { - "facets": { - "airflow": { - "taskInstance": { - "dag_bundle_name": "{{ result is string }}", - "dag_bundle_version": "{{ result is string }}" - } + { + "eventType": "START", + "run": { + "facets": { + "airflow": { + "taskInstance": { + "dag_bundle_name": "{{ result is string }}", + "dag_bundle_version": "{{ result is string }}" + } + } + } + }, + "job": { + "name": "openlineage_versioned_dag.do_nothing_task" } - } }, - "job": { - "name": "openlineage_versioned_dag.do_nothing_task" - } - }, - { - "eventType": "COMPLETE", - "run": { - "facets": { - "airflow": { - "taskInstance": { - "dag_bundle_name": "{{ result is string }}", - "dag_bundle_version": "{{ result is string }}" - } + { + "eventType": "COMPLETE", + "run": { + "facets": { + "airflow": { + "taskInstance": { + "dag_bundle_name": "{{ 
result is string }}", + "dag_bundle_version": "{{ result is string }}" + } + } + } + }, + "job": { + "name": "openlineage_versioned_dag.do_nothing_task" } - } - }, - "job": { - "name": "openlineage_versioned_dag.do_nothing_task" } - } ] diff --git a/providers/openlineage/tests/system/openlineage/operator.py b/providers/openlineage/tests/system/openlineage/operator.py index c0716b6c8f37e..a122a38f83dc9 100644 --- a/providers/openlineage/tests/system/openlineage/operator.py +++ b/providers/openlineage/tests/system/openlineage/operator.py @@ -47,6 +47,15 @@ def any(result: Any) -> Any: return result +def is_numeric(result: Any) -> str: + try: + float(result) + return "true" + except Exception: + pass + return "false" + + def is_datetime(result: Any) -> str: try: parse(result) @@ -107,6 +116,7 @@ def setup_jinja() -> Environment: env = Environment() env.globals["any"] = any env.globals["is_datetime"] = is_datetime + env.globals["is_numeric"] = is_numeric env.globals["is_uuid"] = is_uuid env.globals["regex_match"] = regex_match env.globals["env_var"] = env_var @@ -116,7 +126,7 @@ def setup_jinja() -> Environment: return env -def match(expected, result, env: Environment) -> bool: +def match(expected, result, env: Environment, path: list | None = None) -> bool: """ Check if result is "equal" to expected value. @@ -137,16 +147,26 @@ def match(expected, result, env: Environment) -> bool: ``"$optional"``. """ + if path is None: + path = [] + path_str = " > ".join(str(p) for p in path) if path else "" + if isinstance(expected, dict): # Only keys present in expected are checked — extra keys in the actual event are ignored. if not isinstance(result, dict): - log.error("Not a dict: %s\nExpected %s", result, expected) + log.error("Path `%s`: expected a dict but got `%s` (%s)", path_str, result, type(result).__name__) return False for k, v in expected.items(): # null sentinel: assert the key must NOT appear in the actual event. 
if v is None: if k in result: - log.error("Key %s should not be present in received event %s", k, result) + log.error( + "Path `%s`: key `%s` must be absent but got `%s` (%s)", + path_str, + k, + result[k], + type(result[k]).__name__, + ) return False continue @@ -169,32 +189,18 @@ def match(expected, result, env: Environment) -> bool: # At this point v is a plain template (dict, list, or scalar) with no sentinels. if k not in result: - log.error("Key %s not in received event %s\nExpected %s", k, result, expected) + log.error("Path `%s`: key `%s` not found in received event", path_str, k) return False - if not match(v, result[k], env): - log.error( - "For key %s, expected value %s not equals received %s\n\nExpected: %s,\n\n request: %s", - k, - v, - result[k], - expected, - result, - ) + if not match(v, result[k], env, path + [k]): return False elif isinstance(expected, list): # Lists must match exactly in length and order. Each element is compared recursively, # so nested sentinels (null, $optional) work inside list items too. 
if len(expected) != len(result): - log.error("Length does not match: expected %d, result: %d", len(expected), len(result)) + log.error("Path `%s`: expected %d item(s) but got %d", path_str, len(expected), len(result)) return False for i, x in enumerate(expected): - if not match(x, result[i], env): - log.error( - "List not matched at %d\nexpected:\n%s\nresult: \n%s", - i, - json.dumps(x), - json.dumps(result[i]), - ) + if not match(x, result[i], env, path + [i]): return False elif isinstance(expected, str): if "{{" in expected: @@ -205,19 +211,39 @@ def match(expected, result, env: Environment) -> bool: try: rendered = env.from_string(expected).render(result=result) except ValueError as e: - log.error("Error rendering jinja template %s: %s", expected, e) + log.error("Path `%s`: failed to render template `%s`: %s", path_str, expected, e) return False if str(rendered).lower() == "true" or rendered == result: return True - log.error("Rendered value %s does not equal 'true' or %s", rendered, result) + log.error( + "Path `%s`: template `%s` rendered to `%s` but expected `true` or actual value `%s` (%s)", + path_str, + expected, + rendered, + result, + type(result).__name__, + ) return False # Plain string: exact equality check. if expected != result: - log.error("Expected value %s does not equal result %s", expected, result) + log.error( + "Path `%s`: expected `%s` (str) but got `%s` (%s)", + path_str, + expected, + result, + type(result).__name__, + ) return False elif expected != result: # Scalar (int, bool, float, …): exact equality check. - log.error("Object of type %s: %s does not match %s", type(expected), expected, result) + log.error( + "Path `%s`: expected `%s` (%s) but got `%s` (%s)", + path_str, + expected, + type(expected).__name__, + result, + type(result).__name__, + ) return False return True