From fa73dff39f33b4c568c4438cb0709735add1c4d5 Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Fri, 29 Sep 2023 12:42:21 +0200 Subject: [PATCH 01/11] Add support for ZoneInfo and generic UTC Certain providers rely on other datetime implementations and fail to serialize. --- airflow/serialization/serializers/timezone.py | 49 ++++++++++++++++--- setup.py | 4 ++ .../serializers/test_serializers.py | 19 ++++++- 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index b55b51610b41b..73b8364037c1b 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -17,8 +17,11 @@ # under the License. from __future__ import annotations -from typing import TYPE_CHECKING +import datetime +from typing import TYPE_CHECKING, cast + +from airflow import PY39 from airflow.utils.module_loading import qualname if TYPE_CHECKING: @@ -27,7 +30,12 @@ from airflow.serialization.serde import U -serializers = ["pendulum.tz.timezone.FixedTimezone", "pendulum.tz.timezone.Timezone"] +serializers = [ + "pendulum.tz.timezone.FixedTimezone", + "pendulum.tz.timezone.Timezone", + "backports.zoneinfo.ZoneInfo", + "zoneinfo.ZoneInfo", +] deserializers = serializers __version__ = 1 @@ -43,21 +51,26 @@ def serialize(o: object) -> tuple[U, str, int, bool]: 0 without the special case), but passing 0 into ``pendulum.timezone`` does not give us UTC (but ``+00:00``). """ - from pendulum.tz.timezone import FixedTimezone, Timezone + from pendulum.tz.timezone import FixedTimezone name = qualname(o) + if isinstance(o, FixedTimezone): if o.offset == 0: return "UTC", name, __version__, True return o.offset, name, __version__, True - if isinstance(o, Timezone): - return o.name, name, __version__, True + tz_name = _get_tzinfo_name(cast(datetime.tzinfo, o)) + if tz_name is not None: + return tz_name, name, __version__, True + + if cast(datetime.tzinfo, o).utcoffset(None) == datetime.timedelta(0): + return "UTC", qualname(FixedTimezone), __version__, True return "", "", 0, False -def deserialize(classname: str, version: int, data: object) -> Timezone: +def deserialize(classname: str, version: int, data: object) -> Timezone | "ZoneInfo": from pendulum.tz import fixed_timezone, timezone if not isinstance(data, (str, int)): @@ -69,4 +82,28 @@ def deserialize(classname: str, version: int, data: object) -> Timezone: if isinstance(data, int): return fixed_timezone(data) + if "zoneinfo.ZoneInfo" in classname: # capturing backports and stdlib + if PY39: + from zoneinfo import ZoneInfo + else: + from backports.zoneinfo import ZoneInfo + return ZoneInfo(data) + return timezone(data) + + +def _get_tzinfo_name(tzinfo: datetime.tzinfo | None) -> str | None: + if tzinfo is None: + return None + + if hasattr(tzinfo, "key"): + # zoneinfo timezone + return tzinfo.key + elif hasattr(tzinfo, "name"): + # Pendulum timezone + return tzinfo.name + elif hasattr(tzinfo, "zone"): + # pytz timezone + return tzinfo.zone # type: ignore[no-any-return] + + return None diff --git a/setup.py b/setup.py index d5b8c333d0c93..6c8fde34b3337 100644 --- a/setup.py +++ b/setup.py @@ -270,6 +270,9 @@ def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_ve atlas = [ "atlasclient>=0.1.2", ] +backports = [ + "backports.zoneinfo>=0.2.1;python_version<'3.9'", +] celery = [ # The Celery is known to introduce problems when upgraded to a MAJOR version. Airflow Core # Uses Celery for CeleryExecutor, and we also know that Kubernetes Python client follows SemVer @@ -556,6 +559,7 @@ def get_unique_dependency_list(req_list_iterable: Iterable[list[str]]): CORE_EXTRAS_DEPENDENCIES: dict[str, list[str]] = { "aiobotocore": aiobotocore, "async": async_packages, + "backports": backports, "celery": celery, # TODO: remove and move to a regular provider package in a separate PR "cgroups": cgroups, "cncf.kubernetes": kubernetes, # TODO: remove and move to a regular provider package in a separate PR diff --git a/tests/serialization/serializers/test_serializers.py b/tests/serialization/serializers/test_serializers.py index 26e4ecea0eac1..01a619933b1c1 100644 --- a/tests/serialization/serializers/test_serializers.py +++ b/tests/serialization/serializers/test_serializers.py @@ -22,11 +22,18 @@ import numpy as np import pendulum.tz import pytest +from dateutil.tz import tzutc from pendulum import DateTime +from airflow import PY39 from airflow.models.param import Param, ParamsDict from airflow.serialization.serde import DATA, deserialize, serialize +if PY39: + from zoneinfo import ZoneInfo +else: + from backports.zoneinfo import ZoneInfo + class TestSerializers: def test_datetime(self): @@ -62,8 +69,18 @@ def test_datetime(self): d = deserialize(s) assert i.timestamp() == d.timestamp() - def test_deserialize_datetime_v1(self): + i = DateTime(2022, 7, 10, tzinfo=tzutc()) + s = serialize(i) + d = deserialize(s) + assert i.timestamp() == d.timestamp() + i = DateTime(2022, 7, 10, tzinfo=ZoneInfo("Europe/Paris")) + s = serialize(i) + d = deserialize(s) + assert i.timestamp() == d.timestamp() + + + def test_deserialize_datetime_v1(self): s = { "__classname__": "pendulum.datetime.DateTime", "__version__": 1, From 80ffdf63f783709dfc86c51ba6ea8fcffa06b1d6 Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Fri, 29 Sep 2023 19:45:33 +0200 Subject: [PATCH 02/11] Make backports part of core; Be forward and backward compatible --- airflow/serialization/serializers/timezone.py | 20 ++++++++++--------- setup.cfg | 1 + setup.py | 4 ---- .../serializers/test_serializers.py | 1 - tests/test_utils/db.py | 2 +- 5 files changed, 13 insertions(+), 15 deletions(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 73b8364037c1b..333c877fc4014 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -18,12 +18,18 @@ from __future__ import annotations import datetime - +import sys from typing import TYPE_CHECKING, cast -from airflow import PY39 from airflow.utils.module_loading import qualname +PY39 = sys.version_info >= (3, 9) + +if PY39: + from zoneinfo import ZoneInfo +else: + from backports.zoneinfo import ZoneInfo + if TYPE_CHECKING: from pendulum.tz.timezone import Timezone @@ -33,8 +39,7 @@ serializers = [ "pendulum.tz.timezone.FixedTimezone", "pendulum.tz.timezone.Timezone", - "backports.zoneinfo.ZoneInfo", - "zoneinfo.ZoneInfo", + qualname(ZoneInfo), ] deserializers = serializers @@ -70,7 +75,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]: return "", "", 0, False -def deserialize(classname: str, version: int, data: object) -> Timezone | "ZoneInfo": +def deserialize(classname: str, version: int, data: object) -> Timezone | ZoneInfo: from pendulum.tz import fixed_timezone, timezone if not isinstance(data, (str, int)): @@ -83,15 +88,12 @@ def deserialize(classname: str, version: int, data: object) -> Timezone | "ZoneI return fixed_timezone(data) if "zoneinfo.ZoneInfo" in classname: # capturing backports and stdlib - if PY39: - from zoneinfo import ZoneInfo - else: - from backports.zoneinfo import ZoneInfo return ZoneInfo(data) return timezone(data) +# ported from pendulum.tz.timezone._get_tzinfo_name def _get_tzinfo_name(tzinfo: datetime.tzinfo | None) -> str | None: if tzinfo is None: return None diff --git a/setup.cfg b/setup.cfg index 26c8cb2e0312a..93201cd3ac6b2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -72,6 +72,7 @@ install_requires = argcomplete>=1.10 asgiref attrs>=22.1.0 + backports.zoneinfo>=0.2.1;python_version<"3.9" blinker cattrs>=22.1.0 # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x diff --git a/setup.py b/setup.py index 6c8fde34b3337..d5b8c333d0c93 100644 --- a/setup.py +++ b/setup.py @@ -270,9 +270,6 @@ def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_ve atlas = [ "atlasclient>=0.1.2", ] -backports = [ - "backports.zoneinfo>=0.2.1;python_version<'3.9'", -] celery = [ # The Celery is known to introduce problems when upgraded to a MAJOR version. Airflow Core # Uses Celery for CeleryExecutor, and we also know that Kubernetes Python client follows SemVer @@ -559,7 +556,6 @@ def get_unique_dependency_list(req_list_iterable: Iterable[list[str]]): CORE_EXTRAS_DEPENDENCIES: dict[str, list[str]] = { "aiobotocore": aiobotocore, "async": async_packages, - "backports": backports, "celery": celery, # TODO: remove and move to a regular provider package in a separate PR "cgroups": cgroups, "cncf.kubernetes": kubernetes, # TODO: remove and move to a regular provider package in a separate PR diff --git a/tests/serialization/serializers/test_serializers.py b/tests/serialization/serializers/test_serializers.py index 01a619933b1c1..2c9c94e5c81cb 100644 --- a/tests/serialization/serializers/test_serializers.py +++ b/tests/serialization/serializers/test_serializers.py @@ -79,7 +79,6 @@ def test_datetime(self): d = deserialize(s) assert i.timestamp() == d.timestamp() - def test_deserialize_datetime_v1(self): s = { "__classname__": "pendulum.datetime.DateTime", diff --git a/tests/test_utils/db.py b/tests/test_utils/db.py index 43b6185eea7c8..bfcc6d90313d4 100644 --- a/tests/test_utils/db.py +++ b/tests/test_utils/db.py @@ -18,7 +18,7 @@ from __future__ import annotations from airflow.auth.managers.fab.models import Permission, Resource, assoc_permission_role -from airflow.jobs.job import Job +#from airflow.jobs.job import Job from airflow.models import ( Connection, DagModel, From 56f4a05ac9835ff1f321f446be6aa4d58bb12bdd Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Fri, 29 Sep 2023 19:56:16 +0200 Subject: [PATCH 03/11] Keep lazy --- airflow/serialization/serializers/timezone.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 333c877fc4014..9e2183c9012d4 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -25,11 +25,6 @@ PY39 = sys.version_info >= (3, 9) -if PY39: - from zoneinfo import ZoneInfo -else: - from backports.zoneinfo import ZoneInfo - if TYPE_CHECKING: from pendulum.tz.timezone import Timezone @@ -39,8 +34,13 @@ serializers = [ "pendulum.tz.timezone.FixedTimezone", "pendulum.tz.timezone.Timezone", - qualname(ZoneInfo), ] + +if PY39: + serializers.append("zoneinfo.ZoneInfo") +else: + serializers.append("backports.zoneinfo.ZoneInfo") + deserializers = serializers __version__ = 1 @@ -75,7 +75,9 @@ def serialize(o: object) -> tuple[U, str, int, bool]: return "", "", 0, False -def deserialize(classname: str, version: int, data: object) -> Timezone | ZoneInfo: +def deserialize( + classname: str, version: int, data: object +) -> Timezone | "zoneinfo.ZoneInfo" | "backports.zoneinfo.ZoneInfo": from pendulum.tz import fixed_timezone, timezone if not isinstance(data, (str, int)): @@ -88,6 +90,11 @@ def deserialize(classname: str, version: int, data: object) -> Timezone | ZoneIn return fixed_timezone(data) if "zoneinfo.ZoneInfo" in classname: # capturing backports and stdlib + if PY39: + from zoneinfo import ZoneInfo + else: + from backports.zoneinfo import ZoneInfo + return ZoneInfo(data) return timezone(data) From 0fe49b27c81b2b46cd79c8c873cfdc9e2647f729 Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Fri, 29 Sep 2023 22:44:52 +0200 Subject: [PATCH 04/11] Fix --- airflow/serialization/serializers/timezone.py | 11 +++-------- tests/test_utils/db.py | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 9e2183c9012d4..17b2ea011e391 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -38,8 +38,10 @@ if PY39: serializers.append("zoneinfo.ZoneInfo") + from zoneinfo import ZoneInfo else: serializers.append("backports.zoneinfo.ZoneInfo") + from backports.zoneinfo import ZoneInfo deserializers = serializers @@ -75,9 +77,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]: return "", "", 0, False -def deserialize( - classname: str, version: int, data: object -) -> Timezone | "zoneinfo.ZoneInfo" | "backports.zoneinfo.ZoneInfo": +def deserialize(classname: str, version: int, data: object) -> Timezone | ZoneInfo: from pendulum.tz import fixed_timezone, timezone if not isinstance(data, (str, int)): @@ -90,11 +90,6 @@ def deserialize( return fixed_timezone(data) if "zoneinfo.ZoneInfo" in classname: # capturing backports and stdlib - if PY39: - from zoneinfo import ZoneInfo - else: - from backports.zoneinfo import ZoneInfo - return ZoneInfo(data) return timezone(data) diff --git a/tests/test_utils/db.py b/tests/test_utils/db.py index bfcc6d90313d4..43b6185eea7c8 100644 --- a/tests/test_utils/db.py +++ b/tests/test_utils/db.py @@ -18,7 +18,7 @@ from __future__ import annotations from airflow.auth.managers.fab.models import Permission, Resource, assoc_permission_role -#from airflow.jobs.job import Job +from airflow.jobs.job import Job from airflow.models import ( Connection, DagModel, From beec76c2de4be8075b318b9e9ac99b91fa807f7b Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Thu, 5 Oct 2023 10:15:22 +0200 Subject: [PATCH 05/11] Always support backports --- airflow/serialization/serializers/timezone.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 17b2ea011e391..788052d9bee66 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -34,15 +34,10 @@ serializers = [ "pendulum.tz.timezone.FixedTimezone", "pendulum.tz.timezone.Timezone", + "zoneinfo.ZoneInfo", + "backports.zoneinfo.ZoneInfo", ] -if PY39: - serializers.append("zoneinfo.ZoneInfo") - from zoneinfo import ZoneInfo -else: - serializers.append("backports.zoneinfo.ZoneInfo") - from backports.zoneinfo import ZoneInfo - deserializers = serializers __version__ = 1 @@ -89,7 +84,17 @@ def deserialize(classname: str, version: int, data: object) -> Timezone | ZoneIn if isinstance(data, int): return fixed_timezone(data) - if "zoneinfo.ZoneInfo" in classname: # capturing backports and stdlib + if classname == "zoneinfo.ZoneInfo": + from zoneinfo import ZoneInfo + return ZoneInfo(data) + + if classname == "backports.zoneinfo.ZoneInfo": + # python version might have been upgraded, so we need to check + try: + from backports.zoneinfo import ZoneInfo + except ImportError: + from zoneinfo import ZoneInfo + return ZoneInfo(data) return timezone(data) From c035d29c73b85a4d22f192b0e5876c35d8e5ad2c Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Thu, 5 Oct 2023 10:18:05 +0200 Subject: [PATCH 06/11] Only import backports zoneinfo for tests --- setup.cfg | 1 - setup.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 93201cd3ac6b2..26c8cb2e0312a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -72,7 +72,6 @@ install_requires = argcomplete>=1.10 asgiref attrs>=22.1.0 - backports.zoneinfo>=0.2.1;python_version<"3.9" blinker cattrs>=22.1.0 # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x diff --git a/setup.py b/setup.py index d5b8c333d0c93..c69fffc7ae89f 100644 --- a/setup.py +++ b/setup.py @@ -462,6 +462,7 @@ def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_ve _devel_only_tests = [ "aioresponses", + "backports.zoneinfo>=0.2.1;python_version<'3.9'", "beautifulsoup4>=4.7.1", "coverage>=7.2", "pytest", From 3fc9917aff2362aa56dc8ee3f72bf5f3817a3743 Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Thu, 5 Oct 2023 10:20:58 +0200 Subject: [PATCH 07/11] Change typing --- airflow/serialization/serializers/timezone.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 788052d9bee66..2c68cc4f10189 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -19,7 +19,7 @@ import datetime import sys -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING, cast, Any from airflow.utils.module_loading import qualname @@ -72,7 +72,7 @@ def serialize(o: object) -> tuple[U, str, int, bool]: return "", "", 0, False -def deserialize(classname: str, version: int, data: object) -> Timezone | ZoneInfo: +def deserialize(classname: str, version: int, data: object) -> Any: from pendulum.tz import fixed_timezone, timezone if not isinstance(data, (str, int)): From 24553dc99326761008a6175a1d528e371ad42b3c Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Thu, 5 Oct 2023 10:36:21 +0200 Subject: [PATCH 08/11] Typing update --- airflow/serialization/serializers/timezone.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 2c68cc4f10189..86178333c662c 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -26,8 +26,6 @@ PY39 = sys.version_info >= (3, 9) if TYPE_CHECKING: - from pendulum.tz.timezone import Timezone - from airflow.serialization.serde import U From ffb09aa21ca9ae5eb833539d402ccd62e8722fec Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Thu, 5 Oct 2023 11:28:46 +0200 Subject: [PATCH 09/11] Adjust for linting --- airflow/serialization/serializers/timezone.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 86178333c662c..684290b11599c 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -19,7 +19,7 @@ import datetime import sys -from typing import TYPE_CHECKING, cast, Any +from typing import TYPE_CHECKING, Any, cast from airflow.utils.module_loading import qualname @@ -84,6 +84,7 @@ def deserialize(classname: str, version: int, data: object) -> Any: if classname == "zoneinfo.ZoneInfo": from zoneinfo import ZoneInfo + return ZoneInfo(data) if classname == "backports.zoneinfo.ZoneInfo": From 4b1e8af76c17b45e87628794f747a5f35f50465f Mon Sep 17 00:00:00 2001 From: Bolke de Bruin Date: Thu, 5 Oct 2023 11:29:39 +0200 Subject: [PATCH 10/11] Remove PY39 check --- airflow/serialization/serializers/timezone.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 684290b11599c..1d04685f92698 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -23,8 +23,6 @@ from airflow.utils.module_loading import qualname -PY39 = sys.version_info >= (3, 9) - if TYPE_CHECKING: from airflow.serialization.serde import U From 2915ae9e1f5e3a725e54c68da8e1f1d7032a0762 Mon Sep 17 00:00:00 2001 From: bolkedebruin Date: Fri, 6 Oct 2023 09:41:55 +0200 Subject: [PATCH 11/11] Update timezone.py --- airflow/serialization/serializers/timezone.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index 1d04685f92698..5d3b940cd78f4 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -18,7 +18,6 @@ from __future__ import annotations import datetime -import sys from typing import TYPE_CHECKING, Any, cast from airflow.utils.module_loading import qualname