From f69ad7b33cc0ea9956bd684dd98afc4fc633ca50 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Mon, 14 Feb 2022 14:55:33 +0100 Subject: [PATCH 01/10] Add support for zoneinfo timezones --- cpp/src/arrow/python/datetime.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cpp/src/arrow/python/datetime.cc b/cpp/src/arrow/python/datetime.cc index 8c954998f0e..310938af29e 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/cpp/src/arrow/python/datetime.cc @@ -427,17 +427,22 @@ Result StringToTzinfo(const std::string& tz) { Result TzinfoToString(PyObject* tzinfo) { OwnedRef module_pytz; // import pytz OwnedRef module_datetime; // import datetime + OwnedRef module_zoneinfo; // import zoneinfo OwnedRef class_timezone; // from datetime import timezone OwnedRef class_fixedoffset; // from pytz import _FixedOffset + OwnedRef class_zoneinfo; // from zoneinfo import ZoneInfo // import necessary modules RETURN_NOT_OK(internal::ImportModule("pytz", &module_pytz)); RETURN_NOT_OK(internal::ImportModule("datetime", &module_datetime)); + RETURN_NOT_OK(internal::ImportModule("zoneinfo", &module_zoneinfo)); // import necessary classes RETURN_NOT_OK( internal::ImportFromModule(module_pytz.obj(), "_FixedOffset", &class_fixedoffset)); RETURN_NOT_OK( internal::ImportFromModule(module_datetime.obj(), "timezone", &class_timezone)); + RETURN_NOT_OK( + internal::ImportFromModule(module_zoneinfo.obj(), "ZoneInfo", &class_zoneinfo)); // check that it's a valid tzinfo object if (!PyTZInfo_Check(tzinfo)) { @@ -470,6 +475,15 @@ Result TzinfoToString(PyObject* tzinfo) { return result; } + // try to look up key attribute + if (PyObject_HasAttrString(tzinfo, "key")) { + OwnedRef key(PyObject_GetAttrString(tzinfo, "key")); + RETURN_IF_PYERROR(); + std::string result; + RETURN_NOT_OK(internal::PyUnicode_AsStdString(key.obj(), &result)); + return result; + } + // attempt to call tzinfo.tzname(None) OwnedRef tzname_object(PyObject_CallMethod(tzinfo, "tzname", "O", Py_None)); 
RETURN_IF_PYERROR(); From 062a4ef527ad8ab5076186f43748b2438068e627 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Mon, 14 Feb 2022 15:39:56 +0100 Subject: [PATCH 02/10] Add support for dateutil timezones --- cpp/src/arrow/python/datetime.cc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/python/datetime.cc b/cpp/src/arrow/python/datetime.cc index 310938af29e..fc2884a7189 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/cpp/src/arrow/python/datetime.cc @@ -428,21 +428,20 @@ Result TzinfoToString(PyObject* tzinfo) { OwnedRef module_pytz; // import pytz OwnedRef module_datetime; // import datetime OwnedRef module_zoneinfo; // import zoneinfo + OwnedRef module_dateutil; // import dateutil OwnedRef class_timezone; // from datetime import timezone OwnedRef class_fixedoffset; // from pytz import _FixedOffset - OwnedRef class_zoneinfo; // from zoneinfo import ZoneInfo // import necessary modules RETURN_NOT_OK(internal::ImportModule("pytz", &module_pytz)); RETURN_NOT_OK(internal::ImportModule("datetime", &module_datetime)); RETURN_NOT_OK(internal::ImportModule("zoneinfo", &module_zoneinfo)); + RETURN_NOT_OK(internal::ImportModule("dateutil", &module_dateutil)); // import necessary classes RETURN_NOT_OK( internal::ImportFromModule(module_pytz.obj(), "_FixedOffset", &class_fixedoffset)); RETURN_NOT_OK( internal::ImportFromModule(module_datetime.obj(), "timezone", &class_timezone)); - RETURN_NOT_OK( - internal::ImportFromModule(module_zoneinfo.obj(), "ZoneInfo", &class_zoneinfo)); // check that it's a valid tzinfo object if (!PyTZInfo_Check(tzinfo)) { @@ -484,6 +483,17 @@ Result TzinfoToString(PyObject* tzinfo) { return result; } + // try to look up _filename attribute + if (PyObject_HasAttrString(tzinfo, "_filename")) { + OwnedRef _filename(PyObject_GetAttrString(tzinfo, "_filename")); + RETURN_IF_PYERROR(); + std::string result; + RETURN_NOT_OK(internal::PyUnicode_AsStdString(_filename.obj(), &result)); + std::size_t pos 
= result.find("zoneinfo/"); + if (pos > 0) {return result.substr (pos+9);} + return result; + } + // attempt to call tzinfo.tzname(None) OwnedRef tzname_object(PyObject_CallMethod(tzinfo, "tzname", "O", Py_None)); RETURN_IF_PYERROR(); From 39b06167b538b266eca6d1f8767042a15e9e203e Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Mon, 14 Feb 2022 18:16:09 +0100 Subject: [PATCH 03/10] Linter corrections --- cpp/src/arrow/python/datetime.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/python/datetime.cc b/cpp/src/arrow/python/datetime.cc index fc2884a7189..624f4fa8f7f 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/cpp/src/arrow/python/datetime.cc @@ -490,7 +490,9 @@ Result TzinfoToString(PyObject* tzinfo) { std::string result; RETURN_NOT_OK(internal::PyUnicode_AsStdString(_filename.obj(), &result)); std::size_t pos = result.find("zoneinfo/"); - if (pos > 0) {return result.substr (pos+9);} + if (pos > 0) { + return result.substr(pos + 9); + } return result; } From 3523b9899309e99c84205b1437ca449cd5b5a05c Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 15 Feb 2022 08:48:01 +0100 Subject: [PATCH 04/10] Add tests for zoneinfo and dateutil --- python/pyarrow/tests/test_types.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 07715b985bd..3b2149899f2 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -24,6 +24,8 @@ import pickle import pytest import pytz +import zoneinfo +import dateutil.tz import hypothesis as h import hypothesis.strategies as st import hypothesis.extra.pytz as tzst @@ -297,7 +299,11 @@ def test_is_primitive(): (pytz.timezone('Etc/GMT-9'), 'Etc/GMT-9'), (pytz.FixedOffset(180), '+03:00'), (datetime.timezone.utc, 'UTC'), - (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30') + (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30'), +
(zoneinfo.ZoneInfo('Europe/Paris'), 'Europe/Paris'), + (zoneinfo.ZoneInfo('UTC'), 'UTC'), + (dateutil.tz.gettz('Europe/Brussels'), 'Europe/Brussels'), + (dateutil.tz.UTC, 'UTC') ]) def test_tzinfo_to_string(tz, expected): assert pa.lib.tzinfo_to_string(tz) == expected From 06ba5e0ddf79ee54ff29f79242ccfb804577cd65 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 15 Feb 2022 11:13:55 +0100 Subject: [PATCH 05/10] Remove ImportModule for zoneinfo and dateutil --- cpp/src/arrow/python/datetime.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/arrow/python/datetime.cc b/cpp/src/arrow/python/datetime.cc index 624f4fa8f7f..00022132091 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/cpp/src/arrow/python/datetime.cc @@ -435,8 +435,6 @@ Result TzinfoToString(PyObject* tzinfo) { // import necessary modules RETURN_NOT_OK(internal::ImportModule("pytz", &module_pytz)); RETURN_NOT_OK(internal::ImportModule("datetime", &module_datetime)); - RETURN_NOT_OK(internal::ImportModule("zoneinfo", &module_zoneinfo)); - RETURN_NOT_OK(internal::ImportModule("dateutil", &module_dateutil)); // import necessary classes RETURN_NOT_OK( internal::ImportFromModule(module_pytz.obj(), "_FixedOffset", &class_fixedoffset)); From fba3fa0733fd4e3b0b5b17b680ec64890702c1b7 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 15 Feb 2022 11:29:49 +0100 Subject: [PATCH 06/10] Add comment about key and _filename --- cpp/src/arrow/python/datetime.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/arrow/python/datetime.cc b/cpp/src/arrow/python/datetime.cc index 00022132091..fd1b43c5ac4 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/cpp/src/arrow/python/datetime.cc @@ -473,6 +473,7 @@ Result TzinfoToString(PyObject* tzinfo) { } // try to look up key attribute + // in case of zoneinfo object if (PyObject_HasAttrString(tzinfo, "key")) { OwnedRef key(PyObject_GetAttrString(tzinfo, "key")); RETURN_IF_PYERROR(); @@ -482,11 +483,14 @@ Result TzinfoToString(PyObject* tzinfo) { } 
// try to look up _filename attribute + // in case of dateutil.tz object if (PyObject_HasAttrString(tzinfo, "_filename")) { OwnedRef _filename(PyObject_GetAttrString(tzinfo, "_filename")); RETURN_IF_PYERROR(); std::string result; RETURN_NOT_OK(internal::PyUnicode_AsStdString(_filename.obj(), &result)); + // _filename returns a full path in general ('/usr/share/zoneinfo/Europe/Paris') + // or POSIX name on Windows ('Europe/Paris') - we need a substring in first case std::size_t pos = result.find("zoneinfo/"); if (pos > 0) { return result.substr(pos + 9); From 2b0467dc4065760c4bc2614855af7b79df164282 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 15 Feb 2022 12:10:05 +0100 Subject: [PATCH 07/10] Split test - zoneinfo available only with Python 3.9 --- python/pyarrow/tests/test_types.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 3b2149899f2..6f30cb81a3b 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -24,7 +24,6 @@ import pickle import pytest import pytz -import zoneinfo import dateutil.tz import hypothesis as h import hypothesis.strategies as st @@ -300,8 +299,6 @@ def test_is_primitive(): (pytz.FixedOffset(180), '+03:00'), (datetime.timezone.utc, 'UTC'), (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30'), - (zoneinfo.ZoneInfo('Europe/Paris'), 'Europe/Paris'), - (zoneinfo.ZoneInfo('UTC'), 'UTC'), (dateutil.tz.gettz('Europe/Brussels'), 'Europe/Brussels'), (dateutil.tz.UTC, 'UTC') ]) @@ -309,6 +306,15 @@ def test_tzinfo_to_string(tz, expected): assert pa.lib.tzinfo_to_string(tz) == expected +def test_zoneinfo_tzinfo_to_string(): + zoneinfo = pytest.importorskip('zoneinfo') + + tz = zoneinfo.ZoneInfo('UTC') + assert pa.lib.tzinfo_to_string(tz) == 'UTC' + tz = zoneinfo.ZoneInfo('Europe/Paris') + assert pa.lib.tzinfo_to_string(tz) == 'Europe/Paris' + + def test_tzinfo_to_string_errors(): 
msg = "Not an instance of datetime.tzinfo" with pytest.raises(TypeError): From 4bcf767b10c6bfc56db4ec56aea4030765b64a8f Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 16 Feb 2022 08:45:19 +0100 Subject: [PATCH 08/10] Change the check for pos != -1 --- cpp/src/arrow/python/datetime.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/python/datetime.cc b/cpp/src/arrow/python/datetime.cc index fd1b43c5ac4..91f8c58016d 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/cpp/src/arrow/python/datetime.cc @@ -492,7 +492,7 @@ Result TzinfoToString(PyObject* tzinfo) { // _filename returns a full path in general ('/usr/share/zoneinfo/Europe/Paris') // or POSIX name on Windows ('Europe/Paris') - we need a substring in first case std::size_t pos = result.find("zoneinfo/"); - if (pos > 0) { + if (pos != std::string::npos) { return result.substr(pos + 9); } return result; From 262162b1a2a8cc09964b6aa73523098da56c4418 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Fri, 18 Feb 2022 06:44:57 +0100 Subject: [PATCH 09/10] Remove declarations and split test for dateutil --- cpp/src/arrow/python/datetime.cc | 2 -- python/pyarrow/tests/test_types.py | 14 ++++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/python/datetime.cc b/cpp/src/arrow/python/datetime.cc index 91f8c58016d..89d92f101c0 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/cpp/src/arrow/python/datetime.cc @@ -427,8 +427,6 @@ Result StringToTzinfo(const std::string& tz) { Result TzinfoToString(PyObject* tzinfo) { OwnedRef module_pytz; // import pytz OwnedRef module_datetime; // import datetime - OwnedRef module_zoneinfo; // import zoneinfo - OwnedRef module_dateutil; // import dateutil OwnedRef class_timezone; // from datetime import timezone OwnedRef class_fixedoffset; // from pytz import _FixedOffset diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 6f30cb81a3b..ac656dd829e 100644 --- 
a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -24,7 +24,6 @@ import pickle import pytest import pytz -import dateutil.tz import hypothesis as h import hypothesis.strategies as st import hypothesis.extra.pytz as tzst @@ -298,14 +297,21 @@ def test_is_primitive(): (pytz.timezone('Etc/GMT-9'), 'Etc/GMT-9'), (pytz.FixedOffset(180), '+03:00'), (datetime.timezone.utc, 'UTC'), - (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30'), - (dateutil.tz.gettz('Europe/Brussels'), 'Europe/Brussels'), - (dateutil.tz.UTC, 'UTC') + (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30') ]) def test_tzinfo_to_string(tz, expected): assert pa.lib.tzinfo_to_string(tz) == expected +def test_dateutil_tzinfo_to_string(): + import dateutil.tz + + tz = dateutil.tz.UTC + assert pa.lib.tzinfo_to_string(tz) == 'UTC' + tz = dateutil.tz.gettz('Europe/Paris') + assert pa.lib.tzinfo_to_string(tz) == 'Europe/Paris' + + def test_zoneinfo_tzinfo_to_string(): zoneinfo = pytest.importorskip('zoneinfo') From 76e91dd207895ede283300e4340f95603f704cd2 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 22 Feb 2022 12:26:13 +0100 Subject: [PATCH 10/10] Update python/pyarrow/tests/test_types.py Co-authored-by: Joris Van den Bossche --- python/pyarrow/tests/test_types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index ac656dd829e..a46049f9a0a 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -304,6 +304,7 @@ def test_tzinfo_to_string(tz, expected): def test_dateutil_tzinfo_to_string(): + pytest.importorskip("dateutil") import dateutil.tz tz = dateutil.tz.UTC