From ef43a9df8348ba224b190a8729e13a2109187eee Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 19 Oct 2022 17:29:36 +0200 Subject: [PATCH 1/3] ARROW-18088: [CI][Python] Fix pandas master/nightly build failure related to timedelta --- python/pyarrow/tests/parquet/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index cbff41c7b105..3a5b42a2969e 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -173,7 +173,7 @@ def alltypes_sample(size=10000, seed=0, categorical=False): # them 'datetime': np.arange("2016-01-01T00:00:00.001", size, dtype='datetime64[ms]'), - 'timedelta': np.arange(0, size, dtype="timedelta64[s]"), + 'timedelta': np.arange(0, size, dtype="timedelta64[ns]"), 'str': pd.Series([str(x) for x in range(size)]), 'empty_str': [''] * size, 'str_with_nulls': [None] + [str(x) for x in range(size - 2)] + [None], From 5cdcc132407368983916d91e292bbafef9aeedc4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 20 Oct 2022 09:46:22 +0200 Subject: [PATCH 2/3] also fix for datetime64 --- python/pyarrow/tests/parquet/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index 3a5b42a2969e..09bc1ed7b3b4 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -172,7 +172,7 @@ def alltypes_sample(size=10000, seed=0, categorical=False): # TODO(wesm): Test other timestamp resolutions now that arrow supports # them 'datetime': np.arange("2016-01-01T00:00:00.001", size, - dtype='datetime64[ms]'), + dtype='datetime64[ms]').astype('datetime64[ns]'), 'timedelta': np.arange(0, size, dtype="timedelta64[ns]"), 'str': pd.Series([str(x) for x in range(size)]), 'empty_str': [''] * size, From ea809430582184cfba0df81aaf6ed4e24af8abed Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 21 Oct 2022 13:08:03 +0200 Subject: [PATCH 3/3] more fixes --- python/pyarrow/tests/parquet/test_dataset.py | 3 +++ python/pyarrow/tests/test_pandas.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index 6fdc74354189..ef75c3efac78 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -1252,6 +1252,7 @@ def _test_write_to_dataset_with_partitions(base_path, 'nan': [np.nan] * 10, 'date': np.arange('2017-01-01', '2017-01-11', dtype='datetime64[D]')}) + output_df["date"] = output_df["date"].astype('datetime64[ns]') cols = output_df.columns.tolist() partition_by = ['group1', 'group2'] output_table = pa.Table.from_pandas(output_df, schema=schema, safe=False, @@ -1312,6 +1313,7 @@ def _test_write_to_dataset_no_partitions(base_path, 'num': list(range(10)), 'date': np.arange('2017-01-01', '2017-01-11', dtype='datetime64[D]')}) + output_df["date"] = output_df["date"].astype('datetime64[ns]') cols = output_df.columns.tolist() output_table = pa.Table.from_pandas(output_df) @@ -1440,6 +1442,7 @@ def test_write_to_dataset_with_partitions_and_custom_filenames( 'nan': [np.nan] * 10, 'date': np.arange('2017-01-01', '2017-01-11', dtype='datetime64[D]')}) + output_df["date"] = output_df["date"].astype('datetime64[ns]') partition_by = ['group1', 'group2'] output_table = pa.Table.from_pandas(output_df) path = str(tempdir) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 7022441396e9..f843904f1264 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -69,7 +69,7 @@ def _alltypes_example(size=100): # TODO(wesm): Pandas only support ns resolution, Arrow supports s, ms, # us, ns 'datetime': np.arange("2016-01-01T00:00:00.001", size, - dtype='datetime64[ms]'), + dtype='datetime64[ms]').astype("datetime64[ns]"), 'str': [str(x) for x in range(size)], 'str_with_nulls': [None] + [str(x) for x in range(size - 2)] + [None], 'empty_str': [''] * size @@ -1016,7 +1016,7 @@ def test_timestamps_with_timezone(self): '2007-07-13T01:23:34.123', '2006-01-13T12:34:56.432', '2010-08-13T05:46:57.437'], - dtype='datetime64[ms]') + dtype='datetime64[ms]').astype("datetime64[ns]") }) df['datetime64'] = df['datetime64'].dt.tz_localize('US/Eastern') _check_pandas_roundtrip(df) @@ -2718,7 +2718,7 @@ def test_strided_data_import(self): cases.append(boolean_objects) cases.append(np.arange("2016-01-01T00:00:00.001", N * K, - dtype='datetime64[ms]') + dtype='datetime64[ms]').astype("datetime64[ns]") .reshape(N, K).copy()) strided_mask = (random_numbers > 0).astype(bool)[:, 0]