diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 38cc6221a2..923140bc80 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -491,7 +491,7 @@ class DayTransform(TimeTransform[S]): """Transforms a datetime value into a day value. Example: - >>> transform = MonthTransform() + >>> transform = DayTransform() >>> transform.transform(DateType())(17501) 17501 """ @@ -517,9 +517,6 @@ def day_func(v: Any) -> int: def can_transform(self, source: IcebergType) -> bool: return isinstance(source, (DateType, TimestampType, TimestamptzType)) - def result_type(self, source: IcebergType) -> IcebergType: - return DateType() - @property def granularity(self) -> TimeResolution: return TimeResolution.DAY diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py index 68b10f3262..6e7c7801e3 100644 --- a/tests/integration/test_inspect_table.py +++ b/tests/integration/test_inspect_table.py @@ -282,7 +282,7 @@ def test_inspect_entries_partitioned(spark: SparkSession, session_catalog: Catal df = session_catalog.load_table(identifier).inspect.entries() - assert df.to_pydict()["data_file"][0]["partition"] == {"dt_day": date(2021, 2, 1), "dt_month": None} + assert df.to_pydict()["data_file"][0]["partition"] == {"dt_day": 18659, "dt_month": None} assert df.to_pydict()["data_file"][1]["partition"] == {"dt_day": None, "dt_month": 612} @@ -452,7 +452,9 @@ def test_inspect_partitions_partitioned(spark: SparkSession, session_catalog: Ca def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None: lhs = df.to_pandas().sort_values("spec_id") - rhs = spark_df.toPandas().sort_values("spec_id") + # Spark does not store day partition values in the right type so we need to convert them + spark_df_arrow = pa.Table.from_pandas(spark_df.toPandas(), schema=df.schema) + rhs = spark_df_arrow.to_pandas().sort_values("spec_id") for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): assert left == right, f"Difference in column {column}: {left} != {right}" diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py index b199f00210..1a1ab23645 100644 --- a/tests/integration/test_writes/test_partitioned_writes.py +++ b/tests/integration/test_writes/test_partitioned_writes.py @@ -450,9 +450,7 @@ def test_append_ymd_transform_partitioned( [ pytest.param(YearTransform(), {53, 54, None}, id="year_transform"), pytest.param(MonthTransform(), {647, 648, 649, None}, id="month_transform"), - pytest.param( - DayTransform(), {date(2023, 12, 31), date(2024, 1, 1), date(2024, 1, 31), date(2024, 2, 1), None}, id="day_transform" - ), + pytest.param(DayTransform(), {19722, 19723, 19753, 19754, None}, id="day_transform"), pytest.param(HourTransform(), {473328, 473352, 474072, 474096, 474102, None}, id="hour_transform"), ], ) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 3a9ffd6009..ceaae429cd 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -280,7 +280,7 @@ def test_time_methods(type_var: PrimitiveType) -> None: assert DayTransform().preserves_order assert YearTransform().result_type(type_var) == IntegerType() assert MonthTransform().result_type(type_var) == IntegerType() - assert DayTransform().result_type(type_var) == DateType() + assert DayTransform().result_type(type_var) == IntegerType() assert YearTransform().dedup_name == "time" assert MonthTransform().dedup_name == "time" assert DayTransform().dedup_name == "time"