From 4a40c1c51a8efb304c9450fcc59df69788b13348 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sun, 24 May 2026 13:59:43 -0700 Subject: [PATCH] Reject empty source-ids in partition and sort fields --- pyiceberg/partitioning.py | 3 ++- pyiceberg/table/sorting.py | 3 ++- tests/table/test_partitioning.py | 6 ++++++ tests/table/test_sorting.py | 6 ++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index 3de185d886..3074c30ea1 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -109,7 +109,8 @@ def __init__( @classmethod def map_source_ids_onto_source_id(cls, data: Any) -> Any: if isinstance(data, dict): - if "source-id" not in data and (source_ids := data["source-ids"]): + if "source-id" not in data and "source-ids" in data: + source_ids = data["source-ids"] if isinstance(source_ids, list): if len(source_ids) == 0: raise ValueError("Empty source-ids is not allowed") diff --git a/pyiceberg/table/sorting.py b/pyiceberg/table/sorting.py index 4a8d548612..61f34c4780 100644 --- a/pyiceberg/table/sorting.py +++ b/pyiceberg/table/sorting.py @@ -101,7 +101,8 @@ def set_null_order(cls, values: dict[str, Any]) -> dict[str, Any]: @classmethod def map_source_ids_onto_source_id(cls, data: Any) -> Any: if isinstance(data, dict): - if "source-id" not in data and (source_ids := data["source-ids"]): + if "source-id" not in data and "source-ids" in data: + source_ids = data["source-ids"] if isinstance(source_ids, list): if len(source_ids) == 0: raise ValueError("Empty source-ids is not allowed") diff --git a/tests/table/test_partitioning.py b/tests/table/test_partitioning.py index a27046ef30..b150fc2f67 100644 --- a/tests/table/test_partitioning.py +++ b/tests/table/test_partitioning.py @@ -267,6 +267,12 @@ def test_deserialize_partition_field_v3() -> None: assert field == PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=19), name="str_truncate") +def 
test_deserialize_partition_field_empty_source_ids_rejected() -> None: + json_partition_spec = """{"source-ids": [], "field-id": 1000, "transform": "identity", "name": "x"}""" + with pytest.raises(Exception, match="Empty source-ids is not allowed"): + PartitionField.model_validate_json(json_partition_spec) + + def test_incompatible_source_column_not_found() -> None: schema = Schema(NestedField(1, "foo", IntegerType()), NestedField(2, "bar", IntegerType())) diff --git a/tests/table/test_sorting.py b/tests/table/test_sorting.py index 91c7a25b03..5f7f5d016e 100644 --- a/tests/table/test_sorting.py +++ b/tests/table/test_sorting.py @@ -138,6 +138,12 @@ def test_serialize_sort_field_v3() -> None: assert SortField.model_validate_json(payload) == expected +def test_deserialize_sort_field_empty_source_ids_rejected() -> None: + payload = '{"source-ids":[],"transform":"identity","direction":"asc","null-order":"nulls-first"}' + with pytest.raises(Exception, match="Empty source-ids is not allowed"): + SortField.model_validate_json(payload) + + def test_incompatible_source_column_not_found(sort_order: SortOrder) -> None: schema = Schema(NestedField(1, "foo", IntegerType()), NestedField(2, "bar", IntegerType()))