|
35 | 35 | TableAlreadyExistsError, |
36 | 36 | ) |
37 | 37 | from pyiceberg.io import WAREHOUSE |
| 38 | +from pyiceberg.io.pyarrow import schema_to_pyarrow |
38 | 39 | from pyiceberg.partitioning import PartitionField, PartitionSpec |
39 | 40 | from pyiceberg.schema import Schema |
40 | 41 | from pyiceberg.table import ( |
41 | 42 | Table, |
| 43 | + TableProperties, |
42 | 44 | ) |
43 | 45 | from pyiceberg.table.update import ( |
44 | 46 | AddSchemaUpdate, |
@@ -563,3 +565,60 @@ def test_table_properties_raise_for_none_value(catalog: InMemoryCatalog) -> None |
563 | 565 | with pytest.raises(ValidationError) as exc_info: |
564 | 566 | _ = given_catalog_has_a_table(catalog, properties=property_with_none) |
565 | 567 | assert "None type is not a supported value in properties: property_name" in str(exc_info.value) |
| 568 | + |
| 569 | + |
def test_table_writes_metadata_to_custom_location(catalog: InMemoryCatalog) -> None:
    """Check that a table created with WRITE_METADATA_PATH writes its metadata there.

    The table is created with the property pointing at a custom directory under
    the catalog warehouse, one row is appended, and the test then verifies that
    newly generated metadata locations, the first manifest of the current
    snapshot, and the table's metadata file all start with that directory,
    while the table location itself differs from it.
    """
    custom_metadata_dir = f"{catalog._warehouse_location}/custom/path"
    table_properties = {TableProperties.WRITE_METADATA_PATH: custom_metadata_dir}

    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    tbl = catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
        properties=table_properties,
    )

    # Append a single row so that the table gets a snapshot with manifests.
    rows = [{"x": 123, "y": 456, "z": 789}]
    arrow_table = pa.Table.from_pylist(rows, schema=schema_to_pyarrow(TEST_TABLE_SCHEMA))
    tbl.append(arrow_table)

    snapshot = tbl.current_snapshot()
    snapshot_manifests = snapshot.manifests(tbl.io)  # type: ignore
    provider = tbl.location_provider()

    generated_location = provider.new_metadata_location("")
    assert generated_location.startswith(custom_metadata_dir)
    assert snapshot_manifests[0].manifest_path.startswith(custom_metadata_dir)
    assert tbl.location() != custom_metadata_dir
    assert tbl.metadata_location.startswith(custom_metadata_dir)
| 588 | + |
| 589 | + |
def test_table_writes_metadata_to_default_path(catalog: InMemoryCatalog) -> None:
    """Check that metadata is written under ``<table location>/metadata``.

    The table is created with TEST_TABLE_PROPERTIES (presumably without a custom
    metadata path -- confirm against that constant), one row is appended, and
    the test verifies that newly generated metadata locations, the first
    manifest of the current snapshot, and the table's metadata file all start
    with the ``metadata`` directory below the table location.
    """
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    tbl = catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
        properties=TEST_TABLE_PROPERTIES,
    )
    expected_metadata_dir = f"{tbl.location()}/metadata"

    # Append a single row so that the table gets a snapshot with manifests.
    rows = [{"x": 123, "y": 456, "z": 789}]
    arrow_table = pa.Table.from_pylist(rows, schema=schema_to_pyarrow(TEST_TABLE_SCHEMA))
    tbl.append(arrow_table)

    snapshot = tbl.current_snapshot()
    snapshot_manifests = snapshot.manifests(tbl.io)  # type: ignore
    provider = tbl.location_provider()

    generated_location = provider.new_metadata_location("")
    assert generated_location.startswith(expected_metadata_dir)
    assert snapshot_manifests[0].manifest_path.startswith(expected_metadata_dir)
    assert tbl.metadata_location.startswith(expected_metadata_dir)
| 607 | + |
| 608 | + |
def test_table_metadata_writes_reflect_latest_path(catalog: InMemoryCatalog) -> None:
    """Check that the location provider follows a changed WRITE_METADATA_PATH.

    A table is created without explicit properties and the test first verifies
    that new metadata locations resolve under ``<table location>/metadata``.
    The WRITE_METADATA_PATH property is then set through a transaction, after
    which new metadata locations must resolve under the new directory.
    """
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    tbl = catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )

    default_metadata_dir = f"{tbl.location()}/metadata"
    location_before = tbl.location_provider().new_metadata_location("metadata.json")
    assert location_before == f"{default_metadata_dir}/metadata.json"

    # Redirect metadata writes by committing a property change on the table.
    custom_metadata_dir = f"{tbl.location()}/custom/path"
    property_overrides = {TableProperties.WRITE_METADATA_PATH: custom_metadata_dir}
    pending_update = tbl.transaction().set_properties(property_overrides)
    pending_update.commit_transaction()

    location_after = tbl.location_provider().new_metadata_location("metadata.json")
    assert location_after == f"{custom_metadata_dir}/metadata.json"
0 commit comments