25 changes: 13 additions & 12 deletions mkdocs/docs/configuration.md
@@ -54,18 +54,19 @@ Iceberg tables support table properties to configure table behavior.

### Write options

| Key | Options | Default | Description |
|------------------------------------------|------------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `write.parquet.compression-codec`         | `{uncompressed,zstd,gzip,snappy}`  | zstd    | Sets the Parquet compression codec.                                                                                                                                                                                   |
| `write.parquet.compression-level`         | Integer                            | null    | Parquet compression level for the codec. If not set, the default level for the codec is used.                                                                                                                         |
| `write.parquet.row-group-limit` | Number of rows | 1048576 | The upper bound of the number of entries within a single row group |
| `write.parquet.page-size-bytes` | Size in bytes | 1MB | Set a target threshold for the approximate encoded size of data pages within a column chunk |
| `write.parquet.page-row-limit` | Number of rows | 20000 | Set a target threshold for the maximum number of rows within a column chunk |
| `write.parquet.dict-size-bytes` | Size in bytes | 2MB | Set the dictionary page size limit per row group |
| `write.metadata.previous-versions-max`    | Integer                            | 100     | The maximum number of previous version metadata files to keep before they are deleted after a commit.                                                                                                                 |
| `write.object-storage.enabled` | Boolean | True | Enables the [`ObjectStoreLocationProvider`](configuration.md#object-store-location-provider) that adds a hash component to file paths. Note: the default value of `True` differs from Iceberg's Java implementation |
| `write.object-storage.partitioned-paths` | Boolean | True | Controls whether [partition values are included in file paths](configuration.md#partition-exclusion) when object storage is enabled |
| `write.py-location-provider.impl` | String of form `module.ClassName` | null | Optional, [custom `LocationProvider`](configuration.md#loading-a-custom-location-provider) implementation |
| `write.data.path` | String pointing to location | ∅ | Sets the location where to write the data. If not set, it will use the table location postfixed with `data/`. |
**smaheshwar-pltr** (Contributor) commented on Feb 5, 2025:

Not sure about the best wording here - maybe something like:

Suggested change:
- | `write.data.path` | String pointing to location || Sets the location where to write the data. If not set, it will use the table location postfixed with `data/`. |
+ | `write.data.path` | String referencing a location || Sets the location under which data is written. If not set, the table location (`table.metadata.location`) postfixed with `data/` is used. |

Contributor:

Also, what about 'table location + /data' as the default instead of ∅ like in the Java docs?

Contributor:

Also bringing attention to #1537 (comment) - it would be great to make the location providers docs consistent with `write.data.path` now.

> The `SimpleLocationProvider` places a table's file names underneath a `data` directory in the table's base storage

> Paths still contain partitions just before the file name, in Hive-style, and a `data` directory beneath the table's location,

I think these are the only places that require changing - they'll be `write.data.path` (and maybe for clarity we should reiterate there that this defaults to `{location}/data` so examples are clearer). Could we do that in this PR maybe? (Happy to follow up with this otherwise)

Author:

Thanks for the input here; I've changed some wording and reworked some of the sections. Let me know what you think 👍

Contributor:

Looks great - thank you!
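
As an aside (outside this diff): a minimal sketch of setting `write.data.path` at table-creation time through PyIceberg's catalog API. The catalog name, schema, and S3 paths below are hypothetical.

```python
from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema
from pyiceberg.types import NestedField, StringType

# Hypothetical catalog and locations, for illustration only.
catalog = load_catalog("default")
schema = Schema(NestedField(field_id=1, name="id", field_type=StringType(), required=True))

table = catalog.create_table(
    "db.events",
    schema=schema,
    location="s3://warehouse/db/events",
    # Data files go under this path instead of "<table location>/data".
    properties={"write.data.path": "s3://warehouse/db/events-data"},
)
```

With the property set, data files should land under the custom path while the table's metadata stays under the table location.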


### Table behavior options

21 changes: 16 additions & 5 deletions pyiceberg/table/locations.py
@@ -28,6 +28,8 @@

logger = logging.getLogger(__name__)

WRITE_DATA_PATH = "write.data.path"


class LocationProvider(ABC):
"""A base class for location providers, that provide data file locations for a table's write tasks.
@@ -40,10 +42,17 @@ class LocationProvider(ABC):
table_location: str
table_properties: Properties

data_path: str

def __init__(self, table_location: str, table_properties: Properties):
self.table_location = table_location
self.table_properties = table_properties

if path := table_properties.get(WRITE_DATA_PATH):
self.data_path = path.rstrip("/")
else:
self.data_path = f"{self.table_location.rstrip('/')}/data"

@abstractmethod
def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str:
"""Return a fully-qualified data file location for the given filename.
@@ -62,8 +71,11 @@ def __init__(self, table_location: str, table_properties: Properties):
super().__init__(table_location, table_properties)

def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str:
-        prefix = f"{self.table_location}/data"
-        return f"{prefix}/{partition_key.to_path()}/{data_file_name}" if partition_key else f"{prefix}/{data_file_name}"
+        return (
+            f"{self.data_path}/{partition_key.to_path()}/{data_file_name}"
+            if partition_key
+            else f"{self.data_path}/{data_file_name}"
+        )


class ObjectStoreLocationProvider(LocationProvider):
@@ -85,13 +97,12 @@ def new_data_location(self, data_file_name: str, partition_key: Optional[PartitionKey] = None) -> str:
if self._include_partition_paths and partition_key:
return self.new_data_location(f"{partition_key.to_path()}/{data_file_name}")

-        prefix = f"{self.table_location}/data"
         hashed_path = self._compute_hash(data_file_name)

         return (
-            f"{prefix}/{hashed_path}/{data_file_name}"
+            f"{self.data_path}/{hashed_path}/{data_file_name}"
             if self._include_partition_paths
-            else f"{prefix}/{hashed_path}-{data_file_name}"
+            else f"{self.data_path}/{hashed_path}-{data_file_name}"
         )

@staticmethod
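
For illustration, a minimal sketch of the resolution implemented above, using `load_location_provider` as the tests do. Setting `write.object-storage.enabled` to `"false"` is assumed here to select the simple (non-hashed) provider, per the write options table in this PR; the bucket names are made up.

```python
from pyiceberg.table.locations import load_location_provider

# With write.data.path set, data files land under the custom path
# (the provider strips a trailing slash).
provider = load_location_provider(
    table_location="s3://bucket/tbl",
    table_properties={
        "write.data.path": "s3://other-bucket/custom/",
        "write.object-storage.enabled": "false",
    },
)
assert provider.new_data_location("file.parquet") == "s3://other-bucket/custom/file.parquet"

# Without it, paths fall back to "<table location>/data".
default_provider = load_location_provider(
    table_location="s3://bucket/tbl",
    table_properties={"write.object-storage.enabled": "false"},
)
assert default_provider.new_data_location("file.parquet") == "s3://bucket/tbl/data/file.parquet"
```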
12 changes: 11 additions & 1 deletion tests/table/test_locations.py
@@ -20,7 +20,7 @@

from pyiceberg.partitioning import PartitionField, PartitionFieldValue, PartitionKey, PartitionSpec
from pyiceberg.schema import Schema
-from pyiceberg.table.locations import LocationProvider, load_location_provider
+from pyiceberg.table.locations import WRITE_DATA_PATH, LocationProvider, load_location_provider
from pyiceberg.transforms import IdentityTransform
from pyiceberg.typedef import EMPTY_DICT
from pyiceberg.types import NestedField, StringType
@@ -133,3 +133,13 @@ def test_hash_injection(data_file_name: str, expected_hash: str) -> None:
provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)

assert provider.new_data_location(data_file_name) == f"table_location/data/{expected_hash}/{data_file_name}"


def test_write_data_path() -> None:
provider = load_location_provider(
table_location="s3://table-location/table", table_properties={WRITE_DATA_PATH: "s3://table-location/custom/data/path"}
)

assert (
provider.new_data_location("file.parquet") == "s3://table-location/custom/data/path/0010/1111/0101/11011101/file.parquet"
)
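
A possible follow-up test (not part of this diff) covering the trailing-slash stripping (`path.rstrip("/")`) and the `<table location>/data` fallback from `LocationProvider.__init__`; disabling `write.object-storage.enabled` to get the simple provider is an assumption here.

```python
def test_write_data_path_trailing_slash_and_default() -> None:
    # A trailing slash on write.data.path is stripped by the provider.
    provider = load_location_provider(
        table_location="s3://table-location/table",
        table_properties={WRITE_DATA_PATH: "s3://table-location/custom/", "write.object-storage.enabled": "false"},
    )
    assert provider.new_data_location("file.parquet") == "s3://table-location/custom/file.parquet"

    # Without the property, paths fall back to "<table location>/data".
    default_provider = load_location_provider(
        table_location="s3://table-location/table",
        table_properties={"write.object-storage.enabled": "false"},
    )
    assert default_provider.new_data_location("file.parquet") == "s3://table-location/table/data/file.parquet"
```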