From 9893925a4f1aeb685594a295a2c6dfab03c5cad1 Mon Sep 17 00:00:00 2001 From: qqzhang Date: Wed, 12 Feb 2025 01:07:40 -0500 Subject: [PATCH] Add properties support for HadoopTables.load() (#12251) Description: Currently, HadoopTables.load() doesn't support passing custom properties when loading tables. While HiveCatalog and HadoopCatalog support manifest caching through their initialize() method (as implemented in #4518), HadoopTables lacks this capability. This enhancement adds property support to HadoopTables.load() to enable manifest caching and other configurations. Problem: - HadoopTables lacks the ability to configure manifest caching during table loading - Unlike HiveCatalog and HadoopCatalog, which can enable manifest caching through initialize(), HadoopTables has no mechanism to pass these settings - This creates an inconsistency in how manifest caching can be configured across different catalog implementations --- .../main/java/org/apache/iceberg/Tables.java | 9 +++++++- .../apache/iceberg/hadoop/HadoopTables.java | 22 ++++++++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/Tables.java b/api/src/main/java/org/apache/iceberg/Tables.java index eae6146a76a1..f871f9f67c41 100644 --- a/api/src/main/java/org/apache/iceberg/Tables.java +++ b/api/src/main/java/org/apache/iceberg/Tables.java @@ -51,7 +51,14 @@ default Table create( this.getClass().getName() + " does not implement create with a sort order"); } - Table load(String tableIdentifier); + default Table load(String tableIdentifier) { + return load(tableIdentifier, null); + } + + default Table load(String tableIdentifier, Map<String, String> properties) { + throw new UnsupportedOperationException( + this.getClass().getName() + " does not implement load with properties"); + } boolean exists(String tableIdentifier); } diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java index 
764d0d7d863a..600c2fed821f 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java @@ -43,6 +43,7 @@ import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -77,11 +78,15 @@ public HadoopTables(Configuration conf) { /** * Loads the table location from a FileSystem path location. * + *

<p>If properties are provided, they will be used to initialize the table operations. The main + * purpose of this approach is to pass in properties related to manifest caching. + * * @param location a path URI (e.g. hdfs:///warehouse/my_table/) + * @param properties catalog properties * @return table implementation */ @Override - public Table load(String location) { + public Table load(String location, Map<String, String> properties) { Table result; Pair<String, MetadataTableType> parsedMetadataType = parseMetadataType(location); @@ -90,7 +95,7 @@ public Table load(String location) { result = loadMetadataTable(parsedMetadataType.first(), location, parsedMetadataType.second()); } else { // Load a normal table - TableOperations ops = newTableOps(location); + TableOperations ops = newTableOps(location, properties); if (ops.current() != null) { result = new BaseTable(ops, location); } else { @@ -205,11 +210,18 @@ public boolean dropTable(String location, boolean purge) { @VisibleForTesting TableOperations newTableOps(String location) { + return newTableOps(location, null); + } + + TableOperations newTableOps(String location, Map<String, String> properties) { + FileIO io = new HadoopFileIO(conf); + if (properties != null) { + io.initialize(properties); + } if (location.contains(METADATA_JSON)) { - return new StaticTableOperations(location, new HadoopFileIO(conf)); + return new StaticTableOperations(location, io); } else { - return new HadoopTableOperations( - new Path(location), new HadoopFileIO(conf), conf, createOrGetLockManager(this)); + return new HadoopTableOperations(new Path(location), io, conf, createOrGetLockManager(this)); } }