From f917932b955626b93374a2469821027dbbfbb91b Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Thu, 23 Jan 2020 12:47:57 -0800 Subject: [PATCH 1/3] Fix metadata tables that use manifestListLocation. --- .../main/java/org/apache/iceberg/DataFilesTable.java | 11 +---------- .../java/org/apache/iceberg/ManifestEntriesTable.java | 11 +---------- .../main/java/org/apache/iceberg/ManifestsTable.java | 3 ++- 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/DataFilesTable.java b/core/src/main/java/org/apache/iceberg/DataFilesTable.java index 4b153b19fc2b..98a7912e224f 100644 --- a/core/src/main/java/org/apache/iceberg/DataFilesTable.java +++ b/core/src/main/java/org/apache/iceberg/DataFilesTable.java @@ -107,16 +107,7 @@ protected long targetSplitSize(TableOperations ops) { @Override protected CloseableIterable planFiles( TableOperations ops, Snapshot snapshot, Expression rowFilter, boolean caseSensitive, boolean colStats) { - CloseableIterable manifests = Avro - .read(ops.io().newInputFile(snapshot.manifestListLocation())) - .rename("manifest_file", GenericManifestFile.class.getName()) - .rename("partitions", GenericPartitionFieldSummary.class.getName()) - // 508 is the id used for the partition field, and r508 is the record name created for it in Avro schemas - .rename("r508", GenericPartitionFieldSummary.class.getName()) - .project(ManifestFile.schema()) - .reuseContainers(false) - .build(); - + CloseableIterable manifests = CloseableIterable.withNoopClose(snapshot.manifests()); String schemaString = SchemaParser.toJson(schema()); String specString = PartitionSpecParser.toJson(PartitionSpec.unpartitioned()); ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(rowFilter); diff --git a/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java b/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java index 8185f0aeda46..b775c5308093 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java +++ b/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java @@ -105,16 +105,7 @@ protected long targetSplitSize(TableOperations ops) { @Override protected CloseableIterable planFiles( TableOperations ops, Snapshot snapshot, Expression rowFilter, boolean caseSensitive, boolean colStats) { - CloseableIterable manifests = Avro - .read(ops.io().newInputFile(snapshot.manifestListLocation())) - .rename("manifest_file", GenericManifestFile.class.getName()) - .rename("partitions", GenericPartitionFieldSummary.class.getName()) - // 508 is the id used for the partition field, and r508 is the record name created for it in Avro schemas - .rename("r508", GenericPartitionFieldSummary.class.getName()) - .project(ManifestFile.schema()) - .reuseContainers(false) - .build(); - + CloseableIterable manifests = CloseableIterable.withNoopClose(snapshot.manifests()); String schemaString = SchemaParser.toJson(schema()); String specString = PartitionSpecParser.toJson(PartitionSpec.unpartitioned()); diff --git a/core/src/main/java/org/apache/iceberg/ManifestsTable.java b/core/src/main/java/org/apache/iceberg/ManifestsTable.java index 14f46bcd17b2..4eb776edce3f 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestsTable.java +++ b/core/src/main/java/org/apache/iceberg/ManifestsTable.java @@ -79,8 +79,9 @@ public Schema schema() { } protected DataTask task(TableScan scan) { + String manifestListLocation = scan.snapshot().manifestListLocation(); return StaticDataTask.of( - ops.io().newInputFile(scan.snapshot().manifestListLocation()), + ops.io().newInputFile(manifestListLocation != null ? manifestListLocation : ops.current().file().location()), scan.snapshot().manifests(), this::manifestFileToRow); } From e5d57f3c41f92429da292255cea7cd7da97efd05 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 16 Feb 2020 15:41:16 -0800 Subject: [PATCH 2/3] Fix checkstyle. --- core/src/main/java/org/apache/iceberg/DataFilesTable.java | 1 - core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java | 1 - 2 files changed, 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/DataFilesTable.java b/core/src/main/java/org/apache/iceberg/DataFilesTable.java index 98a7912e224f..abc06a48fbeb 100644 --- a/core/src/main/java/org/apache/iceberg/DataFilesTable.java +++ b/core/src/main/java/org/apache/iceberg/DataFilesTable.java @@ -23,7 +23,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; import java.util.Collection; -import org.apache.iceberg.avro.Avro; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.ResidualEvaluator; import org.apache.iceberg.io.CloseableIterable; diff --git a/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java b/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java index b775c5308093..11b430ebf1b5 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java +++ b/core/src/main/java/org/apache/iceberg/ManifestEntriesTable.java @@ -22,7 +22,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; import java.util.Collection; -import org.apache.iceberg.avro.Avro; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.ResidualEvaluator; import org.apache.iceberg.io.CloseableIterable; From dcca55e2341ca308021f5c728639a1a1d03d42f8 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 17 Feb 2020 09:40:52 -0800 Subject: [PATCH 3/3] Update fix to metadata tables without current snapshot. --- core/src/main/java/org/apache/iceberg/HistoryTable.java | 7 +++++++ core/src/main/java/org/apache/iceberg/SnapshotsTable.java | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/HistoryTable.java b/core/src/main/java/org/apache/iceberg/HistoryTable.java index 1ee50225d5d2..abc18290759e 100644 --- a/core/src/main/java/org/apache/iceberg/HistoryTable.java +++ b/core/src/main/java/org/apache/iceberg/HistoryTable.java @@ -24,6 +24,7 @@ import java.util.Map; import java.util.Set; import java.util.function.Function; +import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.SnapshotUtil; @@ -81,6 +82,12 @@ private class HistoryScan extends StaticTableScan { HistoryScan() { super(ops, table, HISTORY_SCHEMA, HistoryTable.this::task); } + + @Override + public CloseableIterable planFiles() { + // override planFiles to avoid the check for a current snapshot because this metadata table is for all snapshots + return CloseableIterable.withNoopClose(HistoryTable.this.task(this)); + } } private static Function convertHistoryEntryFunc(Table table) { diff --git a/core/src/main/java/org/apache/iceberg/SnapshotsTable.java b/core/src/main/java/org/apache/iceberg/SnapshotsTable.java index 633be050e7e4..bcb59f37fb2a 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotsTable.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotsTable.java @@ -19,6 +19,7 @@ package org.apache.iceberg; +import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.types.Types; /** @@ -78,6 +79,12 @@ private class SnapshotsTableScan extends StaticTableScan { SnapshotsTableScan() { super(ops, table, SNAPSHOT_SCHEMA, SnapshotsTable.this::task); } + + @Override + public CloseableIterable planFiles() { + // override planFiles to avoid the check for a current snapshot because this metadata table is for all snapshots + return CloseableIterable.withNoopClose(SnapshotsTable.this.task(this)); + } } private static StaticDataTask.Row snapshotToRow(Snapshot snap) {