From 339e206fd22d5618f106083a0c5efc7e648c7a8d Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Wed, 7 May 2025 21:23:06 -0700 Subject: [PATCH 1/9] Add scan planning apis to REST Catalog Co-authored-by: Rahil Co-authored-by: Prashant --- .../exceptions/EntityNotFoundException.java | 34 +++ .../java/org/apache/iceberg/BaseFile.java | 5 + .../org/apache/iceberg/ContentFileParser.java | 127 ++++++++- .../iceberg/RESTFileScanTaskParser.java | 89 +++++++ .../org/apache/iceberg/RESTPlanningMode.java | 47 ++++ .../java/org/apache/iceberg/RESTTable.java | 71 ++++++ .../org/apache/iceberg/RESTTableScan.java | 240 ++++++++++++++++++ .../org/apache/iceberg/ScanTasksIterable.java | 216 ++++++++++++++++ .../iceberg/UnboundBaseFileScanTask.java | 63 +++++ .../iceberg/UnboundGenericDataFile.java | 76 ++++++ .../iceberg/UnboundGenericDeleteFile.java | 86 +++++++ .../org/apache/iceberg/rest/PlanStatus.java | 48 ++++ .../apache/iceberg/rest/RESTSerializers.java | 113 +++++++++ .../iceberg/rest/RESTSessionCatalog.java | 44 ++++ .../apache/iceberg/rest/ResourcePaths.java | 34 +++ .../rest/requests/FetchScanTasksRequest.java | 47 ++++ .../requests/FetchScanTasksRequestParser.java | 57 +++++ .../rest/requests/PlanTableScanRequest.java | 172 +++++++++++++ .../requests/PlanTableScanRequestParser.java | 135 ++++++++++ .../FetchPlanningResultResponse.java | 126 +++++++++ .../FetchPlanningResultResponseParser.java | 86 +++++++ .../responses/FetchScanTasksResponse.java | 111 ++++++++ .../FetchScanTasksResponseParser.java | 77 ++++++ .../rest/responses/PlanTableScanResponse.java | 158 ++++++++++++ .../PlanTableScanResponseParser.java | 95 +++++++ .../rest/responses/TableScanResponse.java | 23 ++ .../responses/TableScanResponseParser.java | 120 +++++++++ .../java/org/apache/iceberg/TestBase.java | 9 +- .../apache/iceberg/catalog/CatalogTests.java | 41 +++ .../iceberg/rest/RESTCatalogAdapter.java | 186 ++++++++++++++ .../apache/iceberg/rest/TestRESTCatalog.java | 70 +++++ 
.../requests/TestFetchScanTasksRequest.java | 53 ++++ .../requests/TestPlanTableScanRequest.java | 153 +++++++++++ ...TestFetchPlanningResultResponseParser.java | 206 +++++++++++++++ .../TestFetchScanTasksResponseParser.java | 172 +++++++++++++ .../TestPlanTableScanResponseParser.java | 236 +++++++++++++++++ 36 files changed, 3610 insertions(+), 16 deletions(-) create mode 100644 api/src/main/java/org/apache/iceberg/exceptions/EntityNotFoundException.java create mode 100644 core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java create mode 100644 core/src/main/java/org/apache/iceberg/RESTPlanningMode.java create mode 100644 core/src/main/java/org/apache/iceberg/RESTTable.java create mode 100644 core/src/main/java/org/apache/iceberg/RESTTableScan.java create mode 100644 core/src/main/java/org/apache/iceberg/ScanTasksIterable.java create mode 100644 core/src/main/java/org/apache/iceberg/UnboundBaseFileScanTask.java create mode 100644 core/src/main/java/org/apache/iceberg/UnboundGenericDataFile.java create mode 100644 core/src/main/java/org/apache/iceberg/UnboundGenericDeleteFile.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/PlanStatus.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequest.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequestParser.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequestParser.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java create mode 100644 
core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponse.java create mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponseParser.java create mode 100644 core/src/test/java/org/apache/iceberg/rest/requests/TestFetchScanTasksRequest.java create mode 100644 core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java create mode 100644 core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java create mode 100644 core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java create mode 100644 core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java diff --git a/api/src/main/java/org/apache/iceberg/exceptions/EntityNotFoundException.java b/api/src/main/java/org/apache/iceberg/exceptions/EntityNotFoundException.java new file mode 100644 index 000000000000..1d06a5d2bc26 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/exceptions/EntityNotFoundException.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.exceptions;
+
+import com.google.errorprone.annotations.FormatMethod;
+
+/**
+ * Exception raised when an entity is not found.
+ *
+ * <p>This is a {@link RESTException} that is also marked as a {@link CleanableFailure}.
+ */
+public class EntityNotFoundException extends RESTException implements CleanableFailure {
+  /**
+   * Creates the exception from a format string and its arguments.
+   *
+   * @param message format string for the exception message
+   * @param args arguments referenced by the format string
+   */
+  @FormatMethod
+  public EntityNotFoundException(String message, Object... args) {
+    super(message, args);
+  }
+
+  /**
+   * Creates the exception from a cause, a format string and its arguments.
+   *
+   * @param cause the underlying failure
+   * @param message format string for the exception message
+   * @param args arguments referenced by the format string
+   */
+  @FormatMethod
+  public EntityNotFoundException(Throwable cause, String message, Object... args) {
+    super(cause, message, args);
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/BaseFile.java b/core/src/main/java/org/apache/iceberg/BaseFile.java
index a02e0eff55a2..d5dfd8ec0ba9 100644
--- a/core/src/main/java/org/apache/iceberg/BaseFile.java
+++ b/core/src/main/java/org/apache/iceberg/BaseFile.java
@@ -483,6 +483,11 @@ public StructLike partition() {
     return partitionData;
   }
 
+  public void setPartitionData(PartitionData partitionData) {
+    // TODO: revisit this setter; it was added so REST scans can bind partition data
+    this.partitionData = partitionData;
+  }
+
   @Override
   public long recordCount() {
     return recordCount;
diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java
index 63cd606356db..182acab8759c 100644
--- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java
+++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java
@@ -27,7 +27,7 @@
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.util.JsonUtil;
 
-class ContentFileParser {
+public class ContentFileParser {
   private static
final String SPEC_ID = "spec-id";
   private static final String CONTENT = "content";
   private static final String FILE_PATH = "file-path";
@@ -52,6 +52,120 @@ class ContentFileParser {
 
   private ContentFileParser() {}
 
+  /**
+   * Writes a content file as a JSON object, serializing its partition with the given spec.
+   *
+   * <p>The partition, key metadata, split offsets, equality ids and sort order id are written
+   * only when the corresponding accessor returns a non-null value.
+   *
+   * @param contentFile the data or delete file to serialize
+   * @param spec the partition spec; its id must match {@code contentFile.specId()}
+   * @param generator the JSON generator to write to
+   * @throws IOException if the generator fails to write
+   * @throws IllegalArgumentException if an argument is null or the spec ids differ
+   */
+  public static void unboundContentFileToJson(
+      ContentFile<?> contentFile, PartitionSpec spec, JsonGenerator generator) throws IOException {
+    Preconditions.checkArgument(contentFile != null, "Invalid content file: null");
+    Preconditions.checkArgument(spec != null, "Invalid partition spec: null");
+    Preconditions.checkArgument(generator != null, "Invalid JSON generator: null");
+    Preconditions.checkArgument(
+        contentFile.specId() == spec.specId(),
+        "Invalid partition spec id from content file: expected = %s, actual = %s",
+        spec.specId(),
+        contentFile.specId());
+
+    generator.writeStartObject();
+    // ignore the ordinal position (ContentFile#pos) of the file in a manifest,
+    // as it isn't used and BaseFile constructor doesn't support it.
+
+    generator.writeNumberField(SPEC_ID, contentFile.specId());
+    generator.writeStringField(CONTENT, contentFile.content().name());
+    generator.writeStringField(FILE_PATH, contentFile.path().toString());
+    generator.writeStringField(FILE_FORMAT, contentFile.format().name());
+
+    if (contentFile.partition() != null) {
+      generator.writeFieldName(PARTITION);
+      SingleValueParser.toJson(spec.partitionType(), contentFile.partition(), generator);
+    }
+
+    generator.writeNumberField(FILE_SIZE, contentFile.fileSizeInBytes());
+
+    metricsToJson(contentFile, generator);
+
+    if (contentFile.keyMetadata() != null) {
+      generator.writeFieldName(KEY_METADATA);
+      SingleValueParser.toJson(DataFile.KEY_METADATA.type(), contentFile.keyMetadata(), generator);
+    }
+
+    if (contentFile.splitOffsets() != null) {
+      JsonUtil.writeLongArray(SPLIT_OFFSETS, contentFile.splitOffsets(), generator);
+    }
+
+    if (contentFile.equalityFieldIds() != null) {
+      JsonUtil.writeIntegerArray(EQUALITY_IDS, contentFile.equalityFieldIds(), generator);
+    }
+
+    if (contentFile.sortOrderId() != null) {
+      generator.writeNumberField(SORT_ORDER_ID, contentFile.sortOrderId());
+    }
+
+    generator.writeEndObject();
+  }
+
+  /**
+   * Parses a content file from JSON without binding its partition to a partition spec.
+   *
+   * <p>The raw {@code partition} JSON node is handed to the returned file as-is, so no {@link
+   * PartitionSpec} is needed here.
+   *
+   * @param jsonNode the JSON object describing the content file
+   * @return an {@link UnboundGenericDataFile} when the content is {@code DATA}, otherwise an
+   *     {@link UnboundGenericDeleteFile}
+   * @throws IllegalArgumentException if {@code jsonNode} is null
+   */
+  public static ContentFile<?> unboundContentFileFromJson(JsonNode jsonNode) {
+    Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for content file: null");
+
+    int specId = JsonUtil.getInt(SPEC_ID, jsonNode);
+    FileContent fileContent = FileContent.valueOf(JsonUtil.getString(CONTENT, jsonNode));
+    String filePath = JsonUtil.getString(FILE_PATH, jsonNode);
+    FileFormat fileFormat = FileFormat.fromString(JsonUtil.getString(FILE_FORMAT, jsonNode));
+
+    long fileSizeInBytes = JsonUtil.getLong(FILE_SIZE, jsonNode);
+    Metrics metrics = metricsFromJson(jsonNode);
+    ByteBuffer keyMetadata = JsonUtil.getByteBufferOrNull(KEY_METADATA, jsonNode);
+    List<Long> splitOffsets = JsonUtil.getLongListOrNull(SPLIT_OFFSETS, jsonNode);
+    int[] equalityFieldIds = JsonUtil.getIntArrayOrNull(EQUALITY_IDS, jsonNode);
+    Integer sortOrderId = JsonUtil.getIntOrNull(SORT_ORDER_ID, jsonNode);
+
+    if (fileContent == FileContent.DATA) {
+      return new UnboundGenericDataFile(
+          specId,
+          filePath,
+          fileFormat,
+          jsonNode.get(PARTITION),
+          fileSizeInBytes,
+          metrics,
+          keyMetadata,
+          splitOffsets,
+          sortOrderId);
+    } else {
+      return new UnboundGenericDeleteFile(
+          specId,
+          fileContent,
+          filePath,
+          fileFormat,
+          jsonNode.get(PARTITION),
+          fileSizeInBytes,
+          metrics,
+          equalityFieldIds,
+          sortOrderId,
+          splitOffsets,
+          keyMetadata);
+    }
+  }
+
   private static boolean hasPartitionData(StructLike partitionData) {
     return partitionData != null && partitionData.size() > 0;
   }
@@ -147,18 +261,7 @@ static ContentFile<?> fromJson(JsonNode jsonNode, PartitionSpec spec) {
 
     PartitionData partitionData = null;
     if (jsonNode.has(PARTITION)) {
-      partitionData = new PartitionData(spec.partitionType());
-      StructLike structLike =
-          (StructLike) SingleValueParser.fromJson(spec.partitionType(), jsonNode.get(PARTITION));
-      Preconditions.checkState(
-          partitionData.size() == structLike.size(),
-          "Invalid partition data size: expected = %s, actual = %s",
-          partitionData.size(),
-          structLike.size());
-      for (int pos = 0; pos < partitionData.size(); ++pos) {
-        Class<?> javaClass = spec.partitionType().fields().get(pos).type().typeId().javaClass();
-        partitionData.set(pos, structLike.get(pos, javaClass));
-      }
+      partitionData = partitionDataFromRawValue(jsonNode.get(PARTITION), spec);
     }
 
     long fileSizeInBytes = JsonUtil.getLong(FILE_SIZE, jsonNode);
@@ -203,6 +306,35 @@ static ContentFile<?> fromJson(JsonNode jsonNode, PartitionSpec spec) {
     }
   }
 
+  /**
+   * Converts a raw partition JSON value into {@link PartitionData} typed by the given spec.
+   *
+   * @param rawPartitionValue the JSON value of the partition field, may be null
+   * @param spec the partition spec that defines the partition type
+   * @return the parsed partition data, or null if {@code rawPartitionValue} is null
+   * @throws IllegalStateException if the parsed struct size differs from the partition type
+   */
+  static PartitionData partitionDataFromRawValue(JsonNode rawPartitionValue, PartitionSpec spec) {
+    if (rawPartitionValue == null) {
+      return null;
+    }
+
+    PartitionData partitionData = new PartitionData(spec.partitionType());
+    StructLike structLike =
+        (StructLike) SingleValueParser.fromJson(spec.partitionType(), rawPartitionValue);
+    Preconditions.checkState(
+        partitionData.size() == structLike.size(),
+        "Invalid partition data size: expected = %s, actual = %s",
+        partitionData.size(),
+        structLike.size());
+    for (int pos = 0; pos < partitionData.size(); ++pos) {
+      Class<?> javaClass = spec.partitionType().fields().get(pos).type().typeId().javaClass();
+      partitionData.set(pos, structLike.get(pos, javaClass));
+    }
+
+    return partitionData;
+  }
+
   private static void metricsToJson(ContentFile<?> contentFile, JsonGenerator generator)
       throws IOException {
     generator.writeNumberField(RECORD_COUNT, contentFile.recordCount());
diff --git a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java
new file mode 100644
index 000000000000..15e3392999f3
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.util.List;
+import java.util.Set;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.ExpressionParser;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.util.JsonUtil;
+
+/**
+ * JSON serialization for file scan tasks exchanged with a REST service.
+ *
+ * <p>A task is written as an object with a {@code data-file}, optional {@code
+ * delete-file-references} (integer indexes into a separately supplied list of delete files) and
+ * an optional {@code residual-filter}.
+ */
+public class RESTFileScanTaskParser {
+  private static final String DATA_FILE = "data-file";
+  private static final String DELETE_FILE_REFERENCES = "delete-file-references";
+  private static final String RESIDUAL = "residual-filter";
+
+  private RESTFileScanTaskParser() {}
+
+  /**
+   * Writes a file scan task as a JSON object.
+   *
+   * @param fileScanTask the task to serialize
+   * @param deleteFileReferences indexes of the delete files that apply to the task; the field is
+   *     omitted when this is null
+   * @param partitionSpec the spec used to serialize the data file's partition
+   * @param generator the JSON generator to write to
+   * @throws IOException if the generator fails to write
+   * @throws IllegalArgumentException if {@code fileScanTask} or {@code generator} is null
+   */
+  public static void toJson(
+      FileScanTask fileScanTask,
+      Set<Integer> deleteFileReferences,
+      PartitionSpec partitionSpec,
+      JsonGenerator generator)
+      throws IOException {
+    Preconditions.checkArgument(fileScanTask != null, "Invalid file scan task: null");
+    Preconditions.checkArgument(generator != null, "Invalid JSON generator: null");
+
+    generator.writeStartObject();
+    generator.writeFieldName(DATA_FILE);
+    ContentFileParser.unboundContentFileToJson(fileScanTask.file(), partitionSpec, generator);
+    if (deleteFileReferences != null) {
+      JsonUtil.writeIntegerArray(DELETE_FILE_REFERENCES, deleteFileReferences, generator);
+    }
+
+    if (fileScanTask.residual() != null) {
+      generator.writeFieldName(RESIDUAL);
+      ExpressionParser.toJson(fileScanTask.residual(), generator);
+    }
+    generator.writeEndObject();
+  }
+
+  /**
+   * Parses a file scan task from JSON, resolving delete-file references by index.
+   *
+   * @param jsonNode the JSON object describing the task
+   * @param allDeleteFiles the delete files that {@code delete-file-references} index into; only
+   *     read when the task carries references
+   * @return an unbound file scan task; its delete files are null when the task has no {@code
+   *     delete-file-references} field
+   * @throws IllegalArgumentException if {@code jsonNode} is null or not an object, or if a delete
+   *     file reference cannot be resolved against {@code allDeleteFiles}
+   */
+  public static FileScanTask fromJson(JsonNode jsonNode, List<DeleteFile> allDeleteFiles) {
+    Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for file scan task: null");
+    Preconditions.checkArgument(
+        jsonNode.isObject(), "Invalid JSON node for file scan task: non-object (%s)", jsonNode);
+
+    UnboundGenericDataFile dataFile =
+        (UnboundGenericDataFile)
+            ContentFileParser.unboundContentFileFromJson(JsonUtil.get(DATA_FILE, jsonNode));
+
+    UnboundGenericDeleteFile[] deleteFiles = null;
+    if (jsonNode.has(DELETE_FILE_REFERENCES)) {
+      Preconditions.checkArgument(
+          allDeleteFiles != null,
+          "Invalid delete files: null (required to resolve %s)",
+          DELETE_FILE_REFERENCES);
+
+      // a set drops duplicate references, matching how references are written in toJson
+      Set<Integer> deleteFileReferences = Sets.newHashSet();
+      deleteFileReferences.addAll(JsonUtil.getIntegerList(DELETE_FILE_REFERENCES, jsonNode));
+
+      ImmutableList.Builder<UnboundGenericDeleteFile> builder = ImmutableList.builder();
+      for (Integer delIdx : deleteFileReferences) {
+        // the index comes from the wire, so fail with a clear message instead of a raw
+        // IndexOutOfBoundsException
+        Preconditions.checkArgument(
+            delIdx != null && delIdx >= 0 && delIdx < allDeleteFiles.size(),
+            "Invalid delete file reference: %s (number of delete files: %s)",
+            delIdx,
+            allDeleteFiles.size());
+        builder.add((UnboundGenericDeleteFile) allDeleteFiles.get(delIdx));
+      }
+      deleteFiles = builder.build().toArray(new UnboundGenericDeleteFile[0]);
+    }
+
+    Expression filter = null;
+    if (jsonNode.has(RESIDUAL)) {
+      filter = ExpressionParser.fromJson(jsonNode.get(RESIDUAL));
+    }
+
+    return new UnboundBaseFileScanTask(dataFile, deleteFiles, filter);
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/RESTPlanningMode.java b/core/src/main/java/org/apache/iceberg/RESTPlanningMode.java
new file mode 100644
index 000000000000..185276ecbff7
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/RESTPlanningMode.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import java.util.Locale;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+
+/** Planning modes for REST scan planning, each paired with its lower-case string form. */
+public enum RESTPlanningMode {
+  REQUIRED("required"),
+  SUPPORTED("supported"),
+  UNSUPPORTED("unsupported");
+
+  private final String planningMode;
+
+  RESTPlanningMode(String planningMode) {
+    this.planningMode = planningMode;
+  }
+
+  /** Returns the string form of this mode, as passed to the enum constructor. */
+  public String mode() {
+    return this.planningMode;
+  }
+
+  /**
+   * Resolves a mode from its name, ignoring case.
+   *
+   * @param planningMode the name to resolve; it is upper-cased before the enum lookup
+   * @return the matching mode
+   * @throws IllegalArgumentException if the name is null or matches no mode
+   */
+  public static RESTPlanningMode fromName(String planningMode) {
+    Preconditions.checkArgument(planningMode != null, "planningMode is null");
+    String constantName = planningMode.toUpperCase(Locale.ENGLISH);
+    try {
+      return valueOf(constantName);
+    } catch (IllegalArgumentException cause) {
+      String message = String.format("Invalid planningMode name: %s", planningMode);
+      throw new IllegalArgumentException(message, cause);
+    }
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/RESTTable.java b/core/src/main/java/org/apache/iceberg/RESTTable.java
new file mode 100644
index 000000000000..c45c9feb581c
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/RESTTable.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import java.util.Map;
+import java.util.function.Supplier;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.metrics.MetricsReporter;
+import org.apache.iceberg.rest.RESTClient;
+import org.apache.iceberg.rest.ResourcePaths;
+
+/**
+ * A {@link BaseTable} whose {@link #newScan()} returns a {@link RESTTableScan}.
+ *
+ * <p>The REST client, request headers, table identifier and resource paths supplied at
+ * construction are passed on to every scan created by this table.
+ */
+public class RESTTable extends BaseTable {
+  private final RESTClient client;
+  private final String path;
+  private final Supplier<Map<String, String>> headers;
+  private final MetricsReporter reporter;
+  private final ResourcePaths resourcePaths;
+  private final TableIdentifier tableIdentifier;
+
+  /**
+   * Creates a table that plans scans through a REST client.
+   *
+   * @param ops table operations, passed to {@link BaseTable} and to created scans
+   * @param name table name, passed to {@link BaseTable}
+   * @param reporter metrics reporter, passed to {@link BaseTable} and set on the scan context
+   * @param client REST client handed to created scans
+   * @param path path handed to created scans
+   * @param headers supplier of request headers handed to created scans
+   * @param tableIdentifier identifier handed to created scans
+   * @param resourcePaths resource paths handed to created scans
+   */
+  public RESTTable(
+      TableOperations ops,
+      String name,
+      MetricsReporter reporter,
+      RESTClient client,
+      String path,
+      Supplier<Map<String, String>> headers,
+      TableIdentifier tableIdentifier,
+      ResourcePaths resourcePaths) {
+    super(ops, name, reporter);
+    this.reporter = reporter;
+    this.client = client;
+    this.headers = headers;
+    this.path = path;
+    this.tableIdentifier = tableIdentifier;
+    this.resourcePaths = resourcePaths;
+  }
+
+  /** Returns a new {@link RESTTableScan} over this table with a fresh scan context. */
+  @Override
+  public TableScan newScan() {
+    // TODO: confirm how the ImmutableTableScanContext built here should carry the correct
+    // snapshotId for point-in-time reads. Spark appears to follow a similar approach; see
+    // class SparkDistributedDataScan.
+
+    return new RESTTableScan(
+        this,
+        schema(),
+        ImmutableTableScanContext.builder().metricsReporter(reporter).build(),
+        client,
+        path,
+        headers,
+        operations(),
+        tableIdentifier,
+        resourcePaths);
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/RESTTableScan.java b/core/src/main/java/org/apache/iceberg/RESTTableScan.java
new file mode 100644
index 000000000000..76a1a9d69548
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/RESTTableScan.java
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import java.util.List;
+import java.util.Map;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.rest.ErrorHandlers;
+import org.apache.iceberg.rest.PlanStatus;
+import org.apache.iceberg.rest.RESTClient;
+import org.apache.iceberg.rest.ResourcePaths;
+import org.apache.iceberg.rest.requests.PlanTableScanRequest;
+import org.apache.iceberg.rest.responses.FetchPlanningResultResponse;
+import org.apache.iceberg.rest.responses.PlanTableScanResponse;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ParallelIterable;
+
+/**
+ * A {@link DataTableScan} that plans its files by calling a REST service.
+ *
+ * <p>{@link #planFiles()} posts a {@link PlanTableScanRequest}; when the service answers with a
+ * submitted plan, the result is polled until the plan completes, fails or is cancelled.
+ */
+public class RESTTableScan extends DataTableScan {
+  private final RESTClient client;
+  private final String path;
+  private final Supplier<Map<String, String>> headers;
+  private final TableOperations operations;
+  private final Table table;
+  private final ResourcePaths resourcePaths;
+  private final TableIdentifier tableIdentifier;
+
+  // TODO revisit if this property should be configurable
+  private static final int FETCH_PLANNING_SLEEP_DURATION_MS = 1000;
+
+  /**
+   * Creates a scan that plans through the given REST client.
+   *
+   * @param table the table being scanned
+   * @param schema the schema of the scan
+   * @param context the scan context
+   * @param client REST client used for planning requests
+   * @param path path kept and passed on to refined scans
+   * @param headers supplier of headers sent with each request
+   * @param operations table operations kept and passed on to refined scans
+   * @param tableIdentifier identifier used to build request paths
+   * @param resourcePaths builder of the planning endpoint paths
+   */
+  public RESTTableScan(
+      Table table,
+      Schema schema,
+      TableScanContext context,
+      RESTClient client,
+      String path,
+      Supplier<Map<String, String>> headers,
+      TableOperations operations,
+      TableIdentifier tableIdentifier,
+      ResourcePaths resourcePaths) {
+    super(table, schema, context);
+    this.table = table;
+    this.client = client;
+    this.headers = headers;
+    this.path = path;
+    this.operations = operations;
+    this.tableIdentifier = tableIdentifier;
+    this.resourcePaths = resourcePaths;
+  }
+
+  @Override
+  protected TableScan newRefinedScan(
+      Table refinedTable, Schema refinedSchema, TableScanContext refinedContext) {
+    return new RESTTableScan(
+        refinedTable,
+        refinedSchema,
+        refinedContext,
+        client,
+        path,
+        headers,
+        operations,
+        tableIdentifier,
+        resourcePaths);
+  }
+
+  /**
+   * Plans the files of this scan through the REST service.
+   *
+   * <p>The request carries the selected column names, the filter, case sensitivity and the stats
+   * fields. It targets a snapshot range when both range ends are set in the context, otherwise
+   * the scan's snapshot id, otherwise the table's current snapshot.
+   *
+   * @return the planned file scan tasks; empty when no snapshot is selected and the table has no
+   *     current snapshot
+   */
+  @Override
+  public CloseableIterable<FileScanTask> planFiles() {
+    List<String> selectedColumns =
+        schema().columns().stream().map(Types.NestedField::name).collect(Collectors.toList());
+
+    List<String> statsFields = null;
+    if (columnsToKeepStats() != null) {
+      statsFields =
+          columnsToKeepStats().stream()
+              .map(columnId -> schema().findColumnName(columnId))
+              .collect(Collectors.toList());
+    }
+
+    Long startSnapshotId = context().fromSnapshotId();
+    Long endSnapshotId = context().toSnapshotId();
+    Long snapshotId = snapshotId();
+    // may be null when the table has no snapshots yet
+    Snapshot currentSnapshot = table.currentSnapshot();
+
+    PlanTableScanRequest.Builder planTableScanRequestBuilder =
+        new PlanTableScanRequest.Builder()
+            .withSelect(selectedColumns)
+            .withFilter(filter())
+            .withCaseSensitive(isCaseSensitive())
+            .withStatsFields(statsFields);
+
+    if (startSnapshotId != null && endSnapshotId != null) {
+      planTableScanRequestBuilder
+          .withStartSnapshotId(startSnapshotId)
+          .withEndSnapshotId(endSnapshotId)
+          .withUseSnapshotSchema(true);
+
+    } else if (snapshotId != null) {
+      // use the snapshot's schema unless the requested snapshot is the table's current snapshot
+      boolean useSnapShotSchema =
+          currentSnapshot == null || snapshotId != currentSnapshot.snapshotId();
+      planTableScanRequestBuilder
+          .withSnapshotId(snapshotId)
+          .withUseSnapshotSchema(useSnapShotSchema);
+
+    } else if (currentSnapshot != null) {
+      planTableScanRequestBuilder.withSnapshotId(currentSnapshot.snapshotId());
+
+    } else {
+      // no snapshot was selected and the table has none, so there is nothing to plan
+      return CloseableIterable.empty();
+    }
+
+    return planTableScan(planTableScanRequestBuilder.build());
+  }
+
+  /** Submits the plan request and returns its tasks, polling if the plan is still running. */
+  private CloseableIterable<FileScanTask> planTableScan(
+      PlanTableScanRequest planTableScanRequest) {
+    PlanTableScanResponse response =
+        client.post(
+            resourcePaths.planTableScan(tableIdentifier),
+            planTableScanRequest,
+            PlanTableScanResponse.class,
+            headers,
+            ErrorHandlers.defaultErrorHandler());
+
+    PlanStatus planStatus = response.planStatus();
+    switch (planStatus) {
+      case COMPLETED:
+        return getScanTasksIterable(response.planTasks(), response.fileScanTasks());
+      case SUBMITTED:
+        return fetchPlanningResult(response.planId());
+      case FAILED:
+        throw new RuntimeException(
+            "Received \"failed\" status from service when planning a table scan");
+      default:
+        throw new RuntimeException(
+            String.format("Invalid planStatus during planTableScan: %s", planStatus));
+    }
+  }
+
+  /**
+   * Polls the planning result of a submitted plan until it reaches a terminal status, sleeping
+   * {@link #FETCH_PLANNING_SLEEP_DURATION_MS} milliseconds between attempts.
+   */
+  private CloseableIterable<FileScanTask> fetchPlanningResult(String planId) {
+    // TODO need to introduce a max wait time for this loop potentially
+    while (true) {
+      FetchPlanningResultResponse response =
+          client.get(
+              resourcePaths.fetchPlanningResult(tableIdentifier, planId),
+              FetchPlanningResultResponse.class,
+              headers,
+              ErrorHandlers.defaultErrorHandler());
+
+      PlanStatus planStatus = response.planStatus();
+      switch (planStatus) {
+        case COMPLETED:
+          return getScanTasksIterable(response.planTasks(), response.fileScanTasks());
+        case SUBMITTED:
+          // still planning: wait, then poll again
+          try {
+            Thread.sleep(FETCH_PLANNING_SLEEP_DURATION_MS);
+          } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new RuntimeException("Interrupted while fetching plan status", e);
+          }
+          break;
+        case FAILED:
+          throw new RuntimeException(
+              "Received \"failed\" status from service when fetching a table scan");
+        case CANCELLED:
+          throw new RuntimeException(
+              String.format(
+                  "Received \"cancelled\" status from service when fetching a table scan, planId: %s is invalid",
+                  planId));
+        default:
+          throw new RuntimeException(
+              String.format("Invalid planStatus during fetchPlanningResult: %s", planStatus));
+      }
+    }
+  }
+
+  /**
+   * Builds the iterable over the results of a completed plan.
+   *
+   * @param planTasks plan tasks whose file scan tasks still have to be fetched, may be null
+   * @param fileScanTasks file scan tasks returned directly with the plan, may be null
+   * @return a single iterable over {@code fileScanTasks} when there are no plan tasks; otherwise
+   *     a {@link ParallelIterable} over the direct tasks (if any) and one iterable per plan task
+   */
+  public CloseableIterable<FileScanTask> getScanTasksIterable(
+      List<String> planTasks, List<FileScanTask> fileScanTasks) {
+    if (planTasks == null) {
+      // use a single scanTasks iterable since no need to parallelize since no planTasks
+      return scanTasksIterable(fileScanTasks);
+    }
+
+    List<ScanTasksIterable> iterableOfScanTaskIterables = Lists.newArrayList();
+    if (fileScanTasks != null) {
+      iterableOfScanTaskIterables.add(scanTasksIterable(fileScanTasks));
+    }
+
+    // Use parallel iterable since planTasks are present
+    for (String planTask : planTasks) {
+      iterableOfScanTaskIterables.add(scanTasksIterable(planTask));
+    }
+
+    return new ParallelIterable<>(iterableOfScanTaskIterables, planExecutor());
+  }
+
+  /** Wraps file scan tasks that were returned directly by the service. */
+  private ScanTasksIterable scanTasksIterable(List<FileScanTask> fileScanTasks) {
+    return new ScanTasksIterable(
+        fileScanTasks,
+        client,
+        resourcePaths,
+        tableIdentifier,
+        headers,
+        planExecutor(),
+        table.specs(),
+        isCaseSensitive());
+  }
+
+  /** Wraps a plan task whose file scan tasks are fetched when the iterable is consumed. */
+  private ScanTasksIterable scanTasksIterable(String planTask) {
+    return new ScanTasksIterable(
+        planTask,
+        client,
+        resourcePaths,
+        tableIdentifier,
+        headers,
+        planExecutor(),
+        table.specs(),
+        isCaseSensitive());
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/ScanTasksIterable.java b/core/src/main/java/org/apache/iceberg/ScanTasksIterable.java
new file mode 100644
index 000000000000..df5dc3f9149b
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/ScanTasksIterable.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.iceberg; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.function.Supplier; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.io.CloseableIterator; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.rest.ErrorHandlers; +import org.apache.iceberg.rest.RESTClient; +import org.apache.iceberg.rest.ResourcePaths; +import org.apache.iceberg.rest.requests.FetchScanTasksRequest; +import org.apache.iceberg.rest.responses.FetchScanTasksResponse; +import org.apache.iceberg.util.ParallelIterable; + +public class ScanTasksIterable implements CloseableIterable { + private final RESTClient client; + private final ResourcePaths resourcePaths; + private final TableIdentifier tableIdentifier; + private final Supplier> headers; + private final String + planTask; // parallelizing on this where a planTask produces a list of file scan tasks, as + // well more planTasks + private final List fileScanTasks; + private ExecutorService executorService; + private Map specsById; + private boolean caseSensitive; + + public ScanTasksIterable( + String planTask, + RESTClient client, + ResourcePaths resourcePaths, + TableIdentifier tableIdentifier, + Supplier> headers, + ExecutorService executorService, + Map specsById, + boolean caseSensitive) { + this.planTask = planTask; + this.fileScanTasks = null; + this.client = client; + this.resourcePaths = resourcePaths; + this.tableIdentifier = tableIdentifier; + this.headers = headers; + this.executorService = executorService; + this.specsById = specsById; + this.caseSensitive = caseSensitive; + } + + public ScanTasksIterable( + List fileScanTasks, + RESTClient client, + ResourcePaths resourcePaths, + TableIdentifier tableIdentifier, + Supplier> headers, + ExecutorService executorService, + Map specsById, + boolean 
caseSensitive) { + this.planTask = null; + this.fileScanTasks = fileScanTasks; + this.client = client; + this.resourcePaths = resourcePaths; + this.tableIdentifier = tableIdentifier; + this.headers = headers; + this.executorService = executorService; + this.specsById = specsById; + this.caseSensitive = caseSensitive; + } + + @Override + public CloseableIterator iterator() { + return new ScanTasksIterator( + planTask, + fileScanTasks, + client, + resourcePaths, + tableIdentifier, + headers, + executorService, + specsById, + caseSensitive); + } + + @Override + public void close() throws IOException {} + + private static class ScanTasksIterator implements CloseableIterator { + private final RESTClient client; + private final ResourcePaths resourcePaths; + private final TableIdentifier tableIdentifier; + private final Supplier> headers; + private String planTask; + private List fileScanTasks; + private ExecutorService executorService; + private Map specsById; + private boolean caseSensitive; + + ScanTasksIterator( + String planTask, + List fileScanTasks, + RESTClient client, + ResourcePaths resourcePaths, + TableIdentifier tableIdentifier, + Supplier> headers, + ExecutorService executorService, + Map specsById, + boolean caseSensitive) { + this.client = client; + this.resourcePaths = resourcePaths; + this.tableIdentifier = tableIdentifier; + this.headers = headers; + this.planTask = planTask; + this.fileScanTasks = fileScanTasks != null ? 
fileScanTasks : Lists.newArrayList(); + this.executorService = executorService; + this.specsById = specsById; + this.caseSensitive = caseSensitive; + } + + @Override + public boolean hasNext() { + if (!fileScanTasks.isEmpty()) { + // Have file scan tasks so continue to consume + return true; + } + // Out of file scan tasks, so need to now fetch more from each planTask + // Service can send back more planTasks which acts as pagination + if (planTask != null) { + fetchScanTasks(planTask); + planTask = null; + // Make another hasNext() call, as more fileScanTasks have been fetched + return hasNext(); + } + // we have no file scan tasks left to consume + // so means we are finished + return false; + } + + @Override + public FileScanTask next() { + FileScanTask task = fileScanTasks.remove(0); + if (task instanceof UnboundBaseFileScanTask) { + // bind partition spec data to task + UnboundBaseFileScanTask unboundBaseFileScanTask = (UnboundBaseFileScanTask) task; + Integer specId = task.file().specId(); + return unboundBaseFileScanTask.bind(specsById.get(specId), caseSensitive); + } else { + return task; + } + } + + private void fetchScanTasks(String withPlanTask) { + FetchScanTasksRequest fetchScanTasksRequest = new FetchScanTasksRequest(withPlanTask); + FetchScanTasksResponse response = + client.post( + resourcePaths.fetchScanTasks(tableIdentifier), + fetchScanTasksRequest, + FetchScanTasksResponse.class, + headers, + ErrorHandlers.defaultErrorHandler()); + if (response.fileScanTasks() != null) { + fileScanTasks.addAll(response.fileScanTasks()); + } + + if (response.planTasks() != null) { + // this is the case where a plan task returned an additional plan task, so ensure that this + // result is added to top level fileScanTasks list. 
+ // confirmed working with catalog test + // #testPlanTableScanAndFetchScanTasksWithCompletedStatusAndNestedPlanTasks + Iterable fileScanTasksFromPlanTasks = + getScanTasksIterable(response.planTasks()); + fileScanTasksFromPlanTasks.forEach(task -> fileScanTasks.add(task)); + } + } + + public CloseableIterable getScanTasksIterable(List planTasks) { + List iterableOfScanTaskIterables = Lists.newArrayList(); + for (String withPlanTask : planTasks) { + ScanTasksIterable iterable = + new ScanTasksIterable( + withPlanTask, + client, + resourcePaths, + tableIdentifier, + headers, + executorService, + specsById, + caseSensitive); + iterableOfScanTaskIterables.add(iterable); + } + return new ParallelIterable<>(iterableOfScanTaskIterables, executorService); + } + + @Override + public void close() throws IOException {} + } +} diff --git a/core/src/main/java/org/apache/iceberg/UnboundBaseFileScanTask.java b/core/src/main/java/org/apache/iceberg/UnboundBaseFileScanTask.java new file mode 100644 index 000000000000..9905e5be4c21 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/UnboundBaseFileScanTask.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.ResidualEvaluator; + +class UnboundBaseFileScanTask extends BaseFileScanTask { + private UnboundGenericDataFile unboundDataFile; + private UnboundGenericDeleteFile[] unboundDeleteFiles; + private Expression filter; + + UnboundBaseFileScanTask( + UnboundGenericDataFile unboundDataFile, + UnboundGenericDeleteFile[] unboundDeleteFiles, + Expression filter) { + super(unboundDataFile, unboundDeleteFiles, null, null, ResidualEvaluator.unpartitioned(filter)); + this.unboundDataFile = unboundDataFile; + this.unboundDeleteFiles = unboundDeleteFiles; + this.filter = filter; + } + + @Override + public Schema schema() { + throw new UnsupportedOperationException("schema() is not supported in UnboundBaseFileScanTask"); + } + + @Override + public PartitionSpec spec() { + throw new UnsupportedOperationException("spec() is not supported in UnboundBaseFileScanTask"); + } + + public FileScanTask bind(PartitionSpec spec, boolean caseSensitive) { + GenericDataFile boundDataFile = unboundDataFile.bindToSpec(spec); + DeleteFile[] boundDeleteFiles = new DeleteFile[unboundDeleteFiles.length]; + for (int i = 0; i < unboundDeleteFiles.length; i++) { + boundDeleteFiles[i] = unboundDeleteFiles[i].bindToSpec(spec); + } + + String schemaString = SchemaParser.toJson(spec.schema()); + String specString = PartitionSpecParser.toJson(spec); + ResidualEvaluator boundResidual = ResidualEvaluator.of(spec, filter, caseSensitive); + + return new BaseFileScanTask( + boundDataFile, boundDeleteFiles, schemaString, specString, boundResidual); + } +} diff --git a/core/src/main/java/org/apache/iceberg/UnboundGenericDataFile.java b/core/src/main/java/org/apache/iceberg/UnboundGenericDataFile.java new file mode 100644 index 000000000000..66f0d693cd0e --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/UnboundGenericDataFile.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.databind.JsonNode; +import java.nio.ByteBuffer; +import java.util.List; + +/** + * An UnboundGenericDataFile is a GenericDataFile which keeps track of the raw partition value + * represented as JSON + */ +class UnboundGenericDataFile extends GenericDataFile { + private final JsonNode rawPartitionValue; + + UnboundGenericDataFile( + int specId, + String filePath, + FileFormat format, + JsonNode rawPartitionValue, + long fileSizeInBytes, + Metrics metrics, + ByteBuffer keyMetadata, + List splitOffsets, + Integer sortOrderId) { + super( + specId, + filePath, + format, + null, + fileSizeInBytes, + metrics, + keyMetadata, + splitOffsets, + sortOrderId, + -1L); // track row-lineage + this.rawPartitionValue = rawPartitionValue; + } + + GenericDataFile bindToSpec(PartitionSpec spec) { + return new GenericDataFile( + specId(), + path().toString(), + format(), + ContentFileParser.partitionDataFromRawValue(rawPartitionValue, spec), + fileSizeInBytes(), + new Metrics( + recordCount(), + columnSizes(), + valueCounts(), + nullValueCounts(), + nanValueCounts(), + lowerBounds(), + upperBounds()), + keyMetadata(), + splitOffsets(), + sortOrderId(), 
+ -1L); + } +} diff --git a/core/src/main/java/org/apache/iceberg/UnboundGenericDeleteFile.java b/core/src/main/java/org/apache/iceberg/UnboundGenericDeleteFile.java new file mode 100644 index 000000000000..57be45e27c5c --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/UnboundGenericDeleteFile.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import com.fasterxml.jackson.databind.JsonNode; +import java.nio.ByteBuffer; +import java.util.List; + +/** + * An UnboundGenericDeleteFile is a GenericDeleteFile which keeps track of the raw partition value + * represented as JSON + */ +class UnboundGenericDeleteFile extends GenericDeleteFile { + private JsonNode rawPartitionValue; + + UnboundGenericDeleteFile( + int specId, + FileContent content, + String filePath, + FileFormat format, + JsonNode rawPartitionValue, + long fileSizeInBytes, + Metrics metrics, + int[] equalityFieldIds, + Integer sortOrderId, + List splitOffsets, + ByteBuffer keyMetadata) { + super( + specId, + content, + filePath, + format, + null, + fileSizeInBytes, + metrics, + equalityFieldIds, + sortOrderId, + splitOffsets, + keyMetadata, + null, + null, + null); + this.rawPartitionValue = rawPartitionValue; + } + + GenericDeleteFile bindToSpec(PartitionSpec spec) { + return new GenericDeleteFile( + specId(), + content(), + path().toString(), + format(), + ContentFileParser.partitionDataFromRawValue(rawPartitionValue, spec), + fileSizeInBytes(), + new Metrics( + recordCount(), + columnSizes(), + valueCounts(), + nullValueCounts(), + nanValueCounts(), + lowerBounds(), + upperBounds()), + equalityFieldIds().stream().mapToInt(Integer::intValue).toArray(), + sortOrderId(), + splitOffsets(), + keyMetadata(), + null, + null, + null); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/PlanStatus.java b/core/src/main/java/org/apache/iceberg/rest/PlanStatus.java new file mode 100644 index 000000000000..5603d51e9aa2 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/PlanStatus.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest; + +import java.util.Locale; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +public enum PlanStatus { + COMPLETED("completed"), + SUBMITTED("submitted"), + CANCELLED("cancelled"), + FAILED("failed"); + + private final String status; + + PlanStatus(String status) { + this.status = status; + } + + public String status() { + return status; + } + + public static PlanStatus fromName(String status) { + Preconditions.checkArgument(status != null, "Status is null"); + try { + return PlanStatus.valueOf(status.toUpperCase(Locale.ENGLISH)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(String.format("Invalid status name: %s", status), e); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java index 667142698633..9167f4233afd 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java @@ -46,9 +46,13 @@ import org.apache.iceberg.rest.requests.CommitTransactionRequestParser; import org.apache.iceberg.rest.requests.CreateViewRequest; import org.apache.iceberg.rest.requests.CreateViewRequestParser; +import org.apache.iceberg.rest.requests.FetchScanTasksRequest; +import 
org.apache.iceberg.rest.requests.FetchScanTasksRequestParser; import org.apache.iceberg.rest.requests.ImmutableCreateViewRequest; import org.apache.iceberg.rest.requests.ImmutableRegisterTableRequest; import org.apache.iceberg.rest.requests.ImmutableReportMetricsRequest; +import org.apache.iceberg.rest.requests.PlanTableScanRequest; +import org.apache.iceberg.rest.requests.PlanTableScanRequestParser; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RegisterTableRequestParser; import org.apache.iceberg.rest.requests.ReportMetricsRequest; @@ -59,6 +63,10 @@ import org.apache.iceberg.rest.responses.ConfigResponseParser; import org.apache.iceberg.rest.responses.ErrorResponse; import org.apache.iceberg.rest.responses.ErrorResponseParser; +import org.apache.iceberg.rest.responses.FetchPlanningResultResponse; +import org.apache.iceberg.rest.responses.FetchPlanningResultResponseParser; +import org.apache.iceberg.rest.responses.FetchScanTasksResponse; +import org.apache.iceberg.rest.responses.FetchScanTasksResponseParser; import org.apache.iceberg.rest.responses.ImmutableLoadCredentialsResponse; import org.apache.iceberg.rest.responses.ImmutableLoadViewResponse; import org.apache.iceberg.rest.responses.LoadCredentialsResponse; @@ -68,6 +76,8 @@ import org.apache.iceberg.rest.responses.LoadViewResponse; import org.apache.iceberg.rest.responses.LoadViewResponseParser; import org.apache.iceberg.rest.responses.OAuthTokenResponse; +import org.apache.iceberg.rest.responses.PlanTableScanResponse; +import org.apache.iceberg.rest.responses.PlanTableScanResponseParser; import org.apache.iceberg.util.JsonUtil; public class RESTSerializers { @@ -123,6 +133,19 @@ public static void registerAll(ObjectMapper mapper) { .addDeserializer(ConfigResponse.class, new ConfigResponseDeserializer<>()) .addSerializer(LoadTableResponse.class, new LoadTableResponseSerializer<>()) .addDeserializer(LoadTableResponse.class, new 
LoadTableResponseDeserializer<>()) + .addSerializer(PlanTableScanRequest.class, new PlanTableScanRequestSerializer<>()) + .addDeserializer(PlanTableScanRequest.class, new PlanTableScanRequestDeserializer<>()) + .addSerializer(FetchScanTasksRequest.class, new FetchScanTasksRequestSerializer<>()) + .addDeserializer(FetchScanTasksRequest.class, new FetchScanTasksRequestDeserializer<>()) + .addSerializer(PlanTableScanResponse.class, new PlanTableScanResponseSerializer<>()) + .addDeserializer(PlanTableScanResponse.class, new PlanTableScanResponseDeserializer<>()) + .addSerializer( + FetchPlanningResultResponse.class, new FetchPlanningResultResponseSerializer<>()) + .addDeserializer( + FetchPlanningResultResponse.class, new FetchPlanningResultResponseDeserializer<>()) + .addSerializer(FetchScanTasksResponse.class, new FetchScanTaskResponseSerializer<>()) + .addDeserializer(FetchScanTasksResponse.class, new FetchScanTaskResponseDeserializer<>()) + .addDeserializer(LoadTableResponse.class, new LoadTableResponseDeserializer<>()) .addSerializer(LoadCredentialsResponse.class, new LoadCredentialsResponseSerializer<>()) .addSerializer( ImmutableLoadCredentialsResponse.class, new LoadCredentialsResponseSerializer<>()) @@ -470,4 +493,94 @@ public T deserialize(JsonParser p, DeserializationContext context) throws IOExce return (T) LoadCredentialsResponseParser.fromJson(jsonNode); } } + + static class PlanTableScanRequestSerializer + extends JsonSerializer { + @Override + public void serialize(T request, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + PlanTableScanRequestParser.toJson(request, gen); + } + } + + static class PlanTableScanRequestDeserializer + extends JsonDeserializer { + @Override + public T deserialize(JsonParser p, DeserializationContext context) throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return (T) PlanTableScanRequestParser.fromJson(jsonNode); + } + } + + static class FetchScanTasksRequestSerializer + 
extends JsonSerializer { + @Override + public void serialize(T request, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + FetchScanTasksRequestParser.toJson(request, gen); + } + } + + static class FetchScanTasksRequestDeserializer + extends JsonDeserializer { + @Override + public T deserialize(JsonParser p, DeserializationContext context) throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return (T) FetchScanTasksRequestParser.fromJson(jsonNode); + } + } + + static class PlanTableScanResponseSerializer + extends JsonSerializer { + @Override + public void serialize(T response, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + PlanTableScanResponseParser.toJson(response, gen); + } + } + + static class PlanTableScanResponseDeserializer + extends JsonDeserializer { + @Override + public T deserialize(JsonParser p, DeserializationContext context) throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return (T) PlanTableScanResponseParser.fromJson(jsonNode); + } + } + + static class FetchPlanningResultResponseSerializer + extends JsonSerializer { + @Override + public void serialize(T response, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + FetchPlanningResultResponseParser.toJson(response, gen); + } + } + + static class FetchPlanningResultResponseDeserializer + extends JsonDeserializer { + @Override + public T deserialize(JsonParser p, DeserializationContext context) throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return (T) FetchPlanningResultResponseParser.fromJson(jsonNode); + } + } + + static class FetchScanTaskResponseSerializer + extends JsonSerializer { + @Override + public void serialize(T response, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + FetchScanTasksResponseParser.toJson(response, gen); + } + } + + static class FetchScanTaskResponseDeserializer + extends JsonDeserializer { + @Override + 
public T deserialize(JsonParser p, DeserializationContext context) throws IOException { + JsonNode jsonNode = p.getCodec().readTree(p); + return (T) FetchScanTasksResponseParser.fromJson(jsonNode); + } + } } diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java index 4e1c339d1fe9..50b545016028 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java @@ -36,10 +36,12 @@ import org.apache.iceberg.MetadataTableUtils; import org.apache.iceberg.MetadataUpdate; import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.RESTTable; import org.apache.iceberg.Schema; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableOperations; import org.apache.iceberg.Transaction; import org.apache.iceberg.Transactions; import org.apache.iceberg.catalog.BaseViewSessionCatalog; @@ -108,6 +110,9 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog private static final String REST_SNAPSHOT_LOADING_MODE = "snapshot-loading-mode"; // for backwards compatibility with older REST servers where it can be assumed that a particular // server supports view endpoints but doesn't send the "endpoints" field in the ConfigResponse + public static final String REST_SERVER_PLANNING_ENABLED = "rest-server-planning-enabled"; + private static final String REST_TABLE_SCAN_PLANNING_PROPERTY = "table.rest-scan-planning"; + static final String VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported"; public static final String REST_PAGE_SIZE = "rest-page-size"; @@ -155,6 +160,7 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog private FileIO io = null; private MetricsReporter reporter = null; private boolean reportingViaRestEnabled; + private boolean restServerPlanningEnabled; private Integer pageSize 
= null; private CloseableGroup closeables = null; private Set endpoints; @@ -253,6 +259,9 @@ public void initialize(String name, Map unresolved) { this.reportingViaRestEnabled = PropertyUtil.propertyAsBoolean(mergedProps, REST_METRICS_REPORTING_ENABLED, true); + + this.restServerPlanningEnabled = + PropertyUtil.propertyAsBoolean(mergedProps, REST_SERVER_PLANNING_ENABLED, false); super.initialize(name, mergedProps); } @@ -453,6 +462,11 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) { trackFileIO(ops); + RESTTable restTable = tableSupportsRemoteScanPlanning(ops, finalIdentifier, tableClient); + if (restTable != null) { + return restTable; + } + BaseTable table = new BaseTable( ops, @@ -465,6 +479,26 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) { return table; } + private RESTTable tableSupportsRemoteScanPlanning( + TableOperations ops, TableIdentifier finalIdentifier, RESTClient restClient) { + if (ops.current().properties().containsKey(REST_TABLE_SCAN_PLANNING_PROPERTY)) { + boolean tableSupportsRemotePlanning = + ops.current().propertyAsBoolean(REST_TABLE_SCAN_PLANNING_PROPERTY, false); + if (tableSupportsRemotePlanning && restServerPlanningEnabled) { + return new RESTTable( + ops, + fullTableName(finalIdentifier), + metricsReporter(paths.metrics(finalIdentifier), restClient), + this.client, + paths.table(finalIdentifier), + Map::of, + finalIdentifier, + paths); + } + } + return null; + } + private void trackFileIO(RESTTableOperations ops) { if (io != ops.io()) { fileIOTracker.track(ops); @@ -532,6 +566,11 @@ public Table registerTable( trackFileIO(ops); + RESTTable restTable = tableSupportsRemoteScanPlanning(ops, ident, tableClient); + if (restTable != null) { + return restTable; + } + return new BaseTable( ops, fullTableName(ident), metricsReporter(paths.metrics(ident), tableClient)); } @@ -791,6 +830,11 @@ public Table create() { trackFileIO(ops); + RESTTable restTable = 
tableSupportsRemoteScanPlanning(ops, ident, tableClient); + if (restTable != null) { + return restTable; + } + return new BaseTable( ops, fullTableName(ident), metricsReporter(paths.metrics(ident), tableClient)); } diff --git a/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java b/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java index d5c11f6052f1..ca42f4a38844 100644 --- a/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java +++ b/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java @@ -130,4 +130,38 @@ public String view(TableIdentifier ident) { public String renameView() { return SLASH.join("v1", prefix, "views", "rename"); } + + public String planTableScan(TableIdentifier ident) { + return SLASH.join( + "v1", + prefix, + "namespaces", + RESTUtil.encodeNamespace(ident.namespace()), + "tables", + RESTUtil.encodeString(ident.name()), + "plan"); + } + + public String fetchPlanningResult(TableIdentifier ident, String planId) { + return SLASH.join( + "v1", + prefix, + "namespaces", + RESTUtil.encodeNamespace(ident.namespace()), + "tables", + RESTUtil.encodeString(ident.name()), + "plan", + planId); + } + + public String fetchScanTasks(TableIdentifier ident) { + return SLASH.join( + "v1", + prefix, + "namespaces", + RESTUtil.encodeNamespace(ident.namespace()), + "tables", + RESTUtil.encodeString(ident.name()), + "tasks"); + } } diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequest.java b/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequest.java new file mode 100644 index 000000000000..2293baac999e --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.requests; + +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.rest.RESTRequest; + +public class FetchScanTasksRequest implements RESTRequest { + + private final String planTask; + + public FetchScanTasksRequest(String planTask) { + this.planTask = planTask; + validate(); + } + + public String planTask() { + return planTask; + } + + @Override + public void validate() { + Preconditions.checkArgument(planTask != null, "Invalid planTask: null"); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this).add("planTask", planTask).toString(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequestParser.java b/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequestParser.java new file mode 100644 index 000000000000..fa9af3da0c90 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequestParser.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.requests; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +public class FetchScanTasksRequestParser { + private static final String PLAN_TASK = "plan-task"; + + private FetchScanTasksRequestParser() {} + + public static String toJson(FetchScanTasksRequest request) { + return toJson(request, false); + } + + public static String toJson(FetchScanTasksRequest request, boolean pretty) { + return JsonUtil.generate(gen -> toJson(request, gen), pretty); + } + + public static void toJson(FetchScanTasksRequest request, JsonGenerator gen) throws IOException { + Preconditions.checkArgument(null != request, "Invalid request: fetchScanTasks request null"); + gen.writeStartObject(); + gen.writeStringField(PLAN_TASK, request.planTask()); + gen.writeEndObject(); + } + + public static FetchScanTasksRequest fromJson(String json) { + return JsonUtil.parse(json, FetchScanTasksRequestParser::fromJson); + } + + public static FetchScanTasksRequest fromJson(JsonNode json) { + Preconditions.checkArgument(null != json, "Invalid request: fetchScanTasks null"); + + String planTask = JsonUtil.getString(PLAN_TASK, json); + return new FetchScanTasksRequest(planTask); + } +} diff 
--git a/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java new file mode 100644 index 000000000000..d85ee324b0dd --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.requests; + +import java.util.List; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.rest.RESTRequest; + +public class PlanTableScanRequest implements RESTRequest { + private final Long snapshotId; + private final List select; + private final Expression filter; + private final boolean caseSensitive; + private final boolean useSnapshotSchema; + private final Long startSnapshotId; + private final Long endSnapshotId; + private final List statsFields; + + public Long snapshotId() { + return snapshotId; + } + + public List select() { + return select; + } + + public Expression filter() { + return filter; + } + + public boolean caseSensitive() { + return caseSensitive; + } + + public boolean useSnapshotSchema() { + return useSnapshotSchema; + } + + public Long startSnapshotId() { + return startSnapshotId; + } + + public Long endSnapshotId() { + return endSnapshotId; + } + + public List statsFields() { + return statsFields; + } + + private PlanTableScanRequest( + Long snapshotId, + List select, + Expression filter, + boolean caseSensitive, + boolean useSnapshotSchema, + Long startSnapshotId, + Long endSnapshotId, + List statsFields) { + this.snapshotId = snapshotId; + this.select = select; + this.filter = filter; + this.caseSensitive = caseSensitive; + this.useSnapshotSchema = useSnapshotSchema; + this.startSnapshotId = startSnapshotId; + this.endSnapshotId = endSnapshotId; + this.statsFields = statsFields; + } + + @Override + public void validate() { + if (snapshotId != null || startSnapshotId != null || endSnapshotId != null) { + Preconditions.checkArgument( + snapshotId != null ^ (startSnapshotId != null && endSnapshotId != null), + "Either snapshotId must be provided or both startSnapshotId and endSnapshotId must be provided"); + } + } + + @Override + public 
String toString() { + return MoreObjects.toStringHelper(this) + .add("snapshotId", snapshotId) + .add("caseSensitive", caseSensitive) + .add("useSnapshotSchema", useSnapshotSchema) + .add("startSnapshotId", startSnapshotId) + .add("endSnapshotId", endSnapshotId) + .toString(); + } + + public static class Builder { + private Long snapshotId; + private List select; + private Expression filter; + private boolean caseSensitive = true; + private boolean useSnapshotSchema = false; + private Long startSnapshotId; + private Long endSnapshotId; + private List statsFields; + + public Builder() {} + + public Builder withSnapshotId(Long withSnapshotId) { + this.snapshotId = withSnapshotId; + return this; + } + + public Builder withSelect(List projection) { + this.select = projection; + return this; + } + + public Builder withFilter(Expression expression) { + this.filter = expression; + return this; + } + + public Builder withCaseSensitive(boolean value) { + this.caseSensitive = value; + return this; + } + + public Builder withUseSnapshotSchema(boolean snapshotSchema) { + this.useSnapshotSchema = snapshotSchema; + return this; + } + + public Builder withStartSnapshotId(Long startingSnapshotId) { + this.startSnapshotId = startingSnapshotId; + return this; + } + + public Builder withEndSnapshotId(Long endingSnapshotId) { + this.endSnapshotId = endingSnapshotId; + return this; + } + + public Builder withStatsFields(List fields) { + this.statsFields = fields; + return this; + } + + public PlanTableScanRequest build() { + return new PlanTableScanRequest( + snapshotId, + select, + filter, + caseSensitive, + useSnapshotSchema, + startSnapshotId, + endSnapshotId, + statsFields); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequestParser.java b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequestParser.java new file mode 100644 index 000000000000..e840841fcfcf --- /dev/null +++ 
b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequestParser.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.requests; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.List; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.ExpressionParser; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +public class PlanTableScanRequestParser { + private static final String SNAPSHOT_ID = "snapshot-id"; + private static final String SELECT = "select"; + private static final String FILTER = "filter"; + private static final String CASE_SENSITIVE = "case-sensitive"; + private static final String USE_SNAPSHOT_SCHEMA = "use-snapshot-schema"; + private static final String START_SNAPSHOT_ID = "start-snapshot-id"; + private static final String END_SNAPSHOT_ID = "end-snapshot-id"; + private static final String STATS_FIELDS = "stats-fields"; + + private PlanTableScanRequestParser() {} + + public static String toJson(PlanTableScanRequest request) { + 
return toJson(request, false); + } + + public static String toJson(PlanTableScanRequest request, boolean pretty) { + return JsonUtil.generate(gen -> toJson(request, gen), pretty); + } + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + public static void toJson(PlanTableScanRequest request, JsonGenerator gen) throws IOException { + Preconditions.checkArgument(null != request, "Invalid request: planTableScanRequest null"); + + if (request.snapshotId() != null + || request.startSnapshotId() != null + || request.endSnapshotId() != null) { + Preconditions.checkArgument( + request.snapshotId() != null + ^ (request.startSnapshotId() != null && request.endSnapshotId() != null), + "Either snapshotId must be provided or both startSnapshotId and endSnapshotId must be provided"); + } + + gen.writeStartObject(); + if (request.snapshotId() != null) { + gen.writeNumberField(SNAPSHOT_ID, request.snapshotId()); + } + + if (request.startSnapshotId() != null) { + gen.writeNumberField(START_SNAPSHOT_ID, request.startSnapshotId()); + } + + if (request.endSnapshotId() != null) { + gen.writeNumberField(END_SNAPSHOT_ID, request.endSnapshotId()); + } + + if (request.select() != null && !request.select().isEmpty()) { + JsonUtil.writeStringArray(SELECT, request.select(), gen); + } + + if (request.filter() != null) { + gen.writeStringField(FILTER, ExpressionParser.toJson(request.filter())); + } + + gen.writeBooleanField(CASE_SENSITIVE, request.caseSensitive()); + gen.writeBooleanField(USE_SNAPSHOT_SCHEMA, request.useSnapshotSchema()); + + if (request.statsFields() != null && !request.statsFields().isEmpty()) { + JsonUtil.writeStringArray(STATS_FIELDS, request.statsFields(), gen); + } + + gen.writeEndObject(); + } + + public static PlanTableScanRequest fromJson(String json) { + return JsonUtil.parse(json, PlanTableScanRequestParser::fromJson); + } + + public static PlanTableScanRequest fromJson(JsonNode json) { + Preconditions.checkArgument(null != json, "Invalid request: 
planTableScanRequest null"); + + Long snapshotId = JsonUtil.getLongOrNull(SNAPSHOT_ID, json); + Long startSnapshotId = JsonUtil.getLongOrNull(START_SNAPSHOT_ID, json); + Long endSnapshotId = JsonUtil.getLongOrNull(END_SNAPSHOT_ID, json); + + List select = JsonUtil.getStringListOrNull(SELECT, json); + + Expression filter = null; + if (json.has(FILTER)) { + filter = ExpressionParser.fromJson(json.get(FILTER).textValue()); + } + + boolean caseSensitive = true; + if (json.has(CASE_SENSITIVE)) { + caseSensitive = JsonUtil.getBool(CASE_SENSITIVE, json); + } + + boolean useSnapshotSchema = false; + if (json.has(USE_SNAPSHOT_SCHEMA)) { + useSnapshotSchema = JsonUtil.getBool(USE_SNAPSHOT_SCHEMA, json); + } + + List statsFields = JsonUtil.getStringListOrNull(STATS_FIELDS, json); + + return new PlanTableScanRequest.Builder() + .withSnapshotId(snapshotId) + .withSelect(select) + .withFilter(filter) + .withCaseSensitive(caseSensitive) + .withUseSnapshotSchema(useSnapshotSchema) + .withStartSnapshotId(startSnapshotId) + .withEndSnapshotId(endSnapshotId) + .withStatsFields(statsFields) + .build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java new file mode 100644 index 000000000000..29af32a03d38 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.rest.PlanStatus; + +public class FetchPlanningResultResponse implements TableScanResponse { + private final PlanStatus planStatus; + private final List planTasks; + private final List fileScanTasks; + private final List deleteFiles; + private final Map specsById; + + private FetchPlanningResultResponse( + PlanStatus planStatus, + List planTasks, + List fileScanTasks, + List deleteFiles, + Map specsById) { + this.planStatus = planStatus; + this.planTasks = planTasks; + this.fileScanTasks = fileScanTasks; + this.deleteFiles = deleteFiles; + this.specsById = specsById; + validate(); + } + + public PlanStatus planStatus() { + return planStatus; + } + + public List planTasks() { + return planTasks; + } + + public List fileScanTasks() { + return fileScanTasks; + } + + public List deleteFiles() { + return deleteFiles; + } + + public Map specsById() { + return specsById; + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public void validate() { + Preconditions.checkArgument(planStatus() != null, "Invalid status: null"); + Preconditions.checkArgument( + planStatus() == PlanStatus.COMPLETED || (planTasks() == null && fileScanTasks() == null), + "Invalid response: tasks can only be returned in a 'completed' 
status"); + if (fileScanTasks() == null || fileScanTasks.isEmpty()) { + Preconditions.checkArgument( + (deleteFiles() == null || deleteFiles().isEmpty()), + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + } + } + + public static class Builder { + private Builder() {} + + private PlanStatus planStatus; + private List planTasks; + private List fileScanTasks; + private List deleteFiles; + private Map specsById; + + public Builder withPlanStatus(PlanStatus status) { + this.planStatus = status; + return this; + } + + public Builder withPlanTasks(List tasks) { + this.planTasks = tasks; + return this; + } + + public Builder withFileScanTasks(List tasks) { + this.fileScanTasks = tasks; + return this; + } + + public Builder withDeleteFiles(List deletes) { + this.deleteFiles = deletes; + return this; + } + + public Builder withSpecsById(Map specs) { + this.specsById = specs; + return this; + } + + public FetchPlanningResultResponse build() { + return new FetchPlanningResultResponse( + planStatus, planTasks, fileScanTasks, deleteFiles, specsById); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java new file mode 100644 index 000000000000..a5ac0cdcc212 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.List; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.rest.PlanStatus; +import org.apache.iceberg.util.JsonUtil; + +public class FetchPlanningResultResponseParser { + private static final String PLAN_STATUS = "plan-status"; + private static final String PLAN_TASKS = "plan-tasks"; + + private FetchPlanningResultResponseParser() {} + + public static String toJson(FetchPlanningResultResponse response) { + return toJson(response, false); + } + + public static String toJson(FetchPlanningResultResponse response, boolean pretty) { + return JsonUtil.generate(gen -> toJson(response, gen), pretty); + } + + public static void toJson(FetchPlanningResultResponse response, JsonGenerator gen) + throws IOException { + Preconditions.checkArgument( + null != response, "Invalid response: fetchPanningResultResponse null"); + Preconditions.checkArgument( + response.specsById() != null + || (response.fileScanTasks() == null || response.fileScanTasks().isEmpty()), + "Cannot serialize fileScanTasks in fetchingPlanningResultResponse without specsById"); + gen.writeStartObject(); + gen.writeStringField(PLAN_STATUS, response.planStatus().status()); + if (response.planTasks() != null) { + JsonUtil.writeStringArray(PLAN_TASKS, response.planTasks(), 
gen); + } + + TableScanResponseParser.serializeScanTasks( + response.fileScanTasks(), response.deleteFiles(), response.specsById(), gen); + gen.writeEndObject(); + } + + public static FetchPlanningResultResponse fromJson(String json) { + Preconditions.checkArgument(json != null, "Invalid response: fetchPanningResultResponse null"); + return JsonUtil.parse(json, FetchPlanningResultResponseParser::fromJson); + } + + public static FetchPlanningResultResponse fromJson(JsonNode json) { + Preconditions.checkArgument( + json != null && !json.isEmpty(), + "Invalid response: fetchPanningResultResponse null or empty"); + + PlanStatus planStatus = PlanStatus.fromName(JsonUtil.getString(PLAN_STATUS, json)); + List planTasks = JsonUtil.getStringListOrNull(PLAN_TASKS, json); + List deleteFiles = TableScanResponseParser.parseDeleteFiles(json); + List fileScanTasks = + TableScanResponseParser.parseFileScanTasks(json, deleteFiles); + return FetchPlanningResultResponse.builder() + .withPlanStatus(planStatus) + .withPlanTasks(planTasks) + .withFileScanTasks(fileScanTasks) + .withDeleteFiles(deleteFiles) + .build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java new file mode 100644 index 000000000000..fd512ab3b499 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +public class FetchScanTasksResponse implements TableScanResponse { + private final List planTasks; + private final List fileScanTasks; + private final List deleteFiles; + private final Map specsById; + + private FetchScanTasksResponse( + List planTasks, + List fileScanTasks, + List deleteFiles, + Map specsById) { + this.planTasks = planTasks; + this.fileScanTasks = fileScanTasks; + this.deleteFiles = deleteFiles; + this.specsById = specsById; + validate(); + } + + public List planTasks() { + return planTasks; + } + + public List fileScanTasks() { + return fileScanTasks; + } + + public List deleteFiles() { + return deleteFiles; + } + + public Map specsById() { + return specsById; + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public void validate() { + if (fileScanTasks() == null || fileScanTasks.isEmpty()) { + Preconditions.checkArgument( + (deleteFiles() == null || deleteFiles().isEmpty()), + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + } + + Preconditions.checkArgument( + planTasks() != null || fileScanTasks() != null, + "Invalid response: planTasks and fileScanTask cannot both be null"); + } + + public static class Builder { + private Builder() {} + + private 
List planTasks; + private List fileScanTasks; + private List deleteFiles; + private Map specsById; + + public Builder withPlanTasks(List tasks) { + this.planTasks = tasks; + return this; + } + + public Builder withFileScanTasks(List tasks) { + this.fileScanTasks = tasks; + return this; + } + + public Builder withDeleteFiles(List deletes) { + this.deleteFiles = deletes; + return this; + } + + public Builder withSpecsById(Map specs) { + this.specsById = specs; + return this; + } + + public FetchScanTasksResponse build() { + return new FetchScanTasksResponse(planTasks, fileScanTasks, deleteFiles, specsById); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java new file mode 100644 index 000000000000..eefd165c4960 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.responses; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.List; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +public class FetchScanTasksResponseParser { + private static final String PLAN_TASKS = "plan-tasks"; + + private FetchScanTasksResponseParser() {} + + public static String toJson(FetchScanTasksResponse response) { + return toJson(response, false); + } + + public static String toJson(FetchScanTasksResponse response, boolean pretty) { + return JsonUtil.generate(gen -> toJson(response, gen), pretty); + } + + public static void toJson(FetchScanTasksResponse response, JsonGenerator gen) throws IOException { + Preconditions.checkArgument(response != null, "Invalid response: fetchScanTasksResponse null"); + Preconditions.checkArgument( + response.specsById() != null + || (response.fileScanTasks() == null || response.fileScanTasks().isEmpty()), + "Cannot serialize fileScanTasks in fetchScanTasksResponse without specsById"); + gen.writeStartObject(); + if (response.planTasks() != null) { + JsonUtil.writeStringArray(PLAN_TASKS, response.planTasks(), gen); + } + + TableScanResponseParser.serializeScanTasks( + response.fileScanTasks(), response.deleteFiles(), response.specsById(), gen); + gen.writeEndObject(); + } + + public static FetchScanTasksResponse fromJson(String json) { + Preconditions.checkArgument(json != null, "Cannot parse fetchScanTasks response from null"); + return JsonUtil.parse(json, FetchScanTasksResponseParser::fromJson); + } + + public static FetchScanTasksResponse fromJson(JsonNode json) { + Preconditions.checkArgument( + json != null && !json.isEmpty(), "Invalid response: fetchScanTasksResponse null"); + List planTasks = 
JsonUtil.getStringListOrNull(PLAN_TASKS, json); + List deleteFiles = TableScanResponseParser.parseDeleteFiles(json); + List fileScanTasks = + TableScanResponseParser.parseFileScanTasks(json, deleteFiles); + return FetchScanTasksResponse.builder() + .withPlanTasks(planTasks) + .withFileScanTasks(fileScanTasks) + .withDeleteFiles(deleteFiles) + .build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java new file mode 100644 index 000000000000..dbe0efaf1de8 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.responses; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.rest.PlanStatus; + +public class PlanTableScanResponse implements TableScanResponse { + private final PlanStatus planStatus; + private final String planId; + private final List planTasks; + private final List fileScanTasks; + private final List deleteFiles; + private final Map specsById; + + private PlanTableScanResponse( + PlanStatus planStatus, + String planId, + List planTasks, + List fileScanTasks, + List deleteFiles, + Map specsById) { + this.planStatus = planStatus; + this.planId = planId; + this.planTasks = planTasks; + this.fileScanTasks = fileScanTasks; + this.deleteFiles = deleteFiles; + this.specsById = specsById; + validate(); + } + + public PlanStatus planStatus() { + return planStatus; + } + + public String planId() { + return planId; + } + + public List planTasks() { + return planTasks; + } + + public List fileScanTasks() { + return fileScanTasks; + } + + public List deleteFiles() { + return deleteFiles; + } + + public Map specsById() { + return specsById; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("planStatus", planStatus) + .add("planId", planId) + .toString(); + } + + @Override + public void validate() { + Preconditions.checkArgument( + planStatus() != null, "Invalid response: plan status must be defined"); + Preconditions.checkArgument( + planStatus() != PlanStatus.SUBMITTED || planId() != null, + "Invalid response: plan id should be defined when status is 'submitted'"); + Preconditions.checkArgument( + planStatus() != PlanStatus.CANCELLED, + "Invalid response: 'cancelled' is not a valid status for 
planTableScan"); + Preconditions.checkArgument( + planStatus() == PlanStatus.COMPLETED || (planTasks() == null && fileScanTasks() == null), + "Invalid response: tasks can only be returned in a 'completed' status"); + Preconditions.checkArgument( + planStatus() == PlanStatus.SUBMITTED || planId() == null, + "Invalid response: plan id can only be returned in a 'submitted' status"); + if (fileScanTasks() == null || fileScanTasks.isEmpty()) { + Preconditions.checkArgument( + (deleteFiles() == null || deleteFiles().isEmpty()), + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + } + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private Builder() {} + + private PlanStatus planStatus; + private String planId; + private List planTasks; + private List fileScanTasks; + private List deleteFiles; + private Map specsById; + + public Builder withPlanStatus(PlanStatus status) { + this.planStatus = status; + return this; + } + + public Builder withPlanId(String id) { + this.planId = id; + return this; + } + + public Builder withPlanTasks(List tasks) { + this.planTasks = tasks; + return this; + } + + public Builder withFileScanTasks(List tasks) { + this.fileScanTasks = tasks; + return this; + } + + public Builder withDeleteFiles(List deletes) { + this.deleteFiles = deletes; + return this; + } + + public Builder withSpecsById(Map specs) { + this.specsById = specs; + return this; + } + + public PlanTableScanResponse build() { + return new PlanTableScanResponse( + planStatus, planId, planTasks, fileScanTasks, deleteFiles, specsById); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java new file mode 100644 index 000000000000..25d8f11d9ac4 --- /dev/null +++ 
b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.List; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.rest.PlanStatus; +import org.apache.iceberg.util.JsonUtil; + +public class PlanTableScanResponseParser { + private static final String PLAN_STATUS = "plan-status"; + private static final String PLAN_ID = "plan-id"; + private static final String PLAN_TASKS = "plan-tasks"; + + private PlanTableScanResponseParser() {} + + public static String toJson(PlanTableScanResponse response) { + return toJson(response, false); + } + + public static String toJson(PlanTableScanResponse response, boolean pretty) { + return JsonUtil.generate(gen -> toJson(response, gen), pretty); + } + + public static void toJson(PlanTableScanResponse response, JsonGenerator gen) throws IOException { + 
Preconditions.checkArgument(null != response, "Invalid response: planTableScanResponse null"); + Preconditions.checkArgument( + response.planStatus() != null, "Invalid response: status can not be null"); + Preconditions.checkArgument( + response.specsById() != null, "Cannot serialize planTableScanResponse without specsById"); + + gen.writeStartObject(); + gen.writeStringField(PLAN_STATUS, response.planStatus().status()); + + if (response.planId() != null) { + gen.writeStringField(PLAN_ID, response.planId()); + } + if (response.planTasks() != null) { + JsonUtil.writeStringArray(PLAN_TASKS, response.planTasks(), gen); + } + + TableScanResponseParser.serializeScanTasks( + response.fileScanTasks(), response.deleteFiles(), response.specsById(), gen); + + gen.writeEndObject(); + } + + public static PlanTableScanResponse fromJson(String json) { + Preconditions.checkArgument( + json != null, "Cannot parse planTableScan response from empty or null object"); + return JsonUtil.parse(json, PlanTableScanResponseParser::fromJson); + } + + public static PlanTableScanResponse fromJson(JsonNode json) { + Preconditions.checkArgument( + json != null && !json.isEmpty(), + "Cannot parse planTableScan response from empty or null object"); + + PlanStatus planStatus = PlanStatus.fromName(JsonUtil.getString(PLAN_STATUS, json)); + String planId = JsonUtil.getStringOrNull(PLAN_ID, json); + List planTasks = JsonUtil.getStringListOrNull(PLAN_TASKS, json); + List deleteFiles = TableScanResponseParser.parseDeleteFiles(json); + List fileScanTasks = + TableScanResponseParser.parseFileScanTasks(json, deleteFiles); + + return PlanTableScanResponse.builder() + .withPlanId(planId) + .withPlanStatus(planStatus) + .withPlanTasks(planTasks) + .withFileScanTasks(fileScanTasks) + .withDeleteFiles(deleteFiles) + .build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponse.java new file mode 
100644 index 000000000000..4213b50881b9 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponse.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import org.apache.iceberg.rest.RESTResponse; + +/** Marker interface with no members of its own; implemented by the scan planning responses (FetchPlanningResultResponse, FetchScanTasksResponse, PlanTableScanResponse). */ +public interface TableScanResponse extends RESTResponse {} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponseParser.java new file mode 100644 index 000000000000..0e111755eaa9 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponseParser.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.ContentFileParser; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.RESTFileScanTaskParser; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.util.JsonUtil; + +class TableScanResponseParser { + + private TableScanResponseParser() {} + + static final String FILE_SCAN_TASKS = "file-scan-tasks"; + static final String DELETE_FILES = "delete-files"; + + public static List parseDeleteFiles(JsonNode node) { + if (node.has(DELETE_FILES)) { + JsonNode deleteFiles = JsonUtil.get(DELETE_FILES, node); + Preconditions.checkArgument( + deleteFiles.isArray(), "Cannot parse delete files from non-array: %s", deleteFiles); + ImmutableList.Builder deleteFilesBuilder = ImmutableList.builder(); + for (JsonNode deleteFileNode : deleteFiles) { + DeleteFile deleteFile = + (DeleteFile) ContentFileParser.unboundContentFileFromJson(deleteFileNode); + 
deleteFilesBuilder.add(deleteFile); + } + return deleteFilesBuilder.build(); + } + + return null; + } + + public static List parseFileScanTasks(JsonNode node, List deleteFiles) { + if (node.has(FILE_SCAN_TASKS)) { + JsonNode scanTasks = JsonUtil.get(FILE_SCAN_TASKS, node); + Preconditions.checkArgument( + scanTasks.isArray(), "Cannot parse file scan tasks from non-array: %s", scanTasks); + List fileScanTaskList = Lists.newArrayList(); + for (JsonNode fileScanTaskNode : scanTasks) { + FileScanTask fileScanTask = RESTFileScanTaskParser.fromJson(fileScanTaskNode, deleteFiles); + fileScanTaskList.add(fileScanTask); + } + + return fileScanTaskList; + } + + return null; + } + + public static void serializeScanTasks( + List fileScanTasks, + List deleteFiles, + Map specsById, + JsonGenerator gen) + throws IOException { + Map deleteFilePathToIndex = Maps.newHashMap(); + if (deleteFiles != null) { + Preconditions.checkArgument( + specsById != null, "Cannot serialize response without specs by ID defined"); + gen.writeArrayFieldStart(DELETE_FILES); + for (int i = 0; i < deleteFiles.size(); i++) { + DeleteFile deleteFile = deleteFiles.get(i); + deleteFilePathToIndex.put(String.valueOf(deleteFile.path()), i); + ContentFileParser.unboundContentFileToJson( + deleteFiles.get(i), specsById.get(deleteFile.specId()), gen); + } + gen.writeEndArray(); + } + + if (fileScanTasks != null) { + gen.writeArrayFieldStart(FILE_SCAN_TASKS); + Set deleteFileReferences = Sets.newHashSet(); + for (FileScanTask fileScanTask : fileScanTasks) { + if (deleteFiles != null) { + for (DeleteFile taskDelete : fileScanTask.deletes()) { + deleteFileReferences.add(deleteFilePathToIndex.get(taskDelete.path().toString())); + } + } + + PartitionSpec spec = specsById.get(fileScanTask.file().specId()); + Preconditions.checkArgument( + spec != null, + "Cannot serialize scan task with unknown spec %s", + fileScanTask.file().specId()); + RESTFileScanTaskParser.toJson(fileScanTask, deleteFileReferences, spec, gen); + } 
+ gen.writeEndArray(); + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestBase.java b/core/src/test/java/org/apache/iceberg/TestBase.java index 8930be7a36ba..4e9d66475940 100644 --- a/core/src/test/java/org/apache/iceberg/TestBase.java +++ b/core/src/test/java/org/apache/iceberg/TestBase.java @@ -31,6 +31,7 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; import org.apache.iceberg.avro.AvroSchemaUtil; @@ -65,7 +66,9 @@ public class TestBase { public static final PartitionSpec SPEC = PartitionSpec.builderFor(SCHEMA).bucket("data", BUCKETS_NUMBER).build(); - static final DataFile FILE_A = + public static final Map PARTITION_SPECS_BY_ID = Map.of(0, SPEC); + + public static final DataFile FILE_A = DataFiles.builder(SPEC) .withPath("/path/to/data-a.parquet") .withFileSizeInBytes(10) @@ -79,7 +82,7 @@ public class TestBase { .withPartitionPath("data_bucket=0") // easy way to set partition data for now .withRecordCount(1) .build(); - static final DeleteFile FILE_A_DELETES = + public static final DeleteFile FILE_A_DELETES = FileMetadata.deleteFileBuilder(SPEC) .ofPositionDeletes() .withPath("/path/to/data-a-deletes.parquet") @@ -99,7 +102,7 @@ public class TestBase { .withContentSizeInBytes(6) .build(); // Equality delete files. 
- static final DeleteFile FILE_A2_DELETES = + public static final DeleteFile FILE_A2_DELETES = FileMetadata.deleteFileBuilder(SPEC) .ofEqualityDeletes(1) .withPath("/path/to/data-a2-deletes.parquet") diff --git a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java index c2fd24856fb2..fd9fc306b168 100644 --- a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java +++ b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java @@ -43,6 +43,7 @@ import org.apache.iceberg.FilesTable; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.HistoryEntry; +import org.apache.iceberg.PartitionData; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.ReachableFileUtil; import org.apache.iceberg.ReplaceSortOrder; @@ -156,6 +157,16 @@ public abstract class CatalogTests { .withRecordCount(2) // needs at least one record or else metrics will filter it out .build(); + protected static final Namespace REST_DB = Namespace.of("restDB"); + public static final TableIdentifier TABLE_COMPLETED_WITH_FILE_SCAN_TASK = + TableIdentifier.of(REST_DB, "table_completed_with_file_scan_task"); + public static final TableIdentifier TABLE_SUBMITTED_WITH_FILE_SCAN_TASK = + TableIdentifier.of(REST_DB, "table_submitted_with_file_scan_task"); + public static final TableIdentifier TABLE_COMPLETED_WITH_PLAN_TASK = + TableIdentifier.of(REST_DB, "table_completed_with_plan_task"); + public static final TableIdentifier TABLE_COMPLETED_WITH_NESTED_PLAN_TASK = + TableIdentifier.of(REST_DB, "table_completed_with_nested_plan_task"); + protected abstract C catalog(); protected abstract C initCatalog(String catalogName, Map additionalProperties); @@ -3325,4 +3336,34 @@ private List concat(List starting, Namespace... 
additional namespaces.addAll(Arrays.asList(additional)); return namespaces; } + + public void assertBoundFileScanTasks(Table table, PartitionSpec partitionSpec) { + PartitionData partitionData = new PartitionData(partitionSpec.partitionType()); + try (CloseableIterable tasks = table.newScan().planFiles()) { + Streams.stream(tasks) + .forEach( + task -> { + // assert file scan task spec being bound + assertThat(task.spec().equals(partitionSpec)); + // assert data file spec being bound + assertThat(task.file().partition().equals(partitionData)); + // assert all delete files in task are bound + task.deletes() + .forEach( + deleteFile -> assertThat(deleteFile.partition().equals(partitionData))); + }); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public void assertBoundFiles(Table table, DataFile dataFile) { + try (CloseableIterable tasks = table.newScan().planFiles()) { + Streams.stream(tasks) + .map(FileScanTask::file) + .forEach(file -> assertThat(file.partition()).isEqualTo(dataFile.partition())); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } } diff --git a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java index 0f7e76b81e49..314d80387c41 100644 --- a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java +++ b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java @@ -18,17 +18,25 @@ */ package org.apache.iceberg.rest; +import static org.apache.iceberg.catalog.CatalogTests.TABLE_COMPLETED_WITH_FILE_SCAN_TASK; +import static org.apache.iceberg.catalog.CatalogTests.TABLE_COMPLETED_WITH_NESTED_PLAN_TASK; +import static org.apache.iceberg.catalog.CatalogTests.TABLE_COMPLETED_WITH_PLAN_TASK; +import static org.apache.iceberg.catalog.CatalogTests.TABLE_SUBMITTED_WITH_FILE_SCAN_TASK; + import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; import java.net.URI; import 
java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.function.Consumer; import java.util.stream.Collectors; import org.apache.iceberg.BaseTable; import org.apache.iceberg.BaseTransaction; +import org.apache.iceberg.FileScanTask; import org.apache.iceberg.Table; +import org.apache.iceberg.TableScan; import org.apache.iceberg.Transaction; import org.apache.iceberg.Transactions; import org.apache.iceberg.catalog.Catalog; @@ -49,15 +57,19 @@ import org.apache.iceberg.exceptions.RESTException; import org.apache.iceberg.exceptions.UnprocessableEntityException; import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.base.Splitter; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.rest.HTTPRequest.HTTPMethod; import org.apache.iceberg.rest.auth.AuthSession; import org.apache.iceberg.rest.requests.CommitTransactionRequest; import org.apache.iceberg.rest.requests.CreateNamespaceRequest; import org.apache.iceberg.rest.requests.CreateTableRequest; import org.apache.iceberg.rest.requests.CreateViewRequest; +import org.apache.iceberg.rest.requests.FetchScanTasksRequest; +import org.apache.iceberg.rest.requests.PlanTableScanRequest; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RenameTableRequest; import org.apache.iceberg.rest.requests.ReportMetricsRequest; @@ -66,12 +78,15 @@ import org.apache.iceberg.rest.responses.ConfigResponse; import org.apache.iceberg.rest.responses.CreateNamespaceResponse; import org.apache.iceberg.rest.responses.ErrorResponse; +import org.apache.iceberg.rest.responses.FetchPlanningResultResponse; +import 
org.apache.iceberg.rest.responses.FetchScanTasksResponse; import org.apache.iceberg.rest.responses.GetNamespaceResponse; import org.apache.iceberg.rest.responses.ListNamespacesResponse; import org.apache.iceberg.rest.responses.ListTablesResponse; import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.iceberg.rest.responses.LoadViewResponse; import org.apache.iceberg.rest.responses.OAuthTokenResponse; +import org.apache.iceberg.rest.responses.PlanTableScanResponse; import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse; import org.apache.iceberg.util.Pair; import org.apache.iceberg.util.PropertyUtil; @@ -101,6 +116,8 @@ public class RESTCatalogAdapter extends BaseHTTPClient { private final Catalog catalog; private final SupportsNamespaces asNamespaceCatalog; private final ViewCatalog asViewCatalog; + private Map> planToFileScanTasks; + private Map planToPlanTasks; private AuthSession authSession = AuthSession.EMPTY; @@ -109,6 +126,8 @@ public RESTCatalogAdapter(Catalog catalog) { this.asNamespaceCatalog = catalog instanceof SupportsNamespaces ? (SupportsNamespaces) catalog : null; this.asViewCatalog = catalog instanceof ViewCatalog ? 
(ViewCatalog) catalog : null; + this.planToFileScanTasks = Maps.newHashMap(); + this.planToPlanTasks = Maps.newHashMap(); } enum Route { @@ -139,6 +158,21 @@ enum Route { LoadTableResponse.class), TABLE_EXISTS(HTTPMethod.HEAD, ResourcePaths.V1_TABLE), LOAD_TABLE(HTTPMethod.GET, ResourcePaths.V1_TABLE, null, LoadTableResponse.class), + PLAN_TABLE_SCAN( + HTTPMethod.POST, + "/v1/{prefix}/namespaces/{namespace}/tables/{table}/plan", + PlanTableScanRequest.class, + PlanTableScanResponse.class), + FETCH_PLANNING_RESULT( + HTTPMethod.GET, + "/v1/{prefix}/namespaces/{namespace}/tables/{table}/plan/{plan-id}", + null, + FetchPlanningResultResponse.class), + FETCH_SCAN_TASKS( + HTTPMethod.POST, + "/v1/{prefix}/namespaces/{namespace}/tables/{table}/tasks", + FetchScanTasksRequest.class, + FetchScanTasksResponse.class), REGISTER_TABLE( HTTPMethod.POST, ResourcePaths.V1_TABLE_REGISTER, @@ -533,6 +567,154 @@ public T handleRequest( break; } + case PLAN_TABLE_SCAN: + { + TableIdentifier ident = tableIdentFromPathVars(vars); + PlanTableScanRequest request = castRequest(PlanTableScanRequest.class, body); + Table table = catalog.loadTable(ident); + TableScan tableScan = table.newScan(); + + if (request.snapshotId() != null) { + tableScan.useSnapshot(request.snapshotId()); + } + if (request.select() != null) { + tableScan.select(request.select()); + } + if (request.filter() != null) { + tableScan.filter(request.filter()); + } + if (request.statsFields() != null) { + tableScan.includeColumnStats(request.statsFields()); + } + tableScan.caseSensitive(request.caseSensitive()); + + List fileScanTasks = Lists.newArrayList(); + CloseableIterable returnedTasks = tableScan.planFiles(); + returnedTasks.forEach(task -> fileScanTasks.add(task)); + + if (ident.equals(TABLE_COMPLETED_WITH_FILE_SCAN_TASK)) { + return castResponse( + responseType, + PlanTableScanResponse.builder() + .withPlanStatus(PlanStatus.COMPLETED) + .withFileScanTasks(fileScanTasks) + .withSpecsById(table.specs()) + 
.build()); + } + + if (ident.equals(TABLE_SUBMITTED_WITH_FILE_SCAN_TASK)) { + // this is the case where we return a plan-id, then call fetchPlanningResult to get the + // tasks at a later point + String planId = "plan-id-" + UUID.randomUUID(); + planToFileScanTasks.put(planId, fileScanTasks); + return castResponse( + responseType, + PlanTableScanResponse.builder() + .withPlanId(planId) + .withPlanStatus(PlanStatus.SUBMITTED) + .withSpecsById(table.specs()) + .build()); + } + + if (ident.equals(TABLE_COMPLETED_WITH_PLAN_TASK)) { + // this is the case where we return a list of plan-task, and then call fetchScanTasks + // for each + List planTasks = + List.of("plan-task-" + UUID.randomUUID(), "plan-task-" + UUID.randomUUID()); + planTasks.forEach(task -> planToFileScanTasks.put(task, fileScanTasks)); + return castResponse( + responseType, + PlanTableScanResponse.builder() + .withPlanStatus(PlanStatus.COMPLETED) + .withPlanTasks(planTasks) + .withSpecsById(table.specs()) + .build()); + } + + if (ident.equals(TABLE_COMPLETED_WITH_NESTED_PLAN_TASK)) { + // this is the case where our plan tasks, can return additional plan tasks, and those + // can return file scan tasks. 
+ List outerPlanTasks = + List.of( + "outer-plan-task-" + UUID.randomUUID(), "outer-plan-task-" + UUID.randomUUID()); + List innerPlanTasks = + List.of( + "inner-plan-task-" + UUID.randomUUID(), "inner-plan-task-" + UUID.randomUUID()); + + for (int i = 0; i < outerPlanTasks.size(); i++) { + planToPlanTasks.put(outerPlanTasks.get(i), innerPlanTasks.get(i)); + planToFileScanTasks.put(innerPlanTasks.get(i), fileScanTasks); + } + + return castResponse( + responseType, + PlanTableScanResponse.builder() + .withPlanStatus(PlanStatus.COMPLETED) + .withPlanTasks(outerPlanTasks) + .withSpecsById(table.specs()) + .build()); + } + break; + } + + case FETCH_PLANNING_RESULT: + { + TableIdentifier ident = tableIdentFromPathVars(vars); + Table table = catalog.loadTable(ident); + if (ident.equals(TABLE_SUBMITTED_WITH_FILE_SCAN_TASK)) { + String planId = planIDFromPathVars(vars); + return castResponse( + responseType, + FetchPlanningResultResponse.builder() + .withPlanStatus(PlanStatus.fromName("completed")) + .withFileScanTasks(planToFileScanTasks.get(planId)) + .withSpecsById(table.specs()) + .build()); + } + break; + } + + case FETCH_SCAN_TASKS: + { + TableIdentifier ident = tableIdentFromPathVars(vars); + Table table = catalog.loadTable(ident); + FetchScanTasksRequest request = castRequest(FetchScanTasksRequest.class, body); + if (ident.equals(TABLE_COMPLETED_WITH_PLAN_TASK)) { + return castResponse( + responseType, + FetchScanTasksResponse.builder() + .withFileScanTasks(planToFileScanTasks.get(request.planTask())) + .withSpecsById(table.specs()) + .build()); + } + + if (ident.equals(TABLE_COMPLETED_WITH_NESTED_PLAN_TASK)) { + // this is the case where we return another round of nested plan tasks + if (planToPlanTasks.containsKey(request.planTask())) { + String innerPlanTask = planToPlanTasks.remove(request.planTask()); + return castResponse( + responseType, + FetchScanTasksResponse.builder() + .withPlanTasks(List.of(innerPlanTask)) + .withSpecsById(table.specs()) + .build()); + 
} + + if (planToFileScanTasks.containsKey(request.planTask())) { + // this is the case where we get from nested plan tasks the file scan tasks + List fileScanTasksFromPlanTask = + planToFileScanTasks.remove(request.planTask()); + return castResponse( + responseType, + FetchScanTasksResponse.builder() + .withFileScanTasks(fileScanTasksFromPlanTask) + .withSpecsById(table.specs()) + .build()); + } + } + break; + } + default: if (responseType == OAuthTokenResponse.class) { return castResponse(responseType, handleOAuthRequest(body)); @@ -704,4 +886,8 @@ private static TableIdentifier viewIdentFromPathVars(Map pathVar return TableIdentifier.of( namespaceFromPathVars(pathVars), RESTUtil.decodeString(pathVars.get("view"))); } + + private static String planIDFromPathVars(Map pathVars) { + return RESTUtil.decodeString(pathVars.get("plan-id")); + } } diff --git a/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java b/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java index 47c27c975762..61f550860e12 100644 --- a/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java +++ b/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java @@ -95,6 +95,7 @@ import org.eclipse.jetty.servlet.ServletHolder; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; @@ -2841,6 +2842,75 @@ public void testTableExistsFallbackToGETRequestWithLegacyServer() { verifyTableExistsFallbackToGETRequest(ConfigResponse.builder().build()); } + @Test + @Disabled + public void testPlanTableScanWithCompletedStatusAndFileScanTask() throws IOException { + Table table = createRESTTableAndInsertData(TABLE_COMPLETED_WITH_FILE_SCAN_TASK); + assertBoundFileScanTasks(table, SPEC); + } + + @Test + @Disabled + public void 
testPlanTableScanAndFetchPlanningResultWithSubmittedStatusAndFileScanTask() + throws IOException { + Table table = createRESTTableAndInsertData(TABLE_SUBMITTED_WITH_FILE_SCAN_TASK); + assertBoundFileScanTasks(table, SPEC); + } + + @Test + @Disabled + public void testPlanTableScanAndFetchScanTasksWithCompletedStatusAndPlanTask() + throws IOException { + Table table = createRESTTableAndInsertData(TABLE_COMPLETED_WITH_PLAN_TASK); + assertBoundFileScanTasks(table, SPEC); + } + + @Test + @Disabled + public void testPlanTableScanAndFetchScanTasksWithCompletedStatusAndNestedPlanTasks() + throws IOException { + Table table = createRESTTableAndInsertData(TABLE_COMPLETED_WITH_NESTED_PLAN_TASK); + assertBoundFileScanTasks(table, SPEC); + } + + public Table createRESTTableAndInsertData(TableIdentifier tableIdentifier) throws IOException { + SessionCatalog.SessionContext context = + new SessionCatalog.SessionContext( + UUID.randomUUID().toString(), + "user", + ImmutableMap.of("credential", "user:12345"), + ImmutableMap.of()); + RESTCatalog catalog = + new RESTCatalog( + context, + (config) -> HTTPClient.builder(config).uri(config.get(CatalogProperties.URI)).build()); + catalog.initialize( + "test", + ImmutableMap.of( + RESTSessionCatalog.REST_SERVER_PLANNING_ENABLED, + "true", + CatalogProperties.URI, + httpServer.getURI().toString(), + CatalogProperties.FILE_IO_IMPL, + "org.apache.iceberg.inmemory.InMemoryFileIO", + "credential", + "catalog:secret")); + + if (requiresNamespaceCreate()) { + catalog.createNamespace(tableIdentifier.namespace()); + } + + Table table = + catalog + .buildTable(tableIdentifier, SCHEMA) + .withProperty("table.rest-scan-planning", "true") + .withPartitionSpec(SPEC) + .create(); + + table.newAppend().appendFile(FILE_A).commit(); + return table; + } + private RESTCatalog catalog(RESTCatalogAdapter adapter) { RESTCatalog catalog = new RESTCatalog(SessionCatalog.SessionContext.createEmpty(), (config) -> adapter); diff --git 
a/core/src/test/java/org/apache/iceberg/rest/requests/TestFetchScanTasksRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestFetchScanTasksRequest.java new file mode 100644 index 000000000000..a68f0d3d2b29 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestFetchScanTasksRequest.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.rest.requests; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.Test; + +public class TestFetchScanTasksRequest { + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> FetchScanTasksRequestParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid request: fetchScanTasks request null"); + + assertThatThrownBy(() -> FetchScanTasksRequestParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid request: fetchScanTasks null"); + } + + @Test + public void roundTripSerdeWithPlanTask() { + FetchScanTasksRequest request = new FetchScanTasksRequest("somePlanTask"); + String expectedJson = "{\"plan-task\":\"somePlanTask\"}"; + String json = FetchScanTasksRequestParser.toJson(request, false); + assertThat(json).isEqualTo(expectedJson); + + // can't do an equality comparison on FetchScanTasksRequest because we don't implement + // equals/hashcode + assertThat( + FetchScanTasksRequestParser.toJson(FetchScanTasksRequestParser.fromJson(json), false)) + .isEqualTo(expectedJson); + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java new file mode 100644 index 000000000000..43cf0d8b3aa4 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.requests; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.jupiter.api.Test; + +public class TestPlanTableScanRequest { + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> PlanTableScanRequestParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid request: planTableScanRequest null"); + + assertThatThrownBy(() -> PlanTableScanRequestParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid request: planTableScanRequest null"); + } + + @Test + public void roundTripSerdeWithEmptyRequestAndDefaultsPresent() { + PlanTableScanRequest request = new PlanTableScanRequest.Builder().build(); + + String expectedJson = "{" + "\"case-sensitive\":true," + "\"use-snapshot-schema\":false}"; + + String json = PlanTableScanRequestParser.toJson(request, false); + assertThat(json).isEqualTo(expectedJson); + + // can't do an equality comparison on PlanTableScanRequest because we don't implement + // equals/hashcode + 
assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) + .isEqualTo(expectedJson); + } + + @Test + public void roundTripSerdeWithSelectField() { + PlanTableScanRequest request = + new PlanTableScanRequest.Builder() + .withSnapshotId(1L) + .withSelect(Lists.newArrayList("col1", "col2")) + .build(); + + String expectedJson = + "{\"snapshot-id\":1," + + "\"select\":[\"col1\",\"col2\"]," + + "\"case-sensitive\":true," + + "\"use-snapshot-schema\":false}"; + + String json = PlanTableScanRequestParser.toJson(request, false); + assertThat(json).isEqualTo(expectedJson); + + // can't do an equality comparison on PlanTableScanRequest because we don't implement + // equals/hashcode + assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) + .isEqualTo(expectedJson); + } + + @Test + public void roundTripSerdeWithFilterField() { + PlanTableScanRequest request = + new PlanTableScanRequest.Builder() + .withSnapshotId(1L) + .withFilter(Expressions.alwaysFalse()) + .build(); + + String expectedJson = + "{\"snapshot-id\":1," + + "\"filter\":\"false\"," + + "\"case-sensitive\":true," + + "\"use-snapshot-schema\":false}"; + + String json = PlanTableScanRequestParser.toJson(request, false); + assertThat(json).isEqualTo(expectedJson); + + // can't do an equality comparison on PlanTableScanRequest because we don't implement + // equals/hashcode + assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) + .isEqualTo(expectedJson); + } + + @Test + public void roundTripSerdeWithAllFieldsInvalidRequest() { + PlanTableScanRequest request = + new PlanTableScanRequest.Builder() + .withSnapshotId(1L) + .withSelect(Lists.newArrayList("col1", "col2")) + .withFilter(Expressions.alwaysTrue()) + .withStartSnapshotId(1L) + .withEndSnapshotId(2L) + .withCaseSensitive(false) + .withUseSnapshotSchema(true) + .withStatsFields(Lists.newArrayList("col1", "col2")) + .build(); + + 
assertThatThrownBy(() -> PlanTableScanRequestParser.toJson(request)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Either snapshotId must be provided or both startSnapshotId and endSnapshotId must be provided"); + } + + @Test + public void roundTripSerdeWithAllFieldsExceptSnapShotId() { + PlanTableScanRequest request = + new PlanTableScanRequest.Builder() + .withSelect(Lists.newArrayList("col1", "col2")) + .withFilter(Expressions.alwaysTrue()) + .withStartSnapshotId(1L) + .withEndSnapshotId(2L) + .withCaseSensitive(false) + .withUseSnapshotSchema(true) + .withStatsFields(Lists.newArrayList("col1", "col2")) + .build(); + + String expectedJson = + "{\"start-snapshot-id\":1," + + "\"end-snapshot-id\":2," + + "\"select\":[\"col1\",\"col2\"]," + + "\"filter\":\"true\"," + + "\"case-sensitive\":false," + + "\"use-snapshot-schema\":true," + + "\"stats-fields\":[\"col1\",\"col2\"]}"; + + String json = PlanTableScanRequestParser.toJson(request, false); + assertThat(json).isEqualTo(expectedJson); + + // can't do an equality comparison on PlanTableScanRequest because we don't implement + // equals/hashcode + assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) + .isEqualTo(expectedJson); + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java new file mode 100644 index 000000000000..215ea1481da0 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import static org.apache.iceberg.TestBase.FILE_A; +import static org.apache.iceberg.TestBase.FILE_A_DELETES; +import static org.apache.iceberg.TestBase.PARTITION_SPECS_BY_ID; +import static org.apache.iceberg.TestBase.SCHEMA; +import static org.apache.iceberg.TestBase.SPEC; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.List; +import org.apache.iceberg.BaseFileScanTask; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpecParser; +import org.apache.iceberg.SchemaParser; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.ResidualEvaluator; +import org.apache.iceberg.rest.PlanStatus; +import org.junit.jupiter.api.Test; + +public class TestFetchPlanningResultResponseParser { + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> FetchPlanningResultResponseParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: fetchPanningResultResponse null"); + + assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: 
fetchPanningResultResponse null or empty"); + } + + @Test + public void roundTripSerdeWithEmptyObject() { + assertThatThrownBy( + () -> + FetchPlanningResultResponseParser.toJson( + FetchPlanningResultResponse.builder().build())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid status: null"); + + String emptyJson = "{ }"; + assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson(emptyJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: fetchPanningResultResponse null or empty"); + } + + @Test + public void roundTripSerdeWithInvalidPlanStatus() { + String invalidStatusJson = "{\"plan-status\": \"someStatus\"}"; + assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson(invalidStatusJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid status name: someStatus"); + } + + @Test + public void roundTripSerdeWithValidSubmittedStatus() { + PlanStatus planStatus = PlanStatus.fromName("submitted"); + FetchPlanningResultResponse response = + FetchPlanningResultResponse.builder().withPlanStatus(planStatus).build(); + + String expectedJson = "{\"plan-status\":\"submitted\"}"; + String json = FetchPlanningResultResponseParser.toJson(response); + assertThat(json).isEqualTo(expectedJson); + + FetchPlanningResultResponse fromResponse = FetchPlanningResultResponseParser.fromJson(json); + assertThat(FetchPlanningResultResponseParser.toJson(fromResponse)).isEqualTo(expectedJson); + } + + @Test + public void roundTripSerdeWithInvalidPlanStatusSubmittedWithTasksPresent() { + PlanStatus planStatus = PlanStatus.fromName("submitted"); + assertThatThrownBy( + () -> + FetchPlanningResultResponse.builder() + .withPlanStatus(planStatus) + .withPlanTasks(List.of("task1", "task2")) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: tasks can only be returned in a 'completed' status"); + + String invalidJson = + "{\"plan-status\":\"submitted\"," + 
"\"plan-tasks\":[\"task1\",\"task2\"]}"; + + assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson(invalidJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: tasks can only be returned in a 'completed' status"); + } + + @Test + public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileScanTasksPresent() { + PlanStatus planStatus = PlanStatus.fromName("submitted"); + assertThatThrownBy( + () -> + FetchPlanningResultResponse.builder() + .withPlanStatus(planStatus) + .withDeleteFiles(List.of(FILE_A_DELETES)) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + + String invalidJson = + "{\"plan-status\":\"submitted\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]" + + "}"; + + assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson(invalidJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + } + + @Test + public void roundTripSerdeWithValidStatusAndFileScanTasks() { + ResidualEvaluator residualEvaluator = + ResidualEvaluator.of(SPEC, Expressions.equal("id", 1), true); + FileScanTask fileScanTask = + new BaseFileScanTask( + FILE_A, + new DeleteFile[] {FILE_A_DELETES}, + SchemaParser.toJson(SCHEMA), + PartitionSpecParser.toJson(SPEC), + residualEvaluator); + + PlanStatus planStatus = PlanStatus.fromName("completed"); + FetchPlanningResultResponse response = + FetchPlanningResultResponse.builder() + .withPlanStatus(planStatus) + .withFileScanTasks(List.of(fileScanTask)) + .withDeleteFiles(List.of(FILE_A_DELETES)) + // assume this has been set + 
.withSpecsById(PARTITION_SPECS_BY_ID) + .build(); + + String expectedToJson = + "{\"plan-status\":\"completed\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]," + + "\"file-scan-tasks\":[" + + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0}," + + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + + "\"delete-file-references\":[0]," + + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" + + "}"; + + String json = FetchPlanningResultResponseParser.toJson(response, false); + assertThat(json).isEqualTo(expectedToJson); + + // make an unbound json where you expect to not have partitions for the data file, + // delete files as service does not send partition spec + String expectedFromJson = + "{\"plan-status\":\"completed\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{},\"file-size-in-bytes\":10,\"record-count\":1}]," + + "\"file-scan-tasks\":[" + + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"file-format\":\"PARQUET\",\"partition\":{}," + + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + + "\"delete-file-references\":[0]," + + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" + + "}"; + + FetchPlanningResultResponse fromResponse = FetchPlanningResultResponseParser.fromJson(json); + // Need to make a new response with partitionSpec set + FetchPlanningResultResponse copyResponse = + FetchPlanningResultResponse.builder() + .withPlanStatus(fromResponse.planStatus()) + .withPlanTasks(fromResponse.planTasks()) + 
.withDeleteFiles(fromResponse.deleteFiles()) + .withFileScanTasks(fromResponse.fileScanTasks()) + .withSpecsById(PARTITION_SPECS_BY_ID) + .build(); + + // can't do an equality comparison on FetchPlanningResultResponse because we don't implement + // equals/hashcode + assertThat(FetchPlanningResultResponseParser.toJson(copyResponse, false)) + .isEqualTo(expectedFromJson); + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java new file mode 100644 index 000000000000..4ddfbd1fcced --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.iceberg.rest.responses; + +import static org.apache.iceberg.TestBase.FILE_A; +import static org.apache.iceberg.TestBase.FILE_A_DELETES; +import static org.apache.iceberg.TestBase.PARTITION_SPECS_BY_ID; +import static org.apache.iceberg.TestBase.SCHEMA; +import static org.apache.iceberg.TestBase.SPEC; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.List; +import org.apache.iceberg.BaseFileScanTask; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpecParser; +import org.apache.iceberg.SchemaParser; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.ResidualEvaluator; +import org.junit.jupiter.api.Test; + +public class TestFetchScanTasksResponseParser { + + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> FetchScanTasksResponseParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: fetchScanTasksResponse null"); + + assertThatThrownBy(() -> FetchScanTasksResponseParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: fetchScanTasksResponse null"); + } + + @Test + public void roundTripSerdeWithEmptyObject() { + assertThatThrownBy( + () -> FetchScanTasksResponseParser.toJson(FetchScanTasksResponse.builder().build())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: planTasks and fileScanTask cannot both be null"); + + String emptyJson = "{ }"; + assertThatThrownBy(() -> FetchScanTasksResponseParser.fromJson(emptyJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: fetchScanTasksResponse null"); + } + + @Test + public void roundTripSerdeWithPlanTasks() { + String expectedJson = 
"{\"plan-tasks\":[\"task1\",\"task2\"]}"; + String json = + FetchScanTasksResponseParser.toJson( + FetchScanTasksResponse.builder().withPlanTasks(List.of("task1", "task2")).build()); + assertThat(json).isEqualTo(expectedJson); + + FetchScanTasksResponse fromResponse = FetchScanTasksResponseParser.fromJson(json); + + // can't do an equality comparison on PlanTableScanRequest because we don't implement + // equals/hashcode + assertThat(FetchScanTasksResponseParser.toJson(fromResponse, false)).isEqualTo(expectedJson); + } + + @Test + public void roundTripSerdeWithDeleteFilesNoFileScanTasksPresent() { + assertThatThrownBy( + () -> + FetchScanTasksResponse.builder() + .withPlanTasks(List.of("task1", "task2")) + .withDeleteFiles(List.of(FILE_A_DELETES)) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + + String invalidJson = + "{\"plan-tasks\":[\"task1\",\"task2\"]," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]" + + "}"; + + assertThatThrownBy(() -> FetchScanTasksResponseParser.fromJson(invalidJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + } + + @Test + public void roundTripSerdeWithFileScanTasks() { + ResidualEvaluator residualEvaluator = + ResidualEvaluator.of(SPEC, Expressions.equal("id", 1), true); + FileScanTask fileScanTask = + new BaseFileScanTask( + FILE_A, + new DeleteFile[] {FILE_A_DELETES}, + SchemaParser.toJson(SCHEMA), + PartitionSpecParser.toJson(SPEC), + residualEvaluator); + + FetchScanTasksResponse response = + FetchScanTasksResponse.builder() + .withFileScanTasks(List.of(fileScanTask)) + 
.withDeleteFiles(List.of(FILE_A_DELETES)) + // assume you have set this already + .withSpecsById(PARTITION_SPECS_BY_ID) + .build(); + + String expectedToJson = + "{" + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]," + + "\"file-scan-tasks\":[" + + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0}," + + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + + "\"delete-file-references\":[0]," + + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" + + "}"; + + String json = FetchScanTasksResponseParser.toJson(response, false); + assertThat(json).isEqualTo(expectedToJson); + + // make an unbound json where you expect to not have partitions for the data file, + // delete files as service does not send partition spec + String expectedFromJson = + "{" + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{},\"file-size-in-bytes\":10,\"record-count\":1}]," + + "\"file-scan-tasks\":[" + + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"file-format\":\"PARQUET\",\"partition\":{}," + + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + + "\"delete-file-references\":[0]," + + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" + + "}"; + + FetchScanTasksResponse fromResponse = FetchScanTasksResponseParser.fromJson(json); + // Need to make a new response with partitionSpec set + FetchScanTasksResponse copyResponse = + FetchScanTasksResponse.builder() + .withPlanTasks(fromResponse.planTasks()) + .withDeleteFiles(fromResponse.deleteFiles())
.withFileScanTasks(fromResponse.fileScanTasks()) + .withSpecsById(PARTITION_SPECS_BY_ID) + .build(); + + // can't do an equality comparison on FetchScanTasksResponse because we don't implement + // equals/hashcode + assertThat(FetchScanTasksResponseParser.toJson(copyResponse, false)) + .isEqualTo(expectedFromJson); + } +} diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java new file mode 100644 index 000000000000..ef39e47bdc31 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.iceberg.rest.responses; + +import static org.apache.iceberg.TestBase.FILE_A; +import static org.apache.iceberg.TestBase.FILE_A_DELETES; +import static org.apache.iceberg.TestBase.PARTITION_SPECS_BY_ID; +import static org.apache.iceberg.TestBase.SCHEMA; +import static org.apache.iceberg.TestBase.SPEC; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.List; +import org.apache.iceberg.BaseFileScanTask; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpecParser; +import org.apache.iceberg.SchemaParser; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.ResidualEvaluator; +import org.apache.iceberg.rest.PlanStatus; +import org.junit.jupiter.api.Test; + +public class TestPlanTableScanResponseParser { + @Test + public void nullAndEmptyCheck() { + assertThatThrownBy(() -> PlanTableScanResponseParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: planTableScanResponse null"); + + assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson((JsonNode) null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse planTableScan response from empty or null object"); + } + + @Test + public void roundTripSerdeWithEmptyObject() { + + assertThatThrownBy(() -> PlanTableScanResponse.builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: plan status must be defined"); + + String emptyJson = "{ }"; + assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(emptyJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse planTableScan response from empty or null object"); + } + + @Test + public void roundTripSerdeWithInvalidPlanStatus() { + String invalidStatusJson = 
"{\"plan-status\": \"someStatus\"}"; + assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidStatusJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid status name: someStatus"); + } + + @Test + public void roundTripSerdeWithInvalidPlanStatusSubmittedWithoutPlanId() { + PlanStatus planStatus = PlanStatus.fromName("submitted"); + + assertThatThrownBy(() -> PlanTableScanResponse.builder().withPlanStatus(planStatus).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: plan id should be defined when status is 'submitted'"); + + String invalidJson = "{\"plan-status\":\"submitted\"}"; + assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: plan id should be defined when status is 'submitted'"); + } + + @Test + public void roundTripSerdeWithInvalidPlanStatusCancelled() { + PlanStatus planStatus = PlanStatus.fromName("cancelled"); + assertThatThrownBy(() -> PlanTableScanResponse.builder().withPlanStatus(planStatus).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: 'cancelled' is not a valid status for planTableScan"); + + String invalidJson = "{\"plan-status\":\"cancelled\"}"; + assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: 'cancelled' is not a valid status for planTableScan"); + } + + @Test + public void roundTripSerdeWithInvalidPlanStatusSubmittedWithTasksPresent() { + PlanStatus planStatus = PlanStatus.fromName("submitted"); + assertThatThrownBy( + () -> + PlanTableScanResponse.builder() + .withPlanStatus(planStatus) + .withPlanId("somePlanId") + .withPlanTasks(List.of("task1", "task2")) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: tasks can only be returned in a 'completed' status"); + + String 
invalidJson = + "{\"plan-status\":\"submitted\"," + + "\"plan-id\":\"somePlanId\"," + + "\"plan-tasks\":[\"task1\",\"task2\"]}"; + + assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: tasks can only be returned in a 'completed' status"); + } + + @Test + public void roundTripSerdeWithInvalidPlanIdWithIncorrectStatus() { + PlanStatus planStatus = PlanStatus.fromName("failed"); + assertThatThrownBy( + () -> + PlanTableScanResponse.builder() + .withPlanStatus(planStatus) + .withPlanId("somePlanId") + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: plan id can only be returned in a 'submitted' status"); + + String invalidJson = "{\"plan-status\":\"failed\"," + "\"plan-id\":\"somePlanId\"}"; + + assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid response: plan id can only be returned in a 'submitted' status"); + } + + @Test + public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileScanTasksPresent() { + PlanStatus planStatus = PlanStatus.fromName("submitted"); + assertThatThrownBy( + () -> + PlanTableScanResponse.builder() + .withPlanStatus(planStatus) + .withPlanId("somePlanId") + .withDeleteFiles(List.of(FILE_A_DELETES)) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + + String invalidJson = + "{\"plan-status\":\"submitted\"," + + "\"plan-id\":\"somePlanId\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]" + + "}"; + + assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); + } + + @Test + public void roundTripSerdeWithValidStatusAndFileScanTasks() { + ResidualEvaluator residualEvaluator = + ResidualEvaluator.of(SPEC, Expressions.equal("id", 1), true); + FileScanTask fileScanTask = + new BaseFileScanTask( + FILE_A, + new DeleteFile[] {FILE_A_DELETES}, + SchemaParser.toJson(SCHEMA), + PartitionSpecParser.toJson(SPEC), + residualEvaluator); + + PlanStatus planStatus = PlanStatus.fromName("completed"); + PlanTableScanResponse response = + PlanTableScanResponse.builder() + .withPlanStatus(planStatus) + .withFileScanTasks(List.of(fileScanTask)) + .withDeleteFiles(List.of(FILE_A_DELETES)) + .withSpecsById(PARTITION_SPECS_BY_ID) + .build(); + + String expectedToJson = + "{\"plan-status\":\"completed\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]," + + "\"file-scan-tasks\":[" + + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0}," + + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + + "\"delete-file-references\":[0]," + + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" + + "}"; + + String json = PlanTableScanResponseParser.toJson(response); + assertThat(json).isEqualTo(expectedToJson); + + String expectedFromJson = + "{\"plan-status\":\"completed\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"partition\":{},\"file-size-in-bytes\":10,\"record-count\":1}]," + + "\"file-scan-tasks\":[" + + 
"{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"file-format\":\"PARQUET\",\"partition\":{}," + + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + + "\"delete-file-references\":[0]," + + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" + + "}"; + + PlanTableScanResponse fromResponse = PlanTableScanResponseParser.fromJson(json); + PlanTableScanResponse copyResponse = + PlanTableScanResponse.builder() + .withPlanStatus(fromResponse.planStatus()) + .withPlanId(fromResponse.planId()) + .withPlanTasks(fromResponse.planTasks()) + .withDeleteFiles(fromResponse.deleteFiles()) + .withFileScanTasks(fromResponse.fileScanTasks()) + .withSpecsById(PARTITION_SPECS_BY_ID) + .build(); + + // can't do an equality comparison on PlanTableScanRequest because we don't implement + // equals/hashcode + assertThat(PlanTableScanResponseParser.toJson(copyResponse)).isEqualTo(expectedFromJson); + } +} From 5dfdd24b4db849b74ec51fec3bc137d7a3b9045c Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Fri, 20 Jun 2025 00:04:59 -0700 Subject: [PATCH 2/9] Adapt to context-aware parser --- .../exceptions/EntityNotFoundException.java | 34 --- .../java/org/apache/iceberg/BaseFile.java | 5 - .../org/apache/iceberg/ContentFileParser.java | 135 ++-------- .../org/apache/iceberg/DataTaskParser.java | 4 +- .../apache/iceberg/FileScanTaskParser.java | 8 +- .../iceberg/RESTFileScanTaskParser.java | 32 ++- .../org/apache/iceberg/RESTPlanningMode.java | 47 ---- .../java/org/apache/iceberg/RESTTable.java | 71 ------ .../org/apache/iceberg/RESTTableScan.java | 240 ------------------ .../org/apache/iceberg/ScanTasksIterable.java | 216 ---------------- .../TableScanResponseParser.java | 30 ++- .../iceberg/UnboundBaseFileScanTask.java | 63 ----- .../iceberg/UnboundGenericDataFile.java | 76 ------ .../iceberg/UnboundGenericDeleteFile.java | 86 ------- .../apache/iceberg/rest/RESTSerializers.java | 27 +- 
.../iceberg/rest/RESTSessionCatalog.java | 48 ---- .../FetchPlanningResultResponseParser.java | 19 +- .../FetchScanTasksResponseParser.java | 19 +- .../PlanTableScanResponseParser.java | 19 +- .../java/org/apache/iceberg/TestBase.java | 2 +- .../apache/iceberg/TestContentFileParser.java | 10 +- .../apache/iceberg/catalog/CatalogTests.java | 41 --- .../iceberg/rest/RESTCatalogAdapter.java | 186 -------------- .../apache/iceberg/rest/TestRESTCatalog.java | 120 +++------ ...TestFetchPlanningResultResponseParser.java | 86 ++++--- .../TestFetchScanTasksResponseParser.java | 35 +-- .../TestPlanTableScanResponseParser.java | 46 ++-- 27 files changed, 260 insertions(+), 1445 deletions(-) delete mode 100644 api/src/main/java/org/apache/iceberg/exceptions/EntityNotFoundException.java delete mode 100644 core/src/main/java/org/apache/iceberg/RESTPlanningMode.java delete mode 100644 core/src/main/java/org/apache/iceberg/RESTTable.java delete mode 100644 core/src/main/java/org/apache/iceberg/RESTTableScan.java delete mode 100644 core/src/main/java/org/apache/iceberg/ScanTasksIterable.java rename core/src/main/java/org/apache/iceberg/{rest/responses => }/TableScanResponseParser.java (82%) delete mode 100644 core/src/main/java/org/apache/iceberg/UnboundBaseFileScanTask.java delete mode 100644 core/src/main/java/org/apache/iceberg/UnboundGenericDataFile.java delete mode 100644 core/src/main/java/org/apache/iceberg/UnboundGenericDeleteFile.java diff --git a/api/src/main/java/org/apache/iceberg/exceptions/EntityNotFoundException.java b/api/src/main/java/org/apache/iceberg/exceptions/EntityNotFoundException.java deleted file mode 100644 index 1d06a5d2bc26..000000000000 --- a/api/src/main/java/org/apache/iceberg/exceptions/EntityNotFoundException.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.exceptions; - -import com.google.errorprone.annotations.FormatMethod; - -/** Exception raised when an entity is not found. */ -public class EntityNotFoundException extends RESTException implements CleanableFailure { - @FormatMethod - public EntityNotFoundException(String message, Object... args) { - super(message, args); - } - - @FormatMethod - public EntityNotFoundException(Throwable cause, String message, Object... 
args) { - super(cause, message, args); - } -} diff --git a/core/src/main/java/org/apache/iceberg/BaseFile.java b/core/src/main/java/org/apache/iceberg/BaseFile.java index d5dfd8ec0ba9..a02e0eff55a2 100644 --- a/core/src/main/java/org/apache/iceberg/BaseFile.java +++ b/core/src/main/java/org/apache/iceberg/BaseFile.java @@ -483,11 +483,6 @@ public StructLike partition() { return partitionData; } - public void setPartitionData(PartitionData partitionData) { - // TODO for binding in REST scan - this.partitionData = partitionData; - } - @Override public long recordCount() { return recordCount; diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index 182acab8759c..0626338c15e6 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -27,7 +27,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; -public class ContentFileParser { +class ContentFileParser { private static final String SPEC_ID = "spec-id"; private static final String CONTENT = "content"; private static final String FILE_PATH = "file-path"; @@ -52,97 +52,6 @@ public class ContentFileParser { private ContentFileParser() {} - public static void unboundContentFileToJson( - ContentFile contentFile, PartitionSpec spec, JsonGenerator generator) throws IOException { - Preconditions.checkArgument(contentFile != null, "Invalid content file: null"); - Preconditions.checkArgument(spec != null, "Invalid partition spec: null"); - Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); - Preconditions.checkArgument( - contentFile.specId() == spec.specId(), - "Invalid partition spec id from content file: expected = %s, actual = %s", - spec.specId(), - contentFile.specId()); - - generator.writeStartObject(); - // ignore the ordinal position (ContentFile#pos) of the 
file in a manifest, - // as it isn't used and BaseFile constructor doesn't support it. - - generator.writeNumberField(SPEC_ID, contentFile.specId()); - generator.writeStringField(CONTENT, contentFile.content().name()); - generator.writeStringField(FILE_PATH, contentFile.path().toString()); - generator.writeStringField(FILE_FORMAT, contentFile.format().name()); - - if (contentFile.partition() != null) { - generator.writeFieldName(PARTITION); - SingleValueParser.toJson(spec.partitionType(), contentFile.partition(), generator); - } - - generator.writeNumberField(FILE_SIZE, contentFile.fileSizeInBytes()); - - metricsToJson(contentFile, generator); - - if (contentFile.keyMetadata() != null) { - generator.writeFieldName(KEY_METADATA); - SingleValueParser.toJson(DataFile.KEY_METADATA.type(), contentFile.keyMetadata(), generator); - } - - if (contentFile.splitOffsets() != null) { - JsonUtil.writeLongArray(SPLIT_OFFSETS, contentFile.splitOffsets(), generator); - } - - if (contentFile.equalityFieldIds() != null) { - JsonUtil.writeIntegerArray(EQUALITY_IDS, contentFile.equalityFieldIds(), generator); - } - - if (contentFile.sortOrderId() != null) { - generator.writeNumberField(SORT_ORDER_ID, contentFile.sortOrderId()); - } - - generator.writeEndObject(); - } - - public static ContentFile unboundContentFileFromJson(JsonNode jsonNode) { - Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for content file: null"); - - int specId = JsonUtil.getInt(SPEC_ID, jsonNode); - FileContent fileContent = FileContent.valueOf(JsonUtil.getString(CONTENT, jsonNode)); - String filePath = JsonUtil.getString(FILE_PATH, jsonNode); - FileFormat fileFormat = FileFormat.fromString(JsonUtil.getString(FILE_FORMAT, jsonNode)); - - long fileSizeInBytes = JsonUtil.getLong(FILE_SIZE, jsonNode); - Metrics metrics = metricsFromJson(jsonNode); - ByteBuffer keyMetadata = JsonUtil.getByteBufferOrNull(KEY_METADATA, jsonNode); - List splitOffsets = JsonUtil.getLongListOrNull(SPLIT_OFFSETS, 
jsonNode); - int[] equalityFieldIds = JsonUtil.getIntArrayOrNull(EQUALITY_IDS, jsonNode); - Integer sortOrderId = JsonUtil.getIntOrNull(SORT_ORDER_ID, jsonNode); - - if (fileContent == FileContent.DATA) { - return new UnboundGenericDataFile( - specId, - filePath, - fileFormat, - jsonNode.get(PARTITION), - fileSizeInBytes, - metrics, - keyMetadata, - splitOffsets, - sortOrderId); - } else { - return new UnboundGenericDeleteFile( - specId, - fileContent, - filePath, - fileFormat, - jsonNode.get(PARTITION), - fileSizeInBytes, - metrics, - equalityFieldIds, - sortOrderId, - splitOffsets, - keyMetadata); - } - } - private static boolean hasPartitionData(StructLike partitionData) { return partitionData != null && partitionData.size() > 0; } @@ -225,12 +134,11 @@ static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGenerator generator.writeEndObject(); } - static ContentFile fromJson(JsonNode jsonNode, PartitionSpec spec) { + static ContentFile fromJson(JsonNode jsonNode, Map specsById) { Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for content file: null"); Preconditions.checkArgument( jsonNode.isObject(), "Invalid JSON node for content file: non-object (%s)", jsonNode); - Preconditions.checkArgument(spec != null, "Invalid partition spec: null"); - + Preconditions.checkArgument(specsById != null, "Invalid partition spec: null"); int specId = JsonUtil.getInt(SPEC_ID, jsonNode); FileContent fileContent = FileContent.valueOf(JsonUtil.getString(CONTENT, jsonNode)); String filePath = JsonUtil.getString(FILE_PATH, jsonNode); @@ -238,7 +146,21 @@ static ContentFile fromJson(JsonNode jsonNode, PartitionSpec spec) { PartitionData partitionData = null; if (jsonNode.has(PARTITION)) { - partitionData = partitionDataFromRawValue(jsonNode.get(PARTITION), spec); + partitionData = new PartitionData(specsById.get(specId).partitionType()); + StructLike structLike = + (StructLike) + SingleValueParser.fromJson( + specsById.get(specId).partitionType(), 
jsonNode.get(PARTITION)); + Preconditions.checkState( + partitionData.size() == structLike.size(), + "Invalid partition data size: expected = %s, actual = %s", + partitionData.size(), + structLike.size()); + for (int pos = 0; pos < partitionData.size(); ++pos) { + Class javaClass = + specsById.get(specId).partitionType().fields().get(pos).type().typeId().javaClass(); + partitionData.set(pos, structLike.get(pos, javaClass)); + } } long fileSizeInBytes = JsonUtil.getLong(FILE_SIZE, jsonNode); @@ -283,27 +205,6 @@ static ContentFile fromJson(JsonNode jsonNode, PartitionSpec spec) { } } - static PartitionData partitionDataFromRawValue(JsonNode rawPartitionValue, PartitionSpec spec) { - if (rawPartitionValue == null) { - return null; - } - - PartitionData partitionData = new PartitionData(spec.partitionType()); - StructLike structLike = - (StructLike) SingleValueParser.fromJson(spec.partitionType(), rawPartitionValue); - Preconditions.checkState( - partitionData.size() == structLike.size(), - "Invalid partition data size: expected = %s, actual = %s", - partitionData.size(), - structLike.size()); - for (int pos = 0; pos < partitionData.size(); ++pos) { - Class javaClass = spec.partitionType().fields().get(pos).type().typeId().javaClass(); - partitionData.set(pos, structLike.get(pos, javaClass)); - } - - return partitionData; - } - private static void metricsToJson(ContentFile contentFile, JsonGenerator generator) throws IOException { generator.writeNumberField(RECORD_COUNT, contentFile.recordCount()); diff --git a/core/src/main/java/org/apache/iceberg/DataTaskParser.java b/core/src/main/java/org/apache/iceberg/DataTaskParser.java index 428bcf15e7e2..585ac29b3748 100644 --- a/core/src/main/java/org/apache/iceberg/DataTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/DataTaskParser.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; +import java.util.Map; import 
org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; @@ -64,7 +65,8 @@ static StaticDataTask fromJson(JsonNode jsonNode) { DataFile metadataFile = (DataFile) ContentFileParser.fromJson( - JsonUtil.get(METADATA_FILE, jsonNode), PartitionSpec.unpartitioned()); + JsonUtil.get(METADATA_FILE, jsonNode), + Map.of(PartitionSpec.unpartitioned().specId(), PartitionSpec.unpartitioned())); JsonNode rowsArray = JsonUtil.get(ROWS, jsonNode); Preconditions.checkArgument( diff --git a/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java index 7ae7dc74a72e..01d1443efdb8 100644 --- a/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; +import java.util.Map; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.ExpressionParser; import org.apache.iceberg.expressions.Expressions; @@ -86,7 +87,9 @@ static FileScanTask fromJson(JsonNode jsonNode, boolean caseSensitive) { DataFile dataFile = null; if (jsonNode.has(DATA_FILE)) { - dataFile = (DataFile) ContentFileParser.fromJson(jsonNode.get(DATA_FILE), spec); + dataFile = + (DataFile) + ContentFileParser.fromJson(jsonNode.get(DATA_FILE), Map.of(spec.specId(), spec)); } long start = JsonUtil.getLong(START, jsonNode); @@ -102,7 +105,8 @@ static FileScanTask fromJson(JsonNode jsonNode, boolean caseSensitive) { // parse the schema array ImmutableList.Builder builder = ImmutableList.builder(); for (JsonNode deleteFileNode : deletesArray) { - DeleteFile deleteFile = (DeleteFile) ContentFileParser.fromJson(deleteFileNode, spec); + DeleteFile deleteFile = + (DeleteFile) ContentFileParser.fromJson(deleteFileNode, Map.of(spec.specId(), spec)); builder.add(deleteFile); } 
diff --git a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java index 15e3392999f3..ef57d2d1c991 100644 --- a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java @@ -22,9 +22,11 @@ import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.ExpressionParser; +import org.apache.iceberg.expressions.ResidualEvaluator; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Sets; @@ -48,7 +50,7 @@ public static void toJson( generator.writeStartObject(); generator.writeFieldName(DATA_FILE); - ContentFileParser.unboundContentFileToJson(fileScanTask.file(), partitionSpec, generator); + ContentFileParser.toJson(fileScanTask.file(), partitionSpec, generator); if (deleteFileReferences != null) { JsonUtil.writeIntegerArray(DELETE_FILE_REFERENCES, deleteFileReferences, generator); } @@ -60,23 +62,28 @@ public static void toJson( generator.writeEndObject(); } - public static FileScanTask fromJson(JsonNode jsonNode, List allDeleteFiles) { + public static FileScanTask fromJson( + JsonNode jsonNode, + List allDeleteFiles, + Map specsById, + boolean isCaseSensitive) { Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for file scan task: null"); Preconditions.checkArgument( jsonNode.isObject(), "Invalid JSON node for file scan task: non-object (%s)", jsonNode); - UnboundGenericDataFile dataFile = - (UnboundGenericDataFile) - ContentFileParser.unboundContentFileFromJson(JsonUtil.get(DATA_FILE, jsonNode)); + DataFile dataFile = + (DataFile) 
ContentFileParser.fromJson(JsonUtil.get(DATA_FILE, jsonNode), specsById); + // specId from the dataFile + int specId = dataFile.specId(); - UnboundGenericDeleteFile[] deleteFiles = null; + DeleteFile[] deleteFiles = null; Set deleteFileReferences = Sets.newHashSet(); if (jsonNode.has(DELETE_FILE_REFERENCES)) { deleteFileReferences.addAll(JsonUtil.getIntegerList(DELETE_FILE_REFERENCES, jsonNode)); - ImmutableList.Builder builder = ImmutableList.builder(); + ImmutableList.Builder builder = ImmutableList.builder(); deleteFileReferences.forEach( - delIdx -> builder.add((UnboundGenericDeleteFile) allDeleteFiles.get(delIdx))); - deleteFiles = builder.build().toArray(new UnboundGenericDeleteFile[0]); + delIdx -> builder.add((GenericDeleteFile) allDeleteFiles.get(delIdx))); + deleteFiles = builder.build().toArray(new GenericDeleteFile[0]); } Expression filter = null; @@ -84,6 +91,11 @@ public static FileScanTask fromJson(JsonNode jsonNode, List allDelet filter = ExpressionParser.fromJson(jsonNode.get(RESIDUAL)); } - return new UnboundBaseFileScanTask(dataFile, deleteFiles, filter); + String schemaString = SchemaParser.toJson(specsById.get(specId).schema()); + String specString = PartitionSpecParser.toJson(specsById.get(specId)); + ResidualEvaluator boundResidual = + ResidualEvaluator.of(specsById.get(specId), filter, isCaseSensitive); + + return new BaseFileScanTask(dataFile, deleteFiles, schemaString, specString, boundResidual); } } diff --git a/core/src/main/java/org/apache/iceberg/RESTPlanningMode.java b/core/src/main/java/org/apache/iceberg/RESTPlanningMode.java deleted file mode 100644 index 185276ecbff7..000000000000 --- a/core/src/main/java/org/apache/iceberg/RESTPlanningMode.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import java.util.Locale; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; - -public enum RESTPlanningMode { - REQUIRED("required"), - SUPPORTED("supported"), - UNSUPPORTED("unsupported"); - private final String planningMode; - - RESTPlanningMode(String planningMode) { - this.planningMode = planningMode; - } - - public String mode() { - return planningMode; - } - - public static RESTPlanningMode fromName(String planningMode) { - Preconditions.checkArgument(planningMode != null, "planningMode is null"); - try { - return RESTPlanningMode.valueOf(planningMode.toUpperCase(Locale.ENGLISH)); - } catch (IllegalArgumentException e) { - throw new IllegalArgumentException( - String.format("Invalid planningMode name: %s", planningMode), e); - } - } -} diff --git a/core/src/main/java/org/apache/iceberg/RESTTable.java b/core/src/main/java/org/apache/iceberg/RESTTable.java deleted file mode 100644 index c45c9feb581c..000000000000 --- a/core/src/main/java/org/apache/iceberg/RESTTable.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import java.util.Map; -import java.util.function.Supplier; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.metrics.MetricsReporter; -import org.apache.iceberg.rest.RESTClient; -import org.apache.iceberg.rest.ResourcePaths; - -public class RESTTable extends BaseTable { - private final RESTClient client; - private final String path; - private final Supplier> headers; - private final MetricsReporter reporter; - private final ResourcePaths resourcePaths; - private final TableIdentifier tableIdentifier; - - public RESTTable( - TableOperations ops, - String name, - MetricsReporter reporter, - RESTClient client, - String path, - Supplier> headers, - TableIdentifier tableIdentifier, - ResourcePaths resourcePaths) { - super(ops, name, reporter); - this.reporter = reporter; - this.client = client; - this.headers = headers; - this.path = path; - this.tableIdentifier = tableIdentifier; - this.resourcePaths = resourcePaths; - } - - @Override - public TableScan newScan() { - // TODO when looking at ImmutableTableScanContext how do we ensure - // correct snapshotId to use for point in time cases. 
When looking at spark - // it seems it follows similar approach, see class SparkDistributedDataScan - - return new RESTTableScan( - this, - schema(), - ImmutableTableScanContext.builder().metricsReporter(reporter).build(), - client, - path, - headers, - operations(), - tableIdentifier, - resourcePaths); - } -} diff --git a/core/src/main/java/org/apache/iceberg/RESTTableScan.java b/core/src/main/java/org/apache/iceberg/RESTTableScan.java deleted file mode 100644 index 76a1a9d69548..000000000000 --- a/core/src/main/java/org/apache/iceberg/RESTTableScan.java +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg; - -import java.util.List; -import java.util.Map; -import java.util.function.Supplier; -import java.util.stream.Collectors; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.io.CloseableIterable; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.rest.ErrorHandlers; -import org.apache.iceberg.rest.PlanStatus; -import org.apache.iceberg.rest.RESTClient; -import org.apache.iceberg.rest.ResourcePaths; -import org.apache.iceberg.rest.requests.PlanTableScanRequest; -import org.apache.iceberg.rest.responses.FetchPlanningResultResponse; -import org.apache.iceberg.rest.responses.PlanTableScanResponse; -import org.apache.iceberg.types.Types; -import org.apache.iceberg.util.ParallelIterable; - -public class RESTTableScan extends DataTableScan { - private final RESTClient client; - private final String path; - private final Supplier> headers; - private final TableOperations operations; - private final Table table; - private final ResourcePaths resourcePaths; - private final TableIdentifier tableIdentifier; - - // TODO revisit if this property should be configurable - private static final int FETCH_PLANNING_SLEEP_DURATION_MS = 1000; - - public RESTTableScan( - Table table, - Schema schema, - TableScanContext context, - RESTClient client, - String path, - Supplier> headers, - TableOperations operations, - TableIdentifier tableIdentifier, - ResourcePaths resourcePaths) { - super(table, schema, context); - this.table = table; - this.client = client; - this.headers = headers; - this.path = path; - this.operations = operations; - this.tableIdentifier = tableIdentifier; - this.resourcePaths = resourcePaths; - } - - @Override - protected TableScan newRefinedScan( - Table refinedTable, Schema refinedSchema, TableScanContext refinedContext) { - return new RESTTableScan( - refinedTable, - refinedSchema, - refinedContext, - client, - path, - headers, - operations, - 
tableIdentifier, - resourcePaths); - } - - @Override - public CloseableIterable planFiles() { - List selectedColumns = - schema().columns().stream().map(Types.NestedField::name).collect(Collectors.toList()); - - List statsFields = null; - if (columnsToKeepStats() != null) { - statsFields = - columnsToKeepStats().stream() - .map(columnId -> schema().findColumnName(columnId)) - .collect(Collectors.toList()); - } - - Long startSnapshotId = context().fromSnapshotId(); - Long endSnapshotId = context().toSnapshotId(); - Long snapshotId = snapshotId(); - - PlanTableScanRequest.Builder planTableScanRequestBuilder = - new PlanTableScanRequest.Builder() - .withSelect(selectedColumns) - .withFilter(filter()) - .withCaseSensitive(isCaseSensitive()) - .withStatsFields(statsFields); - - if (startSnapshotId != null && endSnapshotId != null) { - planTableScanRequestBuilder - .withStartSnapshotId(startSnapshotId) - .withEndSnapshotId(endSnapshotId) - .withUseSnapshotSchema(true); - - } else if (snapshotId != null) { - boolean useSnapShotSchema = snapshotId != table.currentSnapshot().snapshotId(); - planTableScanRequestBuilder - .withSnapshotId(snapshotId) - .withUseSnapshotSchema(useSnapShotSchema); - - } else { - planTableScanRequestBuilder.withSnapshotId(table().currentSnapshot().snapshotId()); - } - - return planTableScan(planTableScanRequestBuilder.build()); - } - - private CloseableIterable planTableScan(PlanTableScanRequest planTableScanRequest) { - PlanTableScanResponse response = - client.post( - resourcePaths.planTableScan(tableIdentifier), - planTableScanRequest, - PlanTableScanResponse.class, - headers, - ErrorHandlers.defaultErrorHandler()); - - PlanStatus planStatus = response.planStatus(); - switch (planStatus) { - case COMPLETED: - // List fileScanTasks = bindFileScanTasksWithSpec(response.fileScanTasks()); - return getScanTasksIterable(response.planTasks(), response.fileScanTasks()); - case SUBMITTED: - return fetchPlanningResult(response.planId()); - case FAILED: - 
throw new RuntimeException( - "Received \"failed\" status from service when planning a table scan"); - default: - throw new RuntimeException( - String.format("Invalid planStatus during planTableScan: %s", planStatus)); - } - } - - private CloseableIterable fetchPlanningResult(String planId) { - - // TODO need to introduce a max wait time for this loop potentially - boolean planningFinished = false; - while (!planningFinished) { - FetchPlanningResultResponse response = - client.get( - resourcePaths.fetchPlanningResult(tableIdentifier, planId), - FetchPlanningResultResponse.class, - headers, - ErrorHandlers.defaultErrorHandler()); - - PlanStatus planStatus = response.planStatus(); - switch (planStatus) { - case COMPLETED: - // List fileScanTasks = bindFileScanTasksWithSpec(response.fileScanTasks()); - return getScanTasksIterable(response.planTasks(), response.fileScanTasks()); - case SUBMITTED: - try { - Thread.sleep(FETCH_PLANNING_SLEEP_DURATION_MS); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException("Interrupted while fetching plan status", e); - } - break; - case FAILED: - throw new RuntimeException( - "Received \"failed\" status from service when fetching a table scan"); - case CANCELLED: - throw new RuntimeException( - String.format( - "Received \"cancelled\" status from service when fetching a table scan, planId: %s is invalid", - planId)); - default: - throw new RuntimeException( - String.format("Invalid planStatus during fetchPlanningResult: %s", planStatus)); - } - } - return null; - } - - public CloseableIterable getScanTasksIterable( - List planTasks, List fileScanTasks) { - List iterableOfScanTaskIterables = Lists.newArrayList(); - if (fileScanTasks != null) { - // add this to the list for below if planTasks will also be present - ScanTasksIterable scanTasksIterable = - new ScanTasksIterable( - fileScanTasks, - client, - resourcePaths, - tableIdentifier, - headers, - planExecutor(), - table.specs(), - 
isCaseSensitive()); - iterableOfScanTaskIterables.add(scanTasksIterable); - } - if (planTasks != null) { - // Use parallel iterable since planTasks are present - for (String planTask : planTasks) { - ScanTasksIterable iterable = - new ScanTasksIterable( - planTask, - client, - resourcePaths, - tableIdentifier, - headers, - planExecutor(), - table.specs(), - isCaseSensitive()); - iterableOfScanTaskIterables.add(iterable); - } - return new ParallelIterable<>(iterableOfScanTaskIterables, planExecutor()); - // another idea is to keep concating to the original parallel iterable??? - } - // use a single scanTasks iterable since no need to parallelize since no planTasks - return new ScanTasksIterable( - fileScanTasks, - client, - resourcePaths, - tableIdentifier, - headers, - planExecutor(), - table.specs(), - isCaseSensitive()); - } -} diff --git a/core/src/main/java/org/apache/iceberg/ScanTasksIterable.java b/core/src/main/java/org/apache/iceberg/ScanTasksIterable.java deleted file mode 100644 index df5dc3f9149b..000000000000 --- a/core/src/main/java/org/apache/iceberg/ScanTasksIterable.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.function.Supplier; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.io.CloseableIterable; -import org.apache.iceberg.io.CloseableIterator; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.rest.ErrorHandlers; -import org.apache.iceberg.rest.RESTClient; -import org.apache.iceberg.rest.ResourcePaths; -import org.apache.iceberg.rest.requests.FetchScanTasksRequest; -import org.apache.iceberg.rest.responses.FetchScanTasksResponse; -import org.apache.iceberg.util.ParallelIterable; - -public class ScanTasksIterable implements CloseableIterable { - private final RESTClient client; - private final ResourcePaths resourcePaths; - private final TableIdentifier tableIdentifier; - private final Supplier> headers; - private final String - planTask; // parallelizing on this where a planTask produces a list of file scan tasks, as - // well more planTasks - private final List fileScanTasks; - private ExecutorService executorService; - private Map specsById; - private boolean caseSensitive; - - public ScanTasksIterable( - String planTask, - RESTClient client, - ResourcePaths resourcePaths, - TableIdentifier tableIdentifier, - Supplier> headers, - ExecutorService executorService, - Map specsById, - boolean caseSensitive) { - this.planTask = planTask; - this.fileScanTasks = null; - this.client = client; - this.resourcePaths = resourcePaths; - this.tableIdentifier = tableIdentifier; - this.headers = headers; - this.executorService = executorService; - this.specsById = specsById; - this.caseSensitive = caseSensitive; - } - - public ScanTasksIterable( - List fileScanTasks, - RESTClient client, - ResourcePaths resourcePaths, - TableIdentifier tableIdentifier, - Supplier> headers, - ExecutorService executorService, - Map specsById, - boolean 
caseSensitive) { - this.planTask = null; - this.fileScanTasks = fileScanTasks; - this.client = client; - this.resourcePaths = resourcePaths; - this.tableIdentifier = tableIdentifier; - this.headers = headers; - this.executorService = executorService; - this.specsById = specsById; - this.caseSensitive = caseSensitive; - } - - @Override - public CloseableIterator iterator() { - return new ScanTasksIterator( - planTask, - fileScanTasks, - client, - resourcePaths, - tableIdentifier, - headers, - executorService, - specsById, - caseSensitive); - } - - @Override - public void close() throws IOException {} - - private static class ScanTasksIterator implements CloseableIterator { - private final RESTClient client; - private final ResourcePaths resourcePaths; - private final TableIdentifier tableIdentifier; - private final Supplier> headers; - private String planTask; - private List fileScanTasks; - private ExecutorService executorService; - private Map specsById; - private boolean caseSensitive; - - ScanTasksIterator( - String planTask, - List fileScanTasks, - RESTClient client, - ResourcePaths resourcePaths, - TableIdentifier tableIdentifier, - Supplier> headers, - ExecutorService executorService, - Map specsById, - boolean caseSensitive) { - this.client = client; - this.resourcePaths = resourcePaths; - this.tableIdentifier = tableIdentifier; - this.headers = headers; - this.planTask = planTask; - this.fileScanTasks = fileScanTasks != null ? 
fileScanTasks : Lists.newArrayList(); - this.executorService = executorService; - this.specsById = specsById; - this.caseSensitive = caseSensitive; - } - - @Override - public boolean hasNext() { - if (!fileScanTasks.isEmpty()) { - // Have file scan tasks so continue to consume - return true; - } - // Out of file scan tasks, so need to now fetch more from each planTask - // Service can send back more planTasks which acts as pagination - if (planTask != null) { - fetchScanTasks(planTask); - planTask = null; - // Make another hasNext() call, as more fileScanTasks have been fetched - return hasNext(); - } - // we have no file scan tasks left to consume - // so means we are finished - return false; - } - - @Override - public FileScanTask next() { - FileScanTask task = fileScanTasks.remove(0); - if (task instanceof UnboundBaseFileScanTask) { - // bind partition spec data to task - UnboundBaseFileScanTask unboundBaseFileScanTask = (UnboundBaseFileScanTask) task; - Integer specId = task.file().specId(); - return unboundBaseFileScanTask.bind(specsById.get(specId), caseSensitive); - } else { - return task; - } - } - - private void fetchScanTasks(String withPlanTask) { - FetchScanTasksRequest fetchScanTasksRequest = new FetchScanTasksRequest(withPlanTask); - FetchScanTasksResponse response = - client.post( - resourcePaths.fetchScanTasks(tableIdentifier), - fetchScanTasksRequest, - FetchScanTasksResponse.class, - headers, - ErrorHandlers.defaultErrorHandler()); - if (response.fileScanTasks() != null) { - fileScanTasks.addAll(response.fileScanTasks()); - } - - if (response.planTasks() != null) { - // this is the case where a plan task returned an additional plan task, so ensure that this - // result is added to top level fileScanTasks list. 
- // confirmed working with catalog test - // #testPlanTableScanAndFetchScanTasksWithCompletedStatusAndNestedPlanTasks - Iterable fileScanTasksFromPlanTasks = - getScanTasksIterable(response.planTasks()); - fileScanTasksFromPlanTasks.forEach(task -> fileScanTasks.add(task)); - } - } - - public CloseableIterable getScanTasksIterable(List planTasks) { - List iterableOfScanTaskIterables = Lists.newArrayList(); - for (String withPlanTask : planTasks) { - ScanTasksIterable iterable = - new ScanTasksIterable( - withPlanTask, - client, - resourcePaths, - tableIdentifier, - headers, - executorService, - specsById, - caseSensitive); - iterableOfScanTaskIterables.add(iterable); - } - return new ParallelIterable<>(iterableOfScanTaskIterables, executorService); - } - - @Override - public void close() throws IOException {} - } -} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java similarity index 82% rename from core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponseParser.java rename to core/src/main/java/org/apache/iceberg/TableScanResponseParser.java index 0e111755eaa9..e66f683ef60e 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.apache.iceberg.rest.responses; +package org.apache.iceberg; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; @@ -24,11 +24,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.iceberg.ContentFileParser; -import org.apache.iceberg.DeleteFile; -import org.apache.iceberg.FileScanTask; -import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.RESTFileScanTaskParser; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -36,22 +31,22 @@ import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.util.JsonUtil; -class TableScanResponseParser { +public class TableScanResponseParser { private TableScanResponseParser() {} static final String FILE_SCAN_TASKS = "file-scan-tasks"; static final String DELETE_FILES = "delete-files"; - public static List parseDeleteFiles(JsonNode node) { + public static List parseDeleteFiles( + JsonNode node, Map specsById) { if (node.has(DELETE_FILES)) { JsonNode deleteFiles = JsonUtil.get(DELETE_FILES, node); Preconditions.checkArgument( deleteFiles.isArray(), "Cannot parse delete files from non-array: %s", deleteFiles); ImmutableList.Builder deleteFilesBuilder = ImmutableList.builder(); for (JsonNode deleteFileNode : deleteFiles) { - DeleteFile deleteFile = - (DeleteFile) ContentFileParser.unboundContentFileFromJson(deleteFileNode); + DeleteFile deleteFile = (DeleteFile) ContentFileParser.fromJson(deleteFileNode, specsById); deleteFilesBuilder.add(deleteFile); } return deleteFilesBuilder.build(); @@ -60,14 +55,22 @@ public static List parseDeleteFiles(JsonNode node) { return null; } - public static List parseFileScanTasks(JsonNode node, List deleteFiles) { + public static List parseFileScanTasks( + JsonNode node, + List deleteFiles, 
+ Map specsById, + boolean caseSensitive) { + // TODO: add assertions in the code to make sure all these are set + // before we start parsing. if (node.has(FILE_SCAN_TASKS)) { JsonNode scanTasks = JsonUtil.get(FILE_SCAN_TASKS, node); Preconditions.checkArgument( scanTasks.isArray(), "Cannot parse file scan tasks from non-array: %s", scanTasks); List fileScanTaskList = Lists.newArrayList(); for (JsonNode fileScanTaskNode : scanTasks) { - FileScanTask fileScanTask = RESTFileScanTaskParser.fromJson(fileScanTaskNode, deleteFiles); + FileScanTask fileScanTask = + RESTFileScanTaskParser.fromJson( + fileScanTaskNode, deleteFiles, specsById, caseSensitive); fileScanTaskList.add(fileScanTask); } @@ -91,8 +94,7 @@ public static void serializeScanTasks( for (int i = 0; i < deleteFiles.size(); i++) { DeleteFile deleteFile = deleteFiles.get(i); deleteFilePathToIndex.put(String.valueOf(deleteFile.path()), i); - ContentFileParser.unboundContentFileToJson( - deleteFiles.get(i), specsById.get(deleteFile.specId()), gen); + ContentFileParser.toJson(deleteFiles.get(i), specsById.get(deleteFile.specId()), gen); } gen.writeEndArray(); } diff --git a/core/src/main/java/org/apache/iceberg/UnboundBaseFileScanTask.java b/core/src/main/java/org/apache/iceberg/UnboundBaseFileScanTask.java deleted file mode 100644 index 9905e5be4c21..000000000000 --- a/core/src/main/java/org/apache/iceberg/UnboundBaseFileScanTask.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import org.apache.iceberg.expressions.Expression; -import org.apache.iceberg.expressions.ResidualEvaluator; - -class UnboundBaseFileScanTask extends BaseFileScanTask { - private UnboundGenericDataFile unboundDataFile; - private UnboundGenericDeleteFile[] unboundDeleteFiles; - private Expression filter; - - UnboundBaseFileScanTask( - UnboundGenericDataFile unboundDataFile, - UnboundGenericDeleteFile[] unboundDeleteFiles, - Expression filter) { - super(unboundDataFile, unboundDeleteFiles, null, null, ResidualEvaluator.unpartitioned(filter)); - this.unboundDataFile = unboundDataFile; - this.unboundDeleteFiles = unboundDeleteFiles; - this.filter = filter; - } - - @Override - public Schema schema() { - throw new UnsupportedOperationException("schema() is not supported in UnboundBaseFileScanTask"); - } - - @Override - public PartitionSpec spec() { - throw new UnsupportedOperationException("spec() is not supported in UnboundBaseFileScanTask"); - } - - public FileScanTask bind(PartitionSpec spec, boolean caseSensitive) { - GenericDataFile boundDataFile = unboundDataFile.bindToSpec(spec); - DeleteFile[] boundDeleteFiles = new DeleteFile[unboundDeleteFiles.length]; - for (int i = 0; i < unboundDeleteFiles.length; i++) { - boundDeleteFiles[i] = unboundDeleteFiles[i].bindToSpec(spec); - } - - String schemaString = SchemaParser.toJson(spec.schema()); - String specString = PartitionSpecParser.toJson(spec); - ResidualEvaluator boundResidual = ResidualEvaluator.of(spec, filter, caseSensitive); - - return new BaseFileScanTask( - 
boundDataFile, boundDeleteFiles, schemaString, specString, boundResidual); - } -} diff --git a/core/src/main/java/org/apache/iceberg/UnboundGenericDataFile.java b/core/src/main/java/org/apache/iceberg/UnboundGenericDataFile.java deleted file mode 100644 index 66f0d693cd0e..000000000000 --- a/core/src/main/java/org/apache/iceberg/UnboundGenericDataFile.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg; - -import com.fasterxml.jackson.databind.JsonNode; -import java.nio.ByteBuffer; -import java.util.List; - -/** - * An UnboundGenericDataFile is a GenericDataFile which keeps track of the raw partition value - * represented as JSON - */ -class UnboundGenericDataFile extends GenericDataFile { - private final JsonNode rawPartitionValue; - - UnboundGenericDataFile( - int specId, - String filePath, - FileFormat format, - JsonNode rawPartitionValue, - long fileSizeInBytes, - Metrics metrics, - ByteBuffer keyMetadata, - List splitOffsets, - Integer sortOrderId) { - super( - specId, - filePath, - format, - null, - fileSizeInBytes, - metrics, - keyMetadata, - splitOffsets, - sortOrderId, - -1L); // track row-lineage - this.rawPartitionValue = rawPartitionValue; - } - - GenericDataFile bindToSpec(PartitionSpec spec) { - return new GenericDataFile( - specId(), - path().toString(), - format(), - ContentFileParser.partitionDataFromRawValue(rawPartitionValue, spec), - fileSizeInBytes(), - new Metrics( - recordCount(), - columnSizes(), - valueCounts(), - nullValueCounts(), - nanValueCounts(), - lowerBounds(), - upperBounds()), - keyMetadata(), - splitOffsets(), - sortOrderId(), - -1L); - } -} diff --git a/core/src/main/java/org/apache/iceberg/UnboundGenericDeleteFile.java b/core/src/main/java/org/apache/iceberg/UnboundGenericDeleteFile.java deleted file mode 100644 index 57be45e27c5c..000000000000 --- a/core/src/main/java/org/apache/iceberg/UnboundGenericDeleteFile.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import com.fasterxml.jackson.databind.JsonNode; -import java.nio.ByteBuffer; -import java.util.List; - -/** - * An UnboundGenericDeleteFile is a GenericDeleteFile which keeps track of the raw partition value - * represented as JSON - */ -class UnboundGenericDeleteFile extends GenericDeleteFile { - private JsonNode rawPartitionValue; - - UnboundGenericDeleteFile( - int specId, - FileContent content, - String filePath, - FileFormat format, - JsonNode rawPartitionValue, - long fileSizeInBytes, - Metrics metrics, - int[] equalityFieldIds, - Integer sortOrderId, - List splitOffsets, - ByteBuffer keyMetadata) { - super( - specId, - content, - filePath, - format, - null, - fileSizeInBytes, - metrics, - equalityFieldIds, - sortOrderId, - splitOffsets, - keyMetadata, - null, - null, - null); - this.rawPartitionValue = rawPartitionValue; - } - - GenericDeleteFile bindToSpec(PartitionSpec spec) { - return new GenericDeleteFile( - specId(), - content(), - path().toString(), - format(), - ContentFileParser.partitionDataFromRawValue(rawPartitionValue, spec), - fileSizeInBytes(), - new Metrics( - recordCount(), - columnSizes(), - valueCounts(), - nullValueCounts(), - nanValueCounts(), - lowerBounds(), - upperBounds()), - equalityFieldIds().stream().mapToInt(Integer::intValue).toArray(), - sortOrderId(), - splitOffsets(), - keyMetadata(), - null, - null, - null); - } -} diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java index 
9167f4233afd..37f703c5740d 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java @@ -28,8 +28,10 @@ import com.fasterxml.jackson.databind.SerializerProvider; import com.fasterxml.jackson.databind.module.SimpleModule; import java.io.IOException; +import java.util.Map; import org.apache.iceberg.MetadataUpdate; import org.apache.iceberg.MetadataUpdateParser; +import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; import org.apache.iceberg.SchemaParser; @@ -145,7 +147,6 @@ public static void registerAll(ObjectMapper mapper) { FetchPlanningResultResponse.class, new FetchPlanningResultResponseDeserializer<>()) .addSerializer(FetchScanTasksResponse.class, new FetchScanTaskResponseSerializer<>()) .addDeserializer(FetchScanTasksResponse.class, new FetchScanTaskResponseDeserializer<>()) - .addDeserializer(LoadTableResponse.class, new LoadTableResponseDeserializer<>()) .addSerializer(LoadCredentialsResponse.class, new LoadCredentialsResponseSerializer<>()) .addSerializer( ImmutableLoadCredentialsResponse.class, new LoadCredentialsResponseSerializer<>()) @@ -544,7 +545,13 @@ static class PlanTableScanResponseDeserializer @Override public T deserialize(JsonParser p, DeserializationContext context) throws IOException { JsonNode jsonNode = p.getCodec().readTree(p); - return (T) PlanTableScanResponseParser.fromJson(jsonNode); + // Retrieve injectable values + @SuppressWarnings("unchecked") + Map specsById = + (Map) context.findInjectableValue("specsById", null, null); + + boolean caseSensitive = (boolean) context.findInjectableValue("caseSensitive", null, null); + return (T) PlanTableScanResponseParser.fromJson(jsonNode, specsById, caseSensitive); } } @@ -562,7 +569,13 @@ static class FetchPlanningResultResponseDeserializer specsById = + (Map) context.findInjectableValue("specsById", null, null); + + boolean caseSensitive = 
(boolean) context.findInjectableValue("caseSensitive", null, null); + return (T) FetchPlanningResultResponseParser.fromJson(jsonNode, specsById, caseSensitive); } } @@ -580,7 +593,13 @@ static class FetchScanTaskResponseDeserializer @Override public T deserialize(JsonParser p, DeserializationContext context) throws IOException { JsonNode jsonNode = p.getCodec().readTree(p); - return (T) FetchScanTasksResponseParser.fromJson(jsonNode); + // Retrieve injectable values + @SuppressWarnings("unchecked") + Map specsById = + (Map) context.findInjectableValue("specsById", null, null); + + boolean caseSensitive = (boolean) context.findInjectableValue("caseSensitive", null, null); + return (T) FetchScanTasksResponseParser.fromJson(jsonNode, specsById, caseSensitive); } } } diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java index 50b545016028..33c396b6b5c9 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java @@ -36,12 +36,10 @@ import org.apache.iceberg.MetadataTableUtils; import org.apache.iceberg.MetadataUpdate; import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.RESTTable; import org.apache.iceberg.Schema; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; -import org.apache.iceberg.TableOperations; import org.apache.iceberg.Transaction; import org.apache.iceberg.Transactions; import org.apache.iceberg.catalog.BaseViewSessionCatalog; @@ -110,9 +108,6 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog private static final String REST_SNAPSHOT_LOADING_MODE = "snapshot-loading-mode"; // for backwards compatibility with older REST servers where it can be assumed that a particular // server supports view endpoints but doesn't send the "endpoints" field in the ConfigResponse - public static 
final String REST_SERVER_PLANNING_ENABLED = "rest-server-planning-enabled"; - private static final String REST_TABLE_SCAN_PLANNING_PROPERTY = "table.rest-scan-planning"; - static final String VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported"; public static final String REST_PAGE_SIZE = "rest-page-size"; @@ -160,7 +155,6 @@ public class RESTSessionCatalog extends BaseViewSessionCatalog private FileIO io = null; private MetricsReporter reporter = null; private boolean reportingViaRestEnabled; - private boolean restServerPlanningEnabled; private Integer pageSize = null; private CloseableGroup closeables = null; private Set endpoints; @@ -259,9 +253,6 @@ public void initialize(String name, Map unresolved) { this.reportingViaRestEnabled = PropertyUtil.propertyAsBoolean(mergedProps, REST_METRICS_REPORTING_ENABLED, true); - - this.restServerPlanningEnabled = - PropertyUtil.propertyAsBoolean(mergedProps, REST_SERVER_PLANNING_ENABLED, false); super.initialize(name, mergedProps); } @@ -461,12 +452,6 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) { endpoints); trackFileIO(ops); - - RESTTable restTable = tableSupportsRemoteScanPlanning(ops, finalIdentifier, tableClient); - if (restTable != null) { - return restTable; - } - BaseTable table = new BaseTable( ops, @@ -479,26 +464,6 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) { return table; } - private RESTTable tableSupportsRemoteScanPlanning( - TableOperations ops, TableIdentifier finalIdentifier, RESTClient restClient) { - if (ops.current().properties().containsKey(REST_TABLE_SCAN_PLANNING_PROPERTY)) { - boolean tableSupportsRemotePlanning = - ops.current().propertyAsBoolean(REST_TABLE_SCAN_PLANNING_PROPERTY, false); - if (tableSupportsRemotePlanning && restServerPlanningEnabled) { - return new RESTTable( - ops, - fullTableName(finalIdentifier), - metricsReporter(paths.metrics(finalIdentifier), restClient), - this.client, - paths.table(finalIdentifier), - 
Map::of, - finalIdentifier, - paths); - } - } - return null; - } - private void trackFileIO(RESTTableOperations ops) { if (io != ops.io()) { fileIOTracker.track(ops); @@ -565,12 +530,6 @@ public Table registerTable( endpoints); trackFileIO(ops); - - RESTTable restTable = tableSupportsRemoteScanPlanning(ops, ident, tableClient); - if (restTable != null) { - return restTable; - } - return new BaseTable( ops, fullTableName(ident), metricsReporter(paths.metrics(ident), tableClient)); } @@ -827,14 +786,7 @@ public Table create() { tableFileIO(context, tableConf, response.credentials()), response.tableMetadata(), endpoints); - trackFileIO(ops); - - RESTTable restTable = tableSupportsRemoteScanPlanning(ops, ident, tableClient); - if (restTable != null) { - return restTable; - } - return new BaseTable( ops, fullTableName(ident), metricsReporter(paths.metrics(ident), tableClient)); } diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java index a5ac0cdcc212..179db81cbe53 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java @@ -22,8 +22,11 @@ import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; import java.util.List; +import java.util.Map; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.TableScanResponseParser; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; import org.apache.iceberg.util.JsonUtil; @@ -61,21 +64,27 @@ public static void toJson(FetchPlanningResultResponse response, JsonGenerator ge gen.writeEndObject(); } - public static FetchPlanningResultResponse fromJson(String json) { + public 
static FetchPlanningResultResponse fromJson( + String json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument(json != null, "Invalid response: fetchPanningResultResponse null"); - return JsonUtil.parse(json, FetchPlanningResultResponseParser::fromJson); + return JsonUtil.parse( + json, + node -> { + return fromJson(node, specsById, caseSensitive); + }); } - public static FetchPlanningResultResponse fromJson(JsonNode json) { + public static FetchPlanningResultResponse fromJson( + JsonNode json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument( json != null && !json.isEmpty(), "Invalid response: fetchPanningResultResponse null or empty"); PlanStatus planStatus = PlanStatus.fromName(JsonUtil.getString(PLAN_STATUS, json)); List planTasks = JsonUtil.getStringListOrNull(PLAN_TASKS, json); - List deleteFiles = TableScanResponseParser.parseDeleteFiles(json); + List deleteFiles = TableScanResponseParser.parseDeleteFiles(json, specsById); List fileScanTasks = - TableScanResponseParser.parseFileScanTasks(json, deleteFiles); + TableScanResponseParser.parseFileScanTasks(json, deleteFiles, specsById, caseSensitive); return FetchPlanningResultResponse.builder() .withPlanStatus(planStatus) .withPlanTasks(planTasks) diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java index eefd165c4960..455f12a70124 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java @@ -22,8 +22,11 @@ import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; import java.util.List; +import java.util.Map; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.TableScanResponseParser; import 
org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; @@ -56,18 +59,24 @@ public static void toJson(FetchScanTasksResponse response, JsonGenerator gen) th gen.writeEndObject(); } - public static FetchScanTasksResponse fromJson(String json) { + public static FetchScanTasksResponse fromJson( + String json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument(json != null, "Cannot parse fetchScanTasks response from null"); - return JsonUtil.parse(json, FetchScanTasksResponseParser::fromJson); + return JsonUtil.parse( + json, + node -> { + return fromJson(node, specsById, caseSensitive); + }); } - public static FetchScanTasksResponse fromJson(JsonNode json) { + public static FetchScanTasksResponse fromJson( + JsonNode json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument( json != null && !json.isEmpty(), "Invalid response: fetchScanTasksResponse null"); List planTasks = JsonUtil.getStringListOrNull(PLAN_TASKS, json); - List deleteFiles = TableScanResponseParser.parseDeleteFiles(json); + List deleteFiles = TableScanResponseParser.parseDeleteFiles(json, specsById); List fileScanTasks = - TableScanResponseParser.parseFileScanTasks(json, deleteFiles); + TableScanResponseParser.parseFileScanTasks(json, deleteFiles, specsById, caseSensitive); return FetchScanTasksResponse.builder() .withPlanTasks(planTasks) .withFileScanTasks(fileScanTasks) diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java index 25d8f11d9ac4..2943850fdf74 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java @@ -22,8 +22,11 @@ import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; import java.util.List; +import java.util.Map; 
import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.TableScanResponseParser; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; import org.apache.iceberg.util.JsonUtil; @@ -66,13 +69,19 @@ public static void toJson(PlanTableScanResponse response, JsonGenerator gen) thr gen.writeEndObject(); } - public static PlanTableScanResponse fromJson(String json) { + public static PlanTableScanResponse fromJson( + String json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument( json != null, "Cannot parse planTableScan response from empty or null object"); - return JsonUtil.parse(json, PlanTableScanResponseParser::fromJson); + return JsonUtil.parse( + json, + node -> { + return PlanTableScanResponseParser.fromJson(node, specsById, caseSensitive); + }); } - public static PlanTableScanResponse fromJson(JsonNode json) { + public static PlanTableScanResponse fromJson( + JsonNode json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument( json != null && !json.isEmpty(), "Cannot parse planTableScan response from empty or null object"); @@ -80,9 +89,9 @@ public static PlanTableScanResponse fromJson(JsonNode json) { PlanStatus planStatus = PlanStatus.fromName(JsonUtil.getString(PLAN_STATUS, json)); String planId = JsonUtil.getStringOrNull(PLAN_ID, json); List planTasks = JsonUtil.getStringListOrNull(PLAN_TASKS, json); - List deleteFiles = TableScanResponseParser.parseDeleteFiles(json); + List deleteFiles = TableScanResponseParser.parseDeleteFiles(json, specsById); List fileScanTasks = - TableScanResponseParser.parseFileScanTasks(json, deleteFiles); + TableScanResponseParser.parseFileScanTasks(json, deleteFiles, specsById, caseSensitive); return PlanTableScanResponse.builder() .withPlanId(planId) diff --git a/core/src/test/java/org/apache/iceberg/TestBase.java 
b/core/src/test/java/org/apache/iceberg/TestBase.java index 4e9d66475940..30c1fb7191fd 100644 --- a/core/src/test/java/org/apache/iceberg/TestBase.java +++ b/core/src/test/java/org/apache/iceberg/TestBase.java @@ -102,7 +102,7 @@ public class TestBase { .withContentSizeInBytes(6) .build(); // Equality delete files. - public static final DeleteFile FILE_A2_DELETES = + static final DeleteFile FILE_A2_DELETES = FileMetadata.deleteFileBuilder(SPEC) .ofEqualityDeletes(1) .withPath("/path/to/data-a2-deletes.parquet") diff --git a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java index 759f2f8ecd0b..8c3f43df9e8a 100644 --- a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java +++ b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java @@ -25,6 +25,7 @@ import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collections; +import java.util.Map; import java.util.stream.Stream; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.types.Comparators; @@ -51,7 +52,8 @@ public void testNullArguments() throws Exception { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid JSON generator: null"); - assertThatThrownBy(() -> ContentFileParser.fromJson(null, TestBase.SPEC)) + assertThatThrownBy( + () -> ContentFileParser.fromJson(null, Map.of(TestBase.SPEC.specId(), TestBase.SPEC))) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid JSON node for content file: null"); @@ -69,7 +71,8 @@ public void testDataFile(PartitionSpec spec, DataFile dataFile, String expectedJ String jsonStr = ContentFileParser.toJson(dataFile, spec); assertThat(jsonStr).isEqualTo(expectedJson); JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); - ContentFile deserializedContentFile = ContentFileParser.fromJson(jsonNode, spec); + ContentFile deserializedContentFile = + ContentFileParser.fromJson(jsonNode, 
Map.of(TestBase.SPEC.specId(), spec)); assertThat(deserializedContentFile).isInstanceOf(DataFile.class); assertContentFileEquals(dataFile, deserializedContentFile, spec); } @@ -81,7 +84,8 @@ public void testDeleteFile(PartitionSpec spec, DeleteFile deleteFile, String exp String jsonStr = ContentFileParser.toJson(deleteFile, spec); assertThat(jsonStr).isEqualTo(expectedJson); JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); - ContentFile deserializedContentFile = ContentFileParser.fromJson(jsonNode, spec); + ContentFile deserializedContentFile = + ContentFileParser.fromJson(jsonNode, Map.of(spec.specId(), TestBase.SPEC)); assertThat(deserializedContentFile).isInstanceOf(DeleteFile.class); assertContentFileEquals(deleteFile, deserializedContentFile, spec); } diff --git a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java index fd9fc306b168..c2fd24856fb2 100644 --- a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java +++ b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java @@ -43,7 +43,6 @@ import org.apache.iceberg.FilesTable; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.HistoryEntry; -import org.apache.iceberg.PartitionData; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.ReachableFileUtil; import org.apache.iceberg.ReplaceSortOrder; @@ -157,16 +156,6 @@ public abstract class CatalogTests { .withRecordCount(2) // needs at least one record or else metrics will filter it out .build(); - protected static final Namespace REST_DB = Namespace.of("restDB"); - public static final TableIdentifier TABLE_COMPLETED_WITH_FILE_SCAN_TASK = - TableIdentifier.of(REST_DB, "table_completed_with_file_scan_task"); - public static final TableIdentifier TABLE_SUBMITTED_WITH_FILE_SCAN_TASK = - TableIdentifier.of(REST_DB, "table_submitted_with_file_scan_task"); - public static final TableIdentifier TABLE_COMPLETED_WITH_PLAN_TASK = - 
TableIdentifier.of(REST_DB, "table_completed_with_plan_task"); - public static final TableIdentifier TABLE_COMPLETED_WITH_NESTED_PLAN_TASK = - TableIdentifier.of(REST_DB, "table_completed_with_nested_plan_task"); - protected abstract C catalog(); protected abstract C initCatalog(String catalogName, Map additionalProperties); @@ -3336,34 +3325,4 @@ private List concat(List starting, Namespace... additional namespaces.addAll(Arrays.asList(additional)); return namespaces; } - - public void assertBoundFileScanTasks(Table table, PartitionSpec partitionSpec) { - PartitionData partitionData = new PartitionData(partitionSpec.partitionType()); - try (CloseableIterable tasks = table.newScan().planFiles()) { - Streams.stream(tasks) - .forEach( - task -> { - // assert file scan task spec being bound - assertThat(task.spec().equals(partitionSpec)); - // assert data file spec being bound - assertThat(task.file().partition().equals(partitionData)); - // assert all delete files in task are bound - task.deletes() - .forEach( - deleteFile -> assertThat(deleteFile.partition().equals(partitionData))); - }); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - public void assertBoundFiles(Table table, DataFile dataFile) { - try (CloseableIterable tasks = table.newScan().planFiles()) { - Streams.stream(tasks) - .map(FileScanTask::file) - .forEach(file -> assertThat(file.partition()).isEqualTo(dataFile.partition())); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } } diff --git a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java index 314d80387c41..0f7e76b81e49 100644 --- a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java +++ b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java @@ -18,25 +18,17 @@ */ package org.apache.iceberg.rest; -import static org.apache.iceberg.catalog.CatalogTests.TABLE_COMPLETED_WITH_FILE_SCAN_TASK; 
-import static org.apache.iceberg.catalog.CatalogTests.TABLE_COMPLETED_WITH_NESTED_PLAN_TASK; -import static org.apache.iceberg.catalog.CatalogTests.TABLE_COMPLETED_WITH_PLAN_TASK; -import static org.apache.iceberg.catalog.CatalogTests.TABLE_SUBMITTED_WITH_FILE_SCAN_TASK; - import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; import java.net.URI; import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.UUID; import java.util.function.Consumer; import java.util.stream.Collectors; import org.apache.iceberg.BaseTable; import org.apache.iceberg.BaseTransaction; -import org.apache.iceberg.FileScanTask; import org.apache.iceberg.Table; -import org.apache.iceberg.TableScan; import org.apache.iceberg.Transaction; import org.apache.iceberg.Transactions; import org.apache.iceberg.catalog.Catalog; @@ -57,19 +49,15 @@ import org.apache.iceberg.exceptions.RESTException; import org.apache.iceberg.exceptions.UnprocessableEntityException; import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.base.Splitter; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.rest.HTTPRequest.HTTPMethod; import org.apache.iceberg.rest.auth.AuthSession; import org.apache.iceberg.rest.requests.CommitTransactionRequest; import org.apache.iceberg.rest.requests.CreateNamespaceRequest; import org.apache.iceberg.rest.requests.CreateTableRequest; import org.apache.iceberg.rest.requests.CreateViewRequest; -import org.apache.iceberg.rest.requests.FetchScanTasksRequest; -import org.apache.iceberg.rest.requests.PlanTableScanRequest; import org.apache.iceberg.rest.requests.RegisterTableRequest; import org.apache.iceberg.rest.requests.RenameTableRequest; import 
org.apache.iceberg.rest.requests.ReportMetricsRequest; @@ -78,15 +66,12 @@ import org.apache.iceberg.rest.responses.ConfigResponse; import org.apache.iceberg.rest.responses.CreateNamespaceResponse; import org.apache.iceberg.rest.responses.ErrorResponse; -import org.apache.iceberg.rest.responses.FetchPlanningResultResponse; -import org.apache.iceberg.rest.responses.FetchScanTasksResponse; import org.apache.iceberg.rest.responses.GetNamespaceResponse; import org.apache.iceberg.rest.responses.ListNamespacesResponse; import org.apache.iceberg.rest.responses.ListTablesResponse; import org.apache.iceberg.rest.responses.LoadTableResponse; import org.apache.iceberg.rest.responses.LoadViewResponse; import org.apache.iceberg.rest.responses.OAuthTokenResponse; -import org.apache.iceberg.rest.responses.PlanTableScanResponse; import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse; import org.apache.iceberg.util.Pair; import org.apache.iceberg.util.PropertyUtil; @@ -116,8 +101,6 @@ public class RESTCatalogAdapter extends BaseHTTPClient { private final Catalog catalog; private final SupportsNamespaces asNamespaceCatalog; private final ViewCatalog asViewCatalog; - private Map> planToFileScanTasks; - private Map planToPlanTasks; private AuthSession authSession = AuthSession.EMPTY; @@ -126,8 +109,6 @@ public RESTCatalogAdapter(Catalog catalog) { this.asNamespaceCatalog = catalog instanceof SupportsNamespaces ? (SupportsNamespaces) catalog : null; this.asViewCatalog = catalog instanceof ViewCatalog ? 
(ViewCatalog) catalog : null; - this.planToFileScanTasks = Maps.newHashMap(); - this.planToPlanTasks = Maps.newHashMap(); } enum Route { @@ -158,21 +139,6 @@ enum Route { LoadTableResponse.class), TABLE_EXISTS(HTTPMethod.HEAD, ResourcePaths.V1_TABLE), LOAD_TABLE(HTTPMethod.GET, ResourcePaths.V1_TABLE, null, LoadTableResponse.class), - PLAN_TABLE_SCAN( - HTTPMethod.POST, - "/v1/{prefix}/namespaces/{namespace}/tables/{table}/plan", - PlanTableScanRequest.class, - PlanTableScanResponse.class), - FETCH_PLANNING_RESULT( - HTTPMethod.GET, - "/v1/{prefix}/namespaces/{namespace}/tables/{table}/plan/{plan-id}", - null, - FetchPlanningResultResponse.class), - FETCH_SCAN_TASKS( - HTTPMethod.POST, - "/v1/{prefix}/namespaces/{namespace}/tables/{table}/tasks", - FetchScanTasksRequest.class, - FetchScanTasksResponse.class), REGISTER_TABLE( HTTPMethod.POST, ResourcePaths.V1_TABLE_REGISTER, @@ -567,154 +533,6 @@ public T handleRequest( break; } - case PLAN_TABLE_SCAN: - { - TableIdentifier ident = tableIdentFromPathVars(vars); - PlanTableScanRequest request = castRequest(PlanTableScanRequest.class, body); - Table table = catalog.loadTable(ident); - TableScan tableScan = table.newScan(); - - if (request.snapshotId() != null) { - tableScan.useSnapshot(request.snapshotId()); - } - if (request.select() != null) { - tableScan.select(request.select()); - } - if (request.filter() != null) { - tableScan.filter(request.filter()); - } - if (request.statsFields() != null) { - tableScan.includeColumnStats(request.statsFields()); - } - tableScan.caseSensitive(request.caseSensitive()); - - List fileScanTasks = Lists.newArrayList(); - CloseableIterable returnedTasks = tableScan.planFiles(); - returnedTasks.forEach(task -> fileScanTasks.add(task)); - - if (ident.equals(TABLE_COMPLETED_WITH_FILE_SCAN_TASK)) { - return castResponse( - responseType, - PlanTableScanResponse.builder() - .withPlanStatus(PlanStatus.COMPLETED) - .withFileScanTasks(fileScanTasks) - .withSpecsById(table.specs()) - 
.build()); - } - - if (ident.equals(TABLE_SUBMITTED_WITH_FILE_SCAN_TASK)) { - // this is the case where we return a plan-id, then call fetchPlanningResult to get the - // tasks at a later point - String planId = "plan-id-" + UUID.randomUUID(); - planToFileScanTasks.put(planId, fileScanTasks); - return castResponse( - responseType, - PlanTableScanResponse.builder() - .withPlanId(planId) - .withPlanStatus(PlanStatus.SUBMITTED) - .withSpecsById(table.specs()) - .build()); - } - - if (ident.equals(TABLE_COMPLETED_WITH_PLAN_TASK)) { - // this is the case where we return a list of plan-task, and then call fetchScanTasks - // for each - List planTasks = - List.of("plan-task-" + UUID.randomUUID(), "plan-task-" + UUID.randomUUID()); - planTasks.forEach(task -> planToFileScanTasks.put(task, fileScanTasks)); - return castResponse( - responseType, - PlanTableScanResponse.builder() - .withPlanStatus(PlanStatus.COMPLETED) - .withPlanTasks(planTasks) - .withSpecsById(table.specs()) - .build()); - } - - if (ident.equals(TABLE_COMPLETED_WITH_NESTED_PLAN_TASK)) { - // this is the case where our plan tasks, can return additional plan tasks, and those - // can return file scan tasks. 
- List outerPlanTasks = - List.of( - "outer-plan-task-" + UUID.randomUUID(), "outer-plan-task-" + UUID.randomUUID()); - List innerPlanTasks = - List.of( - "inner-plan-task-" + UUID.randomUUID(), "inner-plan-task-" + UUID.randomUUID()); - - for (int i = 0; i < outerPlanTasks.size(); i++) { - planToPlanTasks.put(outerPlanTasks.get(i), innerPlanTasks.get(i)); - planToFileScanTasks.put(innerPlanTasks.get(i), fileScanTasks); - } - - return castResponse( - responseType, - PlanTableScanResponse.builder() - .withPlanStatus(PlanStatus.COMPLETED) - .withPlanTasks(outerPlanTasks) - .withSpecsById(table.specs()) - .build()); - } - break; - } - - case FETCH_PLANNING_RESULT: - { - TableIdentifier ident = tableIdentFromPathVars(vars); - Table table = catalog.loadTable(ident); - if (ident.equals(TABLE_SUBMITTED_WITH_FILE_SCAN_TASK)) { - String planId = planIDFromPathVars(vars); - return castResponse( - responseType, - FetchPlanningResultResponse.builder() - .withPlanStatus(PlanStatus.fromName("completed")) - .withFileScanTasks(planToFileScanTasks.get(planId)) - .withSpecsById(table.specs()) - .build()); - } - break; - } - - case FETCH_SCAN_TASKS: - { - TableIdentifier ident = tableIdentFromPathVars(vars); - Table table = catalog.loadTable(ident); - FetchScanTasksRequest request = castRequest(FetchScanTasksRequest.class, body); - if (ident.equals(TABLE_COMPLETED_WITH_PLAN_TASK)) { - return castResponse( - responseType, - FetchScanTasksResponse.builder() - .withFileScanTasks(planToFileScanTasks.get(request.planTask())) - .withSpecsById(table.specs()) - .build()); - } - - if (ident.equals(TABLE_COMPLETED_WITH_NESTED_PLAN_TASK)) { - // this is the case where we return another round of nested plan tasks - if (planToPlanTasks.containsKey(request.planTask())) { - String innerPlanTask = planToPlanTasks.remove(request.planTask()); - return castResponse( - responseType, - FetchScanTasksResponse.builder() - .withPlanTasks(List.of(innerPlanTask)) - .withSpecsById(table.specs()) - .build()); - 
} - - if (planToFileScanTasks.containsKey(request.planTask())) { - // this is the case where we get from nested plan tasks the file scan tasks - List fileScanTasksFromPlanTask = - planToFileScanTasks.remove(request.planTask()); - return castResponse( - responseType, - FetchScanTasksResponse.builder() - .withFileScanTasks(fileScanTasksFromPlanTask) - .withSpecsById(table.specs()) - .build()); - } - } - break; - } - default: if (responseType == OAuthTokenResponse.class) { return castResponse(responseType, handleOAuthRequest(body)); @@ -886,8 +704,4 @@ private static TableIdentifier viewIdentFromPathVars(Map pathVar return TableIdentifier.of( namespaceFromPathVars(pathVars), RESTUtil.decodeString(pathVars.get("view"))); } - - private static String planIDFromPathVars(Map pathVars) { - return RESTUtil.decodeString(pathVars.get("plan-id")); - } } diff --git a/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java b/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java index 61f550860e12..aa06319add7c 100644 --- a/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java +++ b/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java @@ -90,12 +90,12 @@ import org.assertj.core.api.InstanceOfAssertFactories; import org.awaitility.Awaitility; import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.server.handler.gzip.GzipHandler; import org.eclipse.jetty.servlet.ServletContextHandler; import org.eclipse.jetty.servlet.ServletHolder; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; @@ -195,26 +195,32 @@ protected RESTCatalog initCatalog(String catalogName, Map additi .withHeaders(RESTUtil.configHeaders(config)) .build()); catalog.setConf(conf); - Map properties = - ImmutableMap.of( - 
CatalogProperties.URI, - httpServer.getURI().toString(), - CatalogProperties.FILE_IO_IMPL, - "org.apache.iceberg.inmemory.InMemoryFileIO", - CatalogProperties.TABLE_DEFAULT_PREFIX + "default-key1", - "catalog-default-key1", - CatalogProperties.TABLE_DEFAULT_PREFIX + "default-key2", - "catalog-default-key2", - CatalogProperties.TABLE_DEFAULT_PREFIX + "override-key3", - "catalog-default-key3", - CatalogProperties.TABLE_OVERRIDE_PREFIX + "override-key3", - "catalog-override-key3", - CatalogProperties.TABLE_OVERRIDE_PREFIX + "override-key4", - "catalog-override-key4", - "credential", - "catalog:12345", - "header.test-header", - "test-value"); + int port = ((ServerConnector) httpServer.getConnectors()[0]).getLocalPort(); + String oauth2ServerUri = "http://127.0.0.1:" + port + "/v1/oauth/tokens"; + + Map properties = Maps.newHashMap(); + + properties.put(CatalogProperties.URI, "http://127.0.0.1:" + port); + properties.put("oauth2-server-uri", oauth2ServerUri); + properties.put("rest.auth.type", "oauth2"); + properties.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.inmemory.InMemoryFileIO"); + + properties.put(CatalogProperties.TABLE_DEFAULT_PREFIX + "default-key1", "catalog-default-key1"); + properties.put(CatalogProperties.TABLE_DEFAULT_PREFIX + "default-key2", "catalog-default-key2"); + properties.put( + CatalogProperties.TABLE_DEFAULT_PREFIX + "override-key3", "catalog-default-key3"); + + properties.put( + CatalogProperties.TABLE_OVERRIDE_PREFIX + "override-key3", "catalog-override-key3"); + properties.put( + CatalogProperties.TABLE_OVERRIDE_PREFIX + "override-key4", "catalog-override-key4"); + + properties.put("credential", "catalog:12345"); + properties.put("header.test-header", "test-value"); + + properties.put("oauth2-server-uri", oauth2ServerUri); + properties.put("rest.auth.type", "oauth2"); // optional but avoids warnings + catalog.initialize( catalogName, ImmutableMap.builder() @@ -355,10 +361,11 @@ public void testInitializeWithBadArguments() throws 
IOException { @Test public void testDefaultHeadersPropagated() { RESTCatalog catalog = new RESTCatalog(); + int port = ((ServerConnector) httpServer.getConnectors()[0]).getLocalPort(); Map properties = Map.of( CatalogProperties.URI, - httpServer.getURI().toString(), + "http://127.0.0.1:" + port, OAuth2Properties.CREDENTIAL, "catalog:secret", "header.test-header", @@ -2842,75 +2849,6 @@ public void testTableExistsFallbackToGETRequestWithLegacyServer() { verifyTableExistsFallbackToGETRequest(ConfigResponse.builder().build()); } - @Test - @Disabled - public void testPlanTableScanWithCompletedStatusAndFileScanTask() throws IOException { - Table table = createRESTTableAndInsertData(TABLE_COMPLETED_WITH_FILE_SCAN_TASK); - assertBoundFileScanTasks(table, SPEC); - } - - @Test - @Disabled - public void testPlanTableScanAndFetchPlanningResultWithSubmittedStatusAndFileScanTask() - throws IOException { - Table table = createRESTTableAndInsertData(TABLE_SUBMITTED_WITH_FILE_SCAN_TASK); - assertBoundFileScanTasks(table, SPEC); - } - - @Test - @Disabled - public void testPlanTableScanAndFetchScanTasksWithCompletedStatusAndPlanTask() - throws IOException { - Table table = createRESTTableAndInsertData(TABLE_COMPLETED_WITH_PLAN_TASK); - assertBoundFileScanTasks(table, SPEC); - } - - @Test - @Disabled - public void testPlanTableScanAndFetchScanTasksWithCompletedStatusAndNestedPlanTasks() - throws IOException { - Table table = createRESTTableAndInsertData(TABLE_COMPLETED_WITH_NESTED_PLAN_TASK); - assertBoundFileScanTasks(table, SPEC); - } - - public Table createRESTTableAndInsertData(TableIdentifier tableIdentifier) throws IOException { - SessionCatalog.SessionContext context = - new SessionCatalog.SessionContext( - UUID.randomUUID().toString(), - "user", - ImmutableMap.of("credential", "user:12345"), - ImmutableMap.of()); - RESTCatalog catalog = - new RESTCatalog( - context, - (config) -> HTTPClient.builder(config).uri(config.get(CatalogProperties.URI)).build()); - catalog.initialize( 
- "test", - ImmutableMap.of( - RESTSessionCatalog.REST_SERVER_PLANNING_ENABLED, - "true", - CatalogProperties.URI, - httpServer.getURI().toString(), - CatalogProperties.FILE_IO_IMPL, - "org.apache.iceberg.inmemory.InMemoryFileIO", - "credential", - "catalog:secret")); - - if (requiresNamespaceCreate()) { - catalog.createNamespace(tableIdentifier.namespace()); - } - - Table table = - catalog - .buildTable(tableIdentifier, SCHEMA) - .withProperty("table.rest-scan-planning", "true") - .withPartitionSpec(SPEC) - .create(); - - table.newAppend().appendFile(FILE_A).commit(); - return table; - } - private RESTCatalog catalog(RESTCatalogAdapter adapter) { RESTCatalog catalog = new RESTCatalog(SessionCatalog.SessionContext.createEmpty(), (config) -> adapter); diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java index 215ea1481da0..7f9035f6165a 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java @@ -26,7 +26,13 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonFactoryBuilder; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; import java.util.List; import org.apache.iceberg.BaseFileScanTask; import org.apache.iceberg.DeleteFile; @@ -36,17 +42,34 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.ResidualEvaluator; import 
org.apache.iceberg.rest.PlanStatus; +import org.apache.iceberg.rest.RESTSerializers; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; public class TestFetchPlanningResultResponseParser { + private static final JsonFactory FACTORY = + new JsonFactoryBuilder() + .configure(JsonFactory.Feature.INTERN_FIELD_NAMES, false) + .configure(JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false) + .build(); + private static final ObjectMapper MAPPER = new ObjectMapper(FACTORY); + + @BeforeEach + public void before() { + RESTSerializers.registerAll(MAPPER); + } + @Test public void nullAndEmptyCheck() { assertThatThrownBy(() -> FetchPlanningResultResponseParser.toJson(null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: fetchPanningResultResponse null"); - assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson((JsonNode) null)) + assertThatThrownBy( + () -> + FetchPlanningResultResponseParser.fromJson( + (JsonNode) null, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: fetchPanningResultResponse null or empty"); } @@ -61,7 +84,9 @@ public void roundTripSerdeWithEmptyObject() { .hasMessage("Invalid status: null"); String emptyJson = "{ }"; - assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson(emptyJson)) + assertThatThrownBy( + () -> + FetchPlanningResultResponseParser.fromJson(emptyJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: fetchPanningResultResponse null or empty"); } @@ -69,7 +94,10 @@ public void roundTripSerdeWithEmptyObject() { @Test public void roundTripSerdeWithInvalidPlanStatus() { String invalidStatusJson = "{\"plan-status\": \"someStatus\"}"; - assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson(invalidStatusJson)) + assertThatThrownBy( + () -> + FetchPlanningResultResponseParser.fromJson( + invalidStatusJson, PARTITION_SPECS_BY_ID, 
false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid status name: someStatus"); } @@ -84,7 +112,8 @@ public void roundTripSerdeWithValidSubmittedStatus() { String json = FetchPlanningResultResponseParser.toJson(response); assertThat(json).isEqualTo(expectedJson); - FetchPlanningResultResponse fromResponse = FetchPlanningResultResponseParser.fromJson(json); + FetchPlanningResultResponse fromResponse = + FetchPlanningResultResponseParser.fromJson(json, PARTITION_SPECS_BY_ID, false); assertThat(FetchPlanningResultResponseParser.toJson(fromResponse)).isEqualTo(expectedJson); } @@ -103,20 +132,25 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithTasksPresent() { String invalidJson = "{\"plan-status\":\"submitted\"," + "\"plan-tasks\":[\"task1\",\"task2\"]}"; - assertThatThrownBy(() -> FetchPlanningResultResponseParser.fromJson(invalidJson)) + assertThatThrownBy( + () -> + FetchPlanningResultResponseParser.fromJson( + invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: tasks can only be returned in a 'completed' status"); } @Test public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileScanTasksPresent() { + PlanStatus planStatus = PlanStatus.fromName("submitted"); assertThatThrownBy( - () -> - FetchPlanningResultResponse.builder() - .withPlanStatus(planStatus) - .withDeleteFiles(List.of(FILE_A_DELETES)) - .build()) + () -> { + FetchPlanningResultResponse.builder() + .withPlanStatus(planStatus) + .withDeleteFiles(List.of(FILE_A_DELETES)) + .build(); + }) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); @@ -128,14 +162,17 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileSca + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; - assertThatThrownBy(() -> 
FetchPlanningResultResponseParser.fromJson(invalidJson)) + assertThatThrownBy( + () -> + FetchPlanningResultResponseParser.fromJson( + invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); } @Test - public void roundTripSerdeWithValidStatusAndFileScanTasks() { + public void roundTripSerdeWithValidStatusAndFileScanTasks() throws JsonProcessingException { ResidualEvaluator residualEvaluator = ResidualEvaluator.of(SPEC, Expressions.equal("id", 1), true); FileScanTask fileScanTask = @@ -172,22 +209,13 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() { String json = FetchPlanningResultResponseParser.toJson(response, false); assertThat(json).isEqualTo(expectedToJson); - // make an unbound json where you expect to not have partitions for the data file, - // delete files as service does not send partition spec - String expectedFromJson = - "{\"plan-status\":\"completed\"," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," - + "\"partition\":{},\"file-size-in-bytes\":10,\"record-count\":1}]," - + "\"file-scan-tasks\":[" - + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":{}," - + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," - + "\"delete-file-references\":[0]," - + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" - + "}"; - - FetchPlanningResultResponse fromResponse = FetchPlanningResultResponseParser.fromJson(json); + // use RESTObjectMapper to read this + InjectableValues.Std injectableValues = new InjectableValues.Std(); + injectableValues.addValue("specsById", PARTITION_SPECS_BY_ID); + injectableValues.addValue("caseSensitive", false); + ObjectReader objectReader = + 
MAPPER.readerFor(FetchPlanningResultResponse.class).with(injectableValues); + FetchPlanningResultResponse fromResponse = objectReader.readValue(json); // Need to make a new response with partitionSpec set FetchPlanningResultResponse copyResponse = FetchPlanningResultResponse.builder() @@ -201,6 +229,6 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() { // can't do an equality comparison on PlanTableScanRequest because we don't implement // equals/hashcode assertThat(FetchPlanningResultResponseParser.toJson(copyResponse, false)) - .isEqualTo(expectedFromJson); + .isEqualTo(expectedToJson); } } diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java index 4ddfbd1fcced..2399e82b100a 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java @@ -45,7 +45,10 @@ public void nullAndEmptyCheck() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: fetchScanTasksResponse null"); - assertThatThrownBy(() -> FetchScanTasksResponseParser.fromJson((JsonNode) null)) + assertThatThrownBy( + () -> + FetchScanTasksResponseParser.fromJson( + (JsonNode) null, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: fetchScanTasksResponse null"); } @@ -58,7 +61,8 @@ public void roundTripSerdeWithEmptyObject() { .hasMessage("Invalid response: planTasks and fileScanTask cannot both be null"); String emptyJson = "{ }"; - assertThatThrownBy(() -> FetchScanTasksResponseParser.fromJson(emptyJson)) + assertThatThrownBy( + () -> FetchScanTasksResponseParser.fromJson(emptyJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: fetchScanTasksResponse null"); } @@ -71,7 
+75,8 @@ public void roundTripSerdeWithPlanTasks() { FetchScanTasksResponse.builder().withPlanTasks(List.of("task1", "task2")).build()); assertThat(json).isEqualTo(expectedJson); - FetchScanTasksResponse fromResponse = FetchScanTasksResponseParser.fromJson(json); + FetchScanTasksResponse fromResponse = + FetchScanTasksResponseParser.fromJson(json, PARTITION_SPECS_BY_ID, false); // can't do an equality comparison on PlanTableScanRequest because we don't implement // equals/hashcode @@ -97,7 +102,8 @@ public void roundTripSerdeWithDeleteFilesNoFileScanTasksPresent() { + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; - assertThatThrownBy(() -> FetchScanTasksResponseParser.fromJson(invalidJson)) + assertThatThrownBy( + () -> FetchScanTasksResponseParser.fromJson(invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); @@ -139,22 +145,8 @@ public void roundTripSerdeWithFileScanTasks() { String json = FetchScanTasksResponseParser.toJson(response, false); assertThat(json).isEqualTo(expectedToJson); - // make an unbound json where you expect to not have partitions for the data file, - // delete files as service does not send parition spec - String expectedFromJson = - "{" - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," - + "\"partition\":{},\"file-size-in-bytes\":10,\"record-count\":1}]," - + "\"file-scan-tasks\":[" - + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":{}," - + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," - + "\"delete-file-references\":[0]," - + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" - + "}"; - - FetchScanTasksResponse 
fromResponse = FetchScanTasksResponseParser.fromJson(json); + FetchScanTasksResponse fromResponse = + FetchScanTasksResponseParser.fromJson(json, PARTITION_SPECS_BY_ID, false); // Need to make a new response with partitionSpec set FetchScanTasksResponse copyResponse = FetchScanTasksResponse.builder() @@ -166,7 +158,6 @@ public void roundTripSerdeWithFileScanTasks() { // can't do an equality comparison on PlanTableScanRequest because we don't implement // equals/hashcode - assertThat(FetchScanTasksResponseParser.toJson(copyResponse, false)) - .isEqualTo(expectedFromJson); + assertThat(FetchScanTasksResponseParser.toJson(copyResponse, false)).isEqualTo(expectedToJson); } } diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java index ef39e47bdc31..315898d32e28 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java @@ -39,13 +39,16 @@ import org.junit.jupiter.api.Test; public class TestPlanTableScanResponseParser { + @Test public void nullAndEmptyCheck() { assertThatThrownBy(() -> PlanTableScanResponseParser.toJson(null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: planTableScanResponse null"); - assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson((JsonNode) null)) + assertThatThrownBy( + () -> + PlanTableScanResponseParser.fromJson((JsonNode) null, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse planTableScan response from empty or null object"); } @@ -58,7 +61,8 @@ public void roundTripSerdeWithEmptyObject() { .hasMessage("Invalid response: plan status must be defined"); String emptyJson = "{ }"; - assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(emptyJson)) + assertThatThrownBy( 
+ () -> PlanTableScanResponseParser.fromJson(emptyJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse planTableScan response from empty or null object"); } @@ -66,7 +70,10 @@ public void roundTripSerdeWithEmptyObject() { @Test public void roundTripSerdeWithInvalidPlanStatus() { String invalidStatusJson = "{\"plan-status\": \"someStatus\"}"; - assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidStatusJson)) + assertThatThrownBy( + () -> + PlanTableScanResponseParser.fromJson( + invalidStatusJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid status name: someStatus"); } @@ -80,7 +87,8 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithoutPlanId() { .hasMessage("Invalid response: plan id should be defined when status is 'submitted'"); String invalidJson = "{\"plan-status\":\"submitted\"}"; - assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + assertThatThrownBy( + () -> PlanTableScanResponseParser.fromJson(invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: plan id should be defined when status is 'submitted'"); } @@ -93,7 +101,8 @@ public void roundTripSerdeWithInvalidPlanStatusCancelled() { .hasMessage("Invalid response: 'cancelled' is not a valid status for planTableScan"); String invalidJson = "{\"plan-status\":\"cancelled\"}"; - assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + assertThatThrownBy( + () -> PlanTableScanResponseParser.fromJson(invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: 'cancelled' is not a valid status for planTableScan"); } @@ -116,7 +125,8 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithTasksPresent() { + "\"plan-id\":\"somePlanId\"," + "\"plan-tasks\":[\"task1\",\"task2\"]}"; - 
assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + assertThatThrownBy( + () -> PlanTableScanResponseParser.fromJson(invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: tasks can only be returned in a 'completed' status"); } @@ -135,7 +145,8 @@ public void roundTripSerdeWithInvalidPlanIdWithIncorrectStatus() { String invalidJson = "{\"plan-status\":\"failed\"," + "\"plan-id\":\"somePlanId\"}"; - assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + assertThatThrownBy( + () -> PlanTableScanResponseParser.fromJson(invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid response: plan id can only be returned in a 'submitted' status"); } @@ -162,7 +173,8 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileSca + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; - assertThatThrownBy(() -> PlanTableScanResponseParser.fromJson(invalidJson)) + assertThatThrownBy( + () -> PlanTableScanResponseParser.fromJson(invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); @@ -205,20 +217,8 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() { String json = PlanTableScanResponseParser.toJson(response); assertThat(json).isEqualTo(expectedToJson); - String expectedFromJson = - "{\"plan-status\":\"completed\"," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," - + "\"partition\":{},\"file-size-in-bytes\":10,\"record-count\":1}]," - + "\"file-scan-tasks\":[" - + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," - + 
"\"file-format\":\"PARQUET\",\"partition\":{}," - + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," - + "\"delete-file-references\":[0]," - + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" - + "}"; - - PlanTableScanResponse fromResponse = PlanTableScanResponseParser.fromJson(json); + PlanTableScanResponse fromResponse = + PlanTableScanResponseParser.fromJson(json, PARTITION_SPECS_BY_ID, false); PlanTableScanResponse copyResponse = PlanTableScanResponse.builder() .withPlanStatus(fromResponse.planStatus()) @@ -231,6 +231,6 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() { // can't do an equality comparison on PlanTableScanRequest because we don't implement // equals/hashcode - assertThat(PlanTableScanResponseParser.toJson(copyResponse)).isEqualTo(expectedFromJson); + assertThat(PlanTableScanResponseParser.toJson(copyResponse)).isEqualTo(expectedToJson); } } From 603a43fce36d52b69c6603b6e05cde2c00731e76 Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Wed, 25 Jun 2025 18:14:03 -0700 Subject: [PATCH 3/9] remove the unnecessary code --- .../iceberg/RESTFileScanTaskParser.java | 9 ++-- .../iceberg/TableScanResponseParser.java | 2 - .../iceberg/rest/RESTSessionCatalog.java | 4 ++ .../apache/iceberg/rest/ResourcePaths.java | 34 ------------- .../apache/iceberg/rest/TestRESTCatalog.java | 49 ++++++++----------- 5 files changed, 29 insertions(+), 69 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java index ef57d2d1c991..56d16c395478 100644 --- a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java @@ -35,7 +35,7 @@ public class RESTFileScanTaskParser { private static final String DATA_FILE = "data-file"; private static final String DELETE_FILE_REFERENCES = "delete-file-references"; - private static final String RESIDUAL = 
"residual-filter"; + private static final String RESIDUAL_FILTER = "residual-filter"; private RESTFileScanTaskParser() {} @@ -56,7 +56,7 @@ public static void toJson( } if (fileScanTask.residual() != null) { - generator.writeFieldName(RESIDUAL); + generator.writeFieldName(RESIDUAL_FILTER); ExpressionParser.toJson(fileScanTask.residual(), generator); } generator.writeEndObject(); @@ -73,7 +73,6 @@ public static FileScanTask fromJson( DataFile dataFile = (DataFile) ContentFileParser.fromJson(JsonUtil.get(DATA_FILE, jsonNode), specsById); - // specId from the dataFile int specId = dataFile.specId(); DeleteFile[] deleteFiles = null; @@ -87,8 +86,8 @@ public static FileScanTask fromJson( } Expression filter = null; - if (jsonNode.has(RESIDUAL)) { - filter = ExpressionParser.fromJson(jsonNode.get(RESIDUAL)); + if (jsonNode.has(RESIDUAL_FILTER)) { + filter = ExpressionParser.fromJson(jsonNode.get(RESIDUAL_FILTER)); } String schemaString = SchemaParser.toJson(specsById.get(specId).schema()); diff --git a/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java index e66f683ef60e..9c76a99d6684 100644 --- a/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java @@ -60,8 +60,6 @@ public static List parseFileScanTasks( List deleteFiles, Map specsById, boolean caseSensitive) { - // TODO: add assertions in the code to make sure all these are set - // before we start parsing. 
if (node.has(FILE_SCAN_TASKS)) { JsonNode scanTasks = JsonUtil.get(FILE_SCAN_TASKS, node); Preconditions.checkArgument( diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java index 33c396b6b5c9..4e1c339d1fe9 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java @@ -452,6 +452,7 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) { endpoints); trackFileIO(ops); + BaseTable table = new BaseTable( ops, @@ -530,6 +531,7 @@ public Table registerTable( endpoints); trackFileIO(ops); + return new BaseTable( ops, fullTableName(ident), metricsReporter(paths.metrics(ident), tableClient)); } @@ -786,7 +788,9 @@ public Table create() { tableFileIO(context, tableConf, response.credentials()), response.tableMetadata(), endpoints); + trackFileIO(ops); + return new BaseTable( ops, fullTableName(ident), metricsReporter(paths.metrics(ident), tableClient)); } diff --git a/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java b/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java index ca42f4a38844..d5c11f6052f1 100644 --- a/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java +++ b/core/src/main/java/org/apache/iceberg/rest/ResourcePaths.java @@ -130,38 +130,4 @@ public String view(TableIdentifier ident) { public String renameView() { return SLASH.join("v1", prefix, "views", "rename"); } - - public String planTableScan(TableIdentifier ident) { - return SLASH.join( - "v1", - prefix, - "namespaces", - RESTUtil.encodeNamespace(ident.namespace()), - "tables", - RESTUtil.encodeString(ident.name()), - "plan"); - } - - public String fetchPlanningResult(TableIdentifier ident, String planId) { - return SLASH.join( - "v1", - prefix, - "namespaces", - RESTUtil.encodeNamespace(ident.namespace()), - "tables", - RESTUtil.encodeString(ident.name()), - "plan", 
- planId); - } - - public String fetchScanTasks(TableIdentifier ident) { - return SLASH.join( - "v1", - prefix, - "namespaces", - RESTUtil.encodeNamespace(ident.namespace()), - "tables", - RESTUtil.encodeString(ident.name()), - "tasks"); - } } diff --git a/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java b/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java index aa06319add7c..1b550eae3231 100644 --- a/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java +++ b/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java @@ -90,7 +90,6 @@ import org.assertj.core.api.InstanceOfAssertFactories; import org.awaitility.Awaitility; import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.server.handler.gzip.GzipHandler; import org.eclipse.jetty.servlet.ServletContextHandler; import org.eclipse.jetty.servlet.ServletHolder; @@ -195,31 +194,26 @@ protected RESTCatalog initCatalog(String catalogName, Map additi .withHeaders(RESTUtil.configHeaders(config)) .build()); catalog.setConf(conf); - int port = ((ServerConnector) httpServer.getConnectors()[0]).getLocalPort(); - String oauth2ServerUri = "http://127.0.0.1:" + port + "/v1/oauth/tokens"; - - Map properties = Maps.newHashMap(); - - properties.put(CatalogProperties.URI, "http://127.0.0.1:" + port); - properties.put("oauth2-server-uri", oauth2ServerUri); - properties.put("rest.auth.type", "oauth2"); - properties.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.inmemory.InMemoryFileIO"); - - properties.put(CatalogProperties.TABLE_DEFAULT_PREFIX + "default-key1", "catalog-default-key1"); - properties.put(CatalogProperties.TABLE_DEFAULT_PREFIX + "default-key2", "catalog-default-key2"); - properties.put( - CatalogProperties.TABLE_DEFAULT_PREFIX + "override-key3", "catalog-default-key3"); - - properties.put( - CatalogProperties.TABLE_OVERRIDE_PREFIX + "override-key3", "catalog-override-key3"); - properties.put( - 
CatalogProperties.TABLE_OVERRIDE_PREFIX + "override-key4", "catalog-override-key4"); - - properties.put("credential", "catalog:12345"); - properties.put("header.test-header", "test-value"); - - properties.put("oauth2-server-uri", oauth2ServerUri); - properties.put("rest.auth.type", "oauth2"); // optional but avoids warnings + Map properties = + ImmutableMap.of( + CatalogProperties.URI, + httpServer.getURI().toString(), + CatalogProperties.FILE_IO_IMPL, + "org.apache.iceberg.inmemory.InMemoryFileIO", + CatalogProperties.TABLE_DEFAULT_PREFIX + "default-key1", + "catalog-default-key1", + CatalogProperties.TABLE_DEFAULT_PREFIX + "default-key2", + "catalog-default-key2", + CatalogProperties.TABLE_DEFAULT_PREFIX + "override-key3", + "catalog-default-key3", + CatalogProperties.TABLE_OVERRIDE_PREFIX + "override-key3", + "catalog-override-key3", + CatalogProperties.TABLE_OVERRIDE_PREFIX + "override-key4", + "catalog-override-key4", + "credential", + "catalog:12345", + "header.test-header", + "test-value"); catalog.initialize( catalogName, @@ -361,11 +355,10 @@ public void testInitializeWithBadArguments() throws IOException { @Test public void testDefaultHeadersPropagated() { RESTCatalog catalog = new RESTCatalog(); - int port = ((ServerConnector) httpServer.getConnectors()[0]).getLocalPort(); Map properties = Map.of( CatalogProperties.URI, - "http://127.0.0.1:" + port, + httpServer.getURI().toString(), OAuth2Properties.CREDENTIAL, "catalog:secret", "header.test-header", From 3761e6fd28b35186a191ac05a7b6886c487ae38a Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Fri, 8 Aug 2025 18:04:12 -0700 Subject: [PATCH 4/9] Address review feedback (partially) --- .../org/apache/iceberg/ContentFileParser.java | 4 ++++ .../java/org/apache/iceberg/DataTaskParser.java | 4 +--- .../org/apache/iceberg/FileScanTaskParser.java | 8 ++------ .../apache/iceberg/RESTFileScanTaskParser.java | 14 ++++++-------- .../requests/FetchScanTasksRequestParser.java | 4 ++-- 
.../requests/PlanTableScanRequestParser.java | 4 ++-- .../FetchPlanningResultResponseParser.java | 8 +++----- .../apache/iceberg/TestContentFileParser.java | 2 +- .../org/apache/iceberg/rest/TestRESTCatalog.java | 1 - .../rest/requests/TestFetchScanTasksRequest.java | 7 ++----- .../rest/requests/TestPlanTableScanRequest.java | 16 ++-------------- .../TestFetchPlanningResultResponseParser.java | 8 +++----- .../TestFetchScanTasksResponseParser.java | 4 ---- .../TestPlanTableScanResponseParser.java | 2 -- 14 files changed, 28 insertions(+), 58 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index 0626338c15e6..33519bb8a9a6 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -134,6 +134,10 @@ static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGenerator generator.writeEndObject(); } + static ContentFile fromJson(JsonNode jsonNode, PartitionSpec spec) { + return fromJson(jsonNode, spec == null ? 
null : Map.of(spec.specId(), spec)); + } + static ContentFile fromJson(JsonNode jsonNode, Map specsById) { Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for content file: null"); Preconditions.checkArgument( diff --git a/core/src/main/java/org/apache/iceberg/DataTaskParser.java b/core/src/main/java/org/apache/iceberg/DataTaskParser.java index 585ac29b3748..428bcf15e7e2 100644 --- a/core/src/main/java/org/apache/iceberg/DataTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/DataTaskParser.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; -import java.util.Map; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; @@ -65,8 +64,7 @@ static StaticDataTask fromJson(JsonNode jsonNode) { DataFile metadataFile = (DataFile) ContentFileParser.fromJson( - JsonUtil.get(METADATA_FILE, jsonNode), - Map.of(PartitionSpec.unpartitioned().specId(), PartitionSpec.unpartitioned())); + JsonUtil.get(METADATA_FILE, jsonNode), PartitionSpec.unpartitioned()); JsonNode rowsArray = JsonUtil.get(ROWS, jsonNode); Preconditions.checkArgument( diff --git a/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java index 01d1443efdb8..7ae7dc74a72e 100644 --- a/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; -import java.util.Map; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.ExpressionParser; import org.apache.iceberg.expressions.Expressions; @@ -87,9 +86,7 @@ static FileScanTask fromJson(JsonNode jsonNode, boolean caseSensitive) { DataFile dataFile = null; if (jsonNode.has(DATA_FILE)) 
{ - dataFile = - (DataFile) - ContentFileParser.fromJson(jsonNode.get(DATA_FILE), Map.of(spec.specId(), spec)); + dataFile = (DataFile) ContentFileParser.fromJson(jsonNode.get(DATA_FILE), spec); } long start = JsonUtil.getLong(START, jsonNode); @@ -105,8 +102,7 @@ static FileScanTask fromJson(JsonNode jsonNode, boolean caseSensitive) { // parse the schema array ImmutableList.Builder builder = ImmutableList.builder(); for (JsonNode deleteFileNode : deletesArray) { - DeleteFile deleteFile = - (DeleteFile) ContentFileParser.fromJson(deleteFileNode, Map.of(spec.specId(), spec)); + DeleteFile deleteFile = (DeleteFile) ContentFileParser.fromJson(deleteFileNode, spec); builder.add(deleteFile); } diff --git a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java index 56d16c395478..5add84c19230 100644 --- a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java @@ -28,8 +28,6 @@ import org.apache.iceberg.expressions.ExpressionParser; import org.apache.iceberg.expressions.ResidualEvaluator; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.util.JsonUtil; public class RESTFileScanTaskParser { @@ -59,6 +57,7 @@ public static void toJson( generator.writeFieldName(RESIDUAL_FILTER); ExpressionParser.toJson(fileScanTask.residual(), generator); } + generator.writeEndObject(); } @@ -76,13 +75,12 @@ public static FileScanTask fromJson( int specId = dataFile.specId(); DeleteFile[] deleteFiles = null; - Set deleteFileReferences = Sets.newHashSet(); if (jsonNode.has(DELETE_FILE_REFERENCES)) { - deleteFileReferences.addAll(JsonUtil.getIntegerList(DELETE_FILE_REFERENCES, jsonNode)); - ImmutableList.Builder builder = 
ImmutableList.builder(); - deleteFileReferences.forEach( - delIdx -> builder.add((GenericDeleteFile) allDeleteFiles.get(delIdx))); - deleteFiles = builder.build().toArray(new GenericDeleteFile[0]); + List indices = JsonUtil.getIntegerList(DELETE_FILE_REFERENCES, jsonNode); + deleteFiles = + indices.stream() + .map(index -> (GenericDeleteFile) allDeleteFiles.get(index)) + .toArray(GenericDeleteFile[]::new); } Expression filter = null; diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequestParser.java b/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequestParser.java index fa9af3da0c90..81f559e47b4f 100644 --- a/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequestParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/requests/FetchScanTasksRequestParser.java @@ -38,7 +38,7 @@ public static String toJson(FetchScanTasksRequest request, boolean pretty) { } public static void toJson(FetchScanTasksRequest request, JsonGenerator gen) throws IOException { - Preconditions.checkArgument(null != request, "Invalid request: fetchScanTasks request null"); + Preconditions.checkArgument(null != request, "Invalid fetchScanTasks request: null"); gen.writeStartObject(); gen.writeStringField(PLAN_TASK, request.planTask()); gen.writeEndObject(); @@ -49,7 +49,7 @@ public static FetchScanTasksRequest fromJson(String json) { } public static FetchScanTasksRequest fromJson(JsonNode json) { - Preconditions.checkArgument(null != json, "Invalid request: fetchScanTasks null"); + Preconditions.checkArgument(null != json, "Invalid fetchScanTasks request: null"); String planTask = JsonUtil.getString(PLAN_TASK, json); return new FetchScanTasksRequest(planTask); diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequestParser.java b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequestParser.java index e840841fcfcf..9b2eb9adb4e1 100644 --- 
a/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequestParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequestParser.java @@ -49,7 +49,7 @@ public static String toJson(PlanTableScanRequest request, boolean pretty) { @SuppressWarnings("checkstyle:CyclomaticComplexity") public static void toJson(PlanTableScanRequest request, JsonGenerator gen) throws IOException { - Preconditions.checkArgument(null != request, "Invalid request: planTableScanRequest null"); + Preconditions.checkArgument(null != request, "Invalid planTableScanRequest: null"); if (request.snapshotId() != null || request.startSnapshotId() != null @@ -96,7 +96,7 @@ public static PlanTableScanRequest fromJson(String json) { } public static PlanTableScanRequest fromJson(JsonNode json) { - Preconditions.checkArgument(null != json, "Invalid request: planTableScanRequest null"); + Preconditions.checkArgument(null != json, "Invalid planTableScanRequest: null"); Long snapshotId = JsonUtil.getLongOrNull(SNAPSHOT_ID, json); Long startSnapshotId = JsonUtil.getLongOrNull(START_SNAPSHOT_ID, json); diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java index 179db81cbe53..b2527e3538de 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java @@ -47,8 +47,7 @@ public static String toJson(FetchPlanningResultResponse response, boolean pretty public static void toJson(FetchPlanningResultResponse response, JsonGenerator gen) throws IOException { - Preconditions.checkArgument( - null != response, "Invalid response: fetchPanningResultResponse null"); + Preconditions.checkArgument(null != response, "Invalid fetchPlanningResult response: null"); Preconditions.checkArgument( 
response.specsById() != null || (response.fileScanTasks() == null || response.fileScanTasks().isEmpty()), @@ -66,7 +65,7 @@ public static void toJson(FetchPlanningResultResponse response, JsonGenerator ge public static FetchPlanningResultResponse fromJson( String json, Map specsById, boolean caseSensitive) { - Preconditions.checkArgument(json != null, "Invalid response: fetchPanningResultResponse null"); + Preconditions.checkArgument(json != null, "Invalid fetchPlanningResult response: null"); return JsonUtil.parse( json, node -> { @@ -77,8 +76,7 @@ public static FetchPlanningResultResponse fromJson( public static FetchPlanningResultResponse fromJson( JsonNode json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument( - json != null && !json.isEmpty(), - "Invalid response: fetchPanningResultResponse null or empty"); + json != null && !json.isEmpty(), "Invalid fetchPlanningResult response: null or empty"); PlanStatus planStatus = PlanStatus.fromName(JsonUtil.getString(PLAN_STATUS, json)); List planTasks = JsonUtil.getStringListOrNull(PLAN_TASKS, json); diff --git a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java index 8c3f43df9e8a..3f463f722e9e 100644 --- a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java +++ b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java @@ -59,7 +59,7 @@ public void testNullArguments() throws Exception { String jsonStr = ContentFileParser.toJson(TestBase.FILE_A, TestBase.SPEC); JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); - assertThatThrownBy(() -> ContentFileParser.fromJson(jsonNode, null)) + assertThatThrownBy(() -> ContentFileParser.fromJson(jsonNode, (PartitionSpec) null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid partition spec: null"); } diff --git a/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java 
b/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java index 1b550eae3231..47c27c975762 100644 --- a/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java +++ b/core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java @@ -214,7 +214,6 @@ protected RESTCatalog initCatalog(String catalogName, Map additi "catalog:12345", "header.test-header", "test-value"); - catalog.initialize( catalogName, ImmutableMap.builder() diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestFetchScanTasksRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestFetchScanTasksRequest.java index a68f0d3d2b29..a911bb5ae403 100644 --- a/core/src/test/java/org/apache/iceberg/rest/requests/TestFetchScanTasksRequest.java +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestFetchScanTasksRequest.java @@ -30,11 +30,11 @@ public class TestFetchScanTasksRequest { public void nullAndEmptyCheck() { assertThatThrownBy(() -> FetchScanTasksRequestParser.toJson(null)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid request: fetchScanTasks request null"); + .hasMessage("Invalid fetchScanTasks request: null"); assertThatThrownBy(() -> FetchScanTasksRequestParser.fromJson((JsonNode) null)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid request: fetchScanTasks null"); + .hasMessage("Invalid fetchScanTasks request: null"); } @Test @@ -43,9 +43,6 @@ public void roundTripSerdeWithPlanTask() { String expectedJson = "{\"plan-task\":\"somePlanTask\"}"; String json = FetchScanTasksRequestParser.toJson(request, false); assertThat(json).isEqualTo(expectedJson); - - // can't do an equality comparison on FetchScanTasksRequest because we don't implement - // equals/hashcode assertThat( FetchScanTasksRequestParser.toJson(FetchScanTasksRequestParser.fromJson(json), false)) .isEqualTo(expectedJson); diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java 
b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java index 43cf0d8b3aa4..281742386e4a 100644 --- a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java @@ -32,11 +32,11 @@ public class TestPlanTableScanRequest { public void nullAndEmptyCheck() { assertThatThrownBy(() -> PlanTableScanRequestParser.toJson(null)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid request: planTableScanRequest null"); + .hasMessage("Invalid planTableScanRequest: null"); assertThatThrownBy(() -> PlanTableScanRequestParser.fromJson((JsonNode) null)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid request: planTableScanRequest null"); + .hasMessage("Invalid planTableScanRequest: null"); } @Test @@ -47,9 +47,6 @@ public void roundTripSerdeWithEmptyRequestAndDefaultsPresent() { String json = PlanTableScanRequestParser.toJson(request, false); assertThat(json).isEqualTo(expectedJson); - - // can't do an equality comparison on PlanTableScanRequest because we don't implement - // equals/hashcode assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) .isEqualTo(expectedJson); } @@ -70,9 +67,6 @@ public void roundTripSerdeWithSelectField() { String json = PlanTableScanRequestParser.toJson(request, false); assertThat(json).isEqualTo(expectedJson); - - // can't do an equality comparison on PlanTableScanRequest because we don't implement - // equals/hashcode assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) .isEqualTo(expectedJson); } @@ -93,9 +87,6 @@ public void roundTripSerdeWithFilterField() { String json = PlanTableScanRequestParser.toJson(request, false); assertThat(json).isEqualTo(expectedJson); - - // can't do an equality comparison on PlanTableScanRequest because we don't implement - // equals/hashcode 
assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) .isEqualTo(expectedJson); } @@ -144,9 +135,6 @@ public void roundTripSerdeWithAllFieldsExceptSnapShotId() { String json = PlanTableScanRequestParser.toJson(request, false); assertThat(json).isEqualTo(expectedJson); - - // can't do an equality comparison on PlanTableScanRequest because we don't implement - // equals/hashcode assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) .isEqualTo(expectedJson); } diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java index 7f9035f6165a..a578bfe29fd1 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java @@ -64,14 +64,14 @@ public void before() { public void nullAndEmptyCheck() { assertThatThrownBy(() -> FetchPlanningResultResponseParser.toJson(null)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid response: fetchPanningResultResponse null"); + .hasMessage("Invalid fetchPlanningResult response: null"); assertThatThrownBy( () -> FetchPlanningResultResponseParser.fromJson( (JsonNode) null, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid response: fetchPanningResultResponse null or empty"); + .hasMessage("Invalid fetchPlanningResult response: null or empty"); } @Test @@ -88,7 +88,7 @@ public void roundTripSerdeWithEmptyObject() { () -> FetchPlanningResultResponseParser.fromJson(emptyJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid response: fetchPanningResultResponse null or empty"); + .hasMessage("Invalid fetchPlanningResult response: null or empty"); } @Test @@ 
-226,8 +226,6 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() throws JsonProcessin .withSpecsById(PARTITION_SPECS_BY_ID) .build(); - // can't do an equality comparison on PlanTableScanRequest because we don't implement - // equals/hashcode assertThat(FetchPlanningResultResponseParser.toJson(copyResponse, false)) .isEqualTo(expectedToJson); } diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java index 2399e82b100a..01ca8288fb40 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java @@ -78,8 +78,6 @@ public void roundTripSerdeWithPlanTasks() { FetchScanTasksResponse fromResponse = FetchScanTasksResponseParser.fromJson(json, PARTITION_SPECS_BY_ID, false); - // can't do an equality comparison on PlanTableScanRequest because we don't implement - // equals/hashcode assertThat(FetchScanTasksResponseParser.toJson(fromResponse, false)).isEqualTo(expectedJson); } @@ -156,8 +154,6 @@ public void roundTripSerdeWithFileScanTasks() { .withSpecsById(PARTITION_SPECS_BY_ID) .build(); - // can't do an equality comparison on PlanTableScanRequest because we don't implement - // equals/hashcode assertThat(FetchScanTasksResponseParser.toJson(copyResponse, false)).isEqualTo(expectedToJson); } } diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java index 315898d32e28..138016836e18 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java @@ -229,8 +229,6 @@ public void 
roundTripSerdeWithValidStatusAndFileScanTasks() { .withSpecsById(PARTITION_SPECS_BY_ID) .build(); - // can't do an equality comparison on PlanTableScanRequest because we don't implement - // equals/hashcode assertThat(PlanTableScanResponseParser.toJson(copyResponse)).isEqualTo(expectedToJson); } } From dd41c9d2ff26950ccaecbd8221333da227032f23 Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Sat, 9 Aug 2025 10:31:55 -0700 Subject: [PATCH 5/9] refactor --- .../rest/responses/BaseScanResponse.java | 97 +++++++++++++++++++ .../FetchPlanningResultResponse.java | 57 ++--------- .../responses/FetchScanTasksResponse.java | 65 ++----------- .../rest/responses/PlanTableScanResponse.java | 62 ++---------- .../rest/responses/TableScanResponse.java | 23 ----- 5 files changed, 119 insertions(+), 185 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java delete mode 100644 core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponse.java diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java new file mode 100644 index 000000000000..2eedd21730ae --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.rest.responses; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.rest.RESTResponse; + +public abstract class BaseScanResponse implements RESTResponse { + + private final List planTasks; + private final List fileScanTasks; + private final List deleteFiles; + private final Map specsById; + + protected BaseScanResponse( + List planTasks, + List fileScanTasks, + List deleteFiles, + Map specsById) { + this.planTasks = planTasks; + this.fileScanTasks = fileScanTasks; + this.deleteFiles = deleteFiles; + this.specsById = specsById; + } + + public List planTasks() { + return planTasks; + } + + public List fileScanTasks() { + return fileScanTasks; + } + + public List deleteFiles() { + return deleteFiles; + } + + public Map specsById() { + return specsById; + } + + public abstract static class Builder, R extends BaseScanResponse> { + protected List planTasks; + protected List fileScanTasks; + protected List deleteFiles; + protected Map specsById; + + protected Builder() {} + + @SuppressWarnings("unchecked") + public B self() { + return (B) this; + } + + public B withPlanTasks(List planTasks) { + this.planTasks = planTasks; + return self(); + } + + public B withFileScanTasks(List fileScanTasks) { + this.fileScanTasks = fileScanTasks; + return self(); + } + + public B withDeleteFiles(List deleteFiles) { + this.deleteFiles = deleteFiles; + return self(); + } + + public B 
withSpecsById(Map specsById) { + this.specsById = specsById; + return self(); + } + + public abstract R build(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java index 29af32a03d38..a18bba8ad496 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java @@ -26,12 +26,8 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; -public class FetchPlanningResultResponse implements TableScanResponse { +public class FetchPlanningResultResponse extends BaseScanResponse { private final PlanStatus planStatus; - private final List planTasks; - private final List fileScanTasks; - private final List deleteFiles; - private final Map specsById; private FetchPlanningResultResponse( PlanStatus planStatus, @@ -39,11 +35,8 @@ private FetchPlanningResultResponse( List fileScanTasks, List deleteFiles, Map specsById) { + super(planTasks, fileScanTasks, deleteFiles, specsById); this.planStatus = planStatus; - this.planTasks = planTasks; - this.fileScanTasks = fileScanTasks; - this.deleteFiles = deleteFiles; - this.specsById = specsById; validate(); } @@ -51,22 +44,6 @@ public PlanStatus planStatus() { return planStatus; } - public List planTasks() { - return planTasks; - } - - public List fileScanTasks() { - return fileScanTasks; - } - - public List deleteFiles() { - return deleteFiles; - } - - public Map specsById() { - return specsById; - } - public static Builder builder() { return new Builder(); } @@ -77,47 +54,25 @@ public void validate() { Preconditions.checkArgument( planStatus() == PlanStatus.COMPLETED || (planTasks() == null && fileScanTasks() == null), "Invalid response: tasks can only be returned in a 'completed' status"); - if 
(fileScanTasks() == null || fileScanTasks.isEmpty()) { + if (fileScanTasks() == null || fileScanTasks().isEmpty()) { Preconditions.checkArgument( (deleteFiles() == null || deleteFiles().isEmpty()), "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); } } - public static class Builder { + public static class Builder + extends BaseScanResponse.Builder { private Builder() {} private PlanStatus planStatus; - private List planTasks; - private List fileScanTasks; - private List deleteFiles; - private Map specsById; public Builder withPlanStatus(PlanStatus status) { this.planStatus = status; return this; } - public Builder withPlanTasks(List tasks) { - this.planTasks = tasks; - return this; - } - - public Builder withFileScanTasks(List tasks) { - this.fileScanTasks = tasks; - return this; - } - - public Builder withDeleteFiles(List deletes) { - this.deleteFiles = deletes; - return this; - } - - public Builder withSpecsById(Map specs) { - this.specsById = specs; - return this; - } - + @Override public FetchPlanningResultResponse build() { return new FetchPlanningResultResponse( planStatus, planTasks, fileScanTasks, deleteFiles, specsById); diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java index fd512ab3b499..16914297ba2b 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java @@ -25,47 +25,20 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -public class FetchScanTasksResponse implements TableScanResponse { - private final List planTasks; - private final List fileScanTasks; - private final List deleteFiles; - private final Map specsById; +public class FetchScanTasksResponse extends BaseScanResponse { private 
FetchScanTasksResponse( List planTasks, List fileScanTasks, List deleteFiles, Map specsById) { - this.planTasks = planTasks; - this.fileScanTasks = fileScanTasks; - this.deleteFiles = deleteFiles; - this.specsById = specsById; + super(planTasks, fileScanTasks, deleteFiles, specsById); validate(); } - public List planTasks() { - return planTasks; - } - - public List fileScanTasks() { - return fileScanTasks; - } - - public List deleteFiles() { - return deleteFiles; - } - - public Map specsById() { - return specsById; - } - - public static Builder builder() { - return new Builder(); - } - @Override public void validate() { - if (fileScanTasks() == null || fileScanTasks.isEmpty()) { + if (fileScanTasks() == null || fileScanTasks().isEmpty()) { Preconditions.checkArgument( (deleteFiles() == null || deleteFiles().isEmpty()), "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); @@ -76,34 +49,14 @@ public void validate() { "Invalid response: planTasks and fileScanTask cannot both be null"); } - public static class Builder { - private Builder() {} - - private List planTasks; - private List fileScanTasks; - private List deleteFiles; - private Map specsById; - - public Builder withPlanTasks(List tasks) { - this.planTasks = tasks; - return this; - } - - public Builder withFileScanTasks(List tasks) { - this.fileScanTasks = tasks; - return this; - } - - public Builder withDeleteFiles(List deletes) { - this.deleteFiles = deletes; - return this; - } + public static Builder builder() { + return new Builder(); + } - public Builder withSpecsById(Map specs) { - this.specsById = specs; - return this; - } + public static class Builder extends BaseScanResponse.Builder { + private Builder() {} + @Override public FetchScanTasksResponse build() { return new FetchScanTasksResponse(planTasks, fileScanTasks, deleteFiles, specsById); } diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java 
b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java index dbe0efaf1de8..e3f3b2eac1a9 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java @@ -27,13 +27,9 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; -public class PlanTableScanResponse implements TableScanResponse { +public class PlanTableScanResponse extends BaseScanResponse { private final PlanStatus planStatus; private final String planId; - private final List planTasks; - private final List fileScanTasks; - private final List deleteFiles; - private final Map specsById; private PlanTableScanResponse( PlanStatus planStatus, @@ -42,12 +38,9 @@ private PlanTableScanResponse( List fileScanTasks, List deleteFiles, Map specsById) { + super(planTasks, fileScanTasks, deleteFiles, specsById); this.planStatus = planStatus; this.planId = planId; - this.planTasks = planTasks; - this.fileScanTasks = fileScanTasks; - this.deleteFiles = deleteFiles; - this.specsById = specsById; validate(); } @@ -59,27 +52,11 @@ public String planId() { return planId; } - public List planTasks() { - return planTasks; - } - - public List fileScanTasks() { - return fileScanTasks; - } - - public List deleteFiles() { - return deleteFiles; - } - - public Map specsById() { - return specsById; - } - @Override public String toString() { return MoreObjects.toStringHelper(this) - .add("planStatus", planStatus) - .add("planId", planId) + .add("planStatus", planStatus()) + .add("planId", planId()) .toString(); } @@ -99,7 +76,7 @@ public void validate() { Preconditions.checkArgument( planStatus() == PlanStatus.SUBMITTED || planId() == null, "Invalid response: plan id can only be returned in a 'submitted' status"); - if (fileScanTasks() == null || fileScanTasks.isEmpty()) { + if (fileScanTasks() == null || 
fileScanTasks().isEmpty()) { Preconditions.checkArgument( (deleteFiles() == null || deleteFiles().isEmpty()), "Invalid response: deleteFiles should only be returned with fileScanTasks that reference them"); @@ -110,15 +87,9 @@ public static Builder builder() { return new Builder(); } - public static class Builder { - private Builder() {} - + public static class Builder extends BaseScanResponse.Builder { private PlanStatus planStatus; private String planId; - private List planTasks; - private List fileScanTasks; - private List deleteFiles; - private Map specsById; public Builder withPlanStatus(PlanStatus status) { this.planStatus = status; @@ -130,26 +101,7 @@ public Builder withPlanId(String id) { return this; } - public Builder withPlanTasks(List tasks) { - this.planTasks = tasks; - return this; - } - - public Builder withFileScanTasks(List tasks) { - this.fileScanTasks = tasks; - return this; - } - - public Builder withDeleteFiles(List deletes) { - this.deleteFiles = deletes; - return this; - } - - public Builder withSpecsById(Map specs) { - this.specsById = specs; - return this; - } - + @Override public PlanTableScanResponse build() { return new PlanTableScanResponse( planStatus, planId, planTasks, fileScanTasks, deleteFiles, specsById); diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponse.java deleted file mode 100644 index 4213b50881b9..000000000000 --- a/core/src/main/java/org/apache/iceberg/rest/responses/TableScanResponse.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.rest.responses; - -import org.apache.iceberg.rest.RESTResponse; - -public interface TableScanResponse extends RESTResponse {} From 0f7a7cc6167e60a3be9f78af75c8d58256834be0 Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Mon, 11 Aug 2025 11:41:49 -0700 Subject: [PATCH 6/9] Address more feedbacks --- .../rest/responses/BaseScanResponse.java | 40 +++++++++++++------ .../FetchPlanningResultResponse.java | 2 +- .../responses/FetchScanTasksResponse.java | 2 +- .../rest/responses/PlanTableScanResponse.java | 2 +- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java index 2eedd21730ae..5fbb969f1620 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java @@ -60,10 +60,10 @@ public Map specsById() { } public abstract static class Builder, R extends BaseScanResponse> { - protected List planTasks; - protected List fileScanTasks; - protected List deleteFiles; - protected Map specsById; + private List planTasks; + private List fileScanTasks; + private List deleteFiles; + private Map specsById; protected Builder() {} @@ -72,26 +72,42 @@ public B self() { return (B) this; } - public B withPlanTasks(List planTasks) { - this.planTasks = planTasks; + public B withPlanTasks(List tasks) { + this.planTasks = tasks; return self(); } - public B 
withFileScanTasks(List fileScanTasks) { - this.fileScanTasks = fileScanTasks; + public B withFileScanTasks(List tasks) { + this.fileScanTasks = tasks; return self(); } - public B withDeleteFiles(List deleteFiles) { - this.deleteFiles = deleteFiles; + public B withDeleteFiles(List deleteFilesList) { + this.deleteFiles = deleteFilesList; return self(); } - public B withSpecsById(Map specsById) { - this.specsById = specsById; + public B withSpecsById(Map specs) { + this.specsById = specs; return self(); } + public List planTasks() { + return planTasks; + } + + public List fileScanTasks() { + return fileScanTasks; + } + + public List deleteFiles() { + return deleteFiles; + } + + public Map specsById() { + return specsById; + } + public abstract R build(); } } diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java index a18bba8ad496..bb13028acc2e 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java @@ -75,7 +75,7 @@ public Builder withPlanStatus(PlanStatus status) { @Override public FetchPlanningResultResponse build() { return new FetchPlanningResultResponse( - planStatus, planTasks, fileScanTasks, deleteFiles, specsById); + planStatus, planTasks(), fileScanTasks(), deleteFiles(), specsById()); } } } diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java index 16914297ba2b..640643b95b90 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java @@ -58,7 +58,7 @@ private Builder() {} @Override public FetchScanTasksResponse build() { - return new 
FetchScanTasksResponse(planTasks, fileScanTasks, deleteFiles, specsById); + return new FetchScanTasksResponse(planTasks(), fileScanTasks(), deleteFiles(), specsById()); } } } diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java index e3f3b2eac1a9..d5fdf234da6a 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java @@ -104,7 +104,7 @@ public Builder withPlanId(String id) { @Override public PlanTableScanResponse build() { return new PlanTableScanResponse( - planStatus, planId, planTasks, fileScanTasks, deleteFiles, specsById); + planStatus, planId, planTasks(), fileScanTasks(), deleteFiles(), specsById()); } } } From 08b6a69ec2bf691858b0bc532c8f73030f40fc61 Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Tue, 12 Aug 2025 17:22:00 -0700 Subject: [PATCH 7/9] Address second round review feedback --- .../org/apache/iceberg/ContentFileParser.java | 11 ++-- .../iceberg/RESTFileScanTaskParser.java | 6 ++ .../iceberg/TableScanResponseParser.java | 4 +- .../org/apache/iceberg/rest/PlanStatus.java | 2 +- .../apache/iceberg/rest/RESTSerializers.java | 57 +++++++++++++------ .../rest/requests/PlanTableScanRequest.java | 1 + ...esponse.java => BaseScanTaskResponse.java} | 6 +- .../FetchPlanningResultResponse.java | 4 +- .../FetchPlanningResultResponseParser.java | 7 ++- .../responses/FetchScanTasksResponse.java | 5 +- .../FetchScanTasksResponseParser.java | 4 +- .../rest/responses/PlanTableScanResponse.java | 8 +-- .../PlanTableScanResponseParser.java | 4 +- .../requests/TestPlanTableScanRequest.java | 27 +++++---- ...TestFetchPlanningResultResponseParser.java | 3 +- .../TestPlanTableScanResponseParser.java | 12 ++-- 16 files changed, 96 insertions(+), 65 deletions(-) rename 
core/src/main/java/org/apache/iceberg/rest/responses/{BaseScanResponse.java => BaseScanTaskResponse.java} (95%) diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index 33519bb8a9a6..da9e149ece7d 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -144,25 +144,24 @@ static ContentFile fromJson(JsonNode jsonNode, Map sp jsonNode.isObject(), "Invalid JSON node for content file: non-object (%s)", jsonNode); Preconditions.checkArgument(specsById != null, "Invalid partition spec: null"); int specId = JsonUtil.getInt(SPEC_ID, jsonNode); + PartitionSpec spec = specsById.get(specId); + Preconditions.checkArgument(spec != null, "Invalid partition specId: %s", specId); FileContent fileContent = FileContent.valueOf(JsonUtil.getString(CONTENT, jsonNode)); String filePath = JsonUtil.getString(FILE_PATH, jsonNode); FileFormat fileFormat = FileFormat.fromString(JsonUtil.getString(FILE_FORMAT, jsonNode)); PartitionData partitionData = null; if (jsonNode.has(PARTITION)) { - partitionData = new PartitionData(specsById.get(specId).partitionType()); + partitionData = new PartitionData(spec.partitionType()); StructLike structLike = - (StructLike) - SingleValueParser.fromJson( - specsById.get(specId).partitionType(), jsonNode.get(PARTITION)); + (StructLike) SingleValueParser.fromJson(spec.partitionType(), jsonNode.get(PARTITION)); Preconditions.checkState( partitionData.size() == structLike.size(), "Invalid partition data size: expected = %s, actual = %s", partitionData.size(), structLike.size()); for (int pos = 0; pos < partitionData.size(); ++pos) { - Class javaClass = - specsById.get(specId).partitionType().fields().get(pos).type().typeId().javaClass(); + Class javaClass = spec.partitionType().fields().get(pos).type().typeId().javaClass(); partitionData.set(pos, structLike.get(pos, javaClass)); } } diff --git 
a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java index 5add84c19230..a4fe89a1400e 100644 --- a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; @@ -77,6 +78,11 @@ public static FileScanTask fromJson( DeleteFile[] deleteFiles = null; if (jsonNode.has(DELETE_FILE_REFERENCES)) { List indices = JsonUtil.getIntegerList(DELETE_FILE_REFERENCES, jsonNode); + Preconditions.checkArgument( + Collections.max(indices) < allDeleteFiles.size(), + "Invalid delete file references: %s, expected indices < %s", + indices, + allDeleteFiles.size()); deleteFiles = indices.stream() .map(index -> (GenericDeleteFile) allDeleteFiles.get(index)) diff --git a/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java index 9c76a99d6684..e24d18ede5f8 100644 --- a/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java @@ -52,7 +52,7 @@ public static List parseDeleteFiles( return deleteFilesBuilder.build(); } - return null; + return Lists.newArrayList(); } public static List parseFileScanTasks( @@ -85,7 +85,7 @@ public static void serializeScanTasks( JsonGenerator gen) throws IOException { Map deleteFilePathToIndex = Maps.newHashMap(); - if (deleteFiles != null) { + if (deleteFiles != null && !deleteFiles.isEmpty()) { Preconditions.checkArgument( specsById != null, "Cannot serialize response without specs by ID defined"); gen.writeArrayFieldStart(DELETE_FILES); diff --git 
a/core/src/main/java/org/apache/iceberg/rest/PlanStatus.java b/core/src/main/java/org/apache/iceberg/rest/PlanStatus.java index 5603d51e9aa2..7ad1291b5140 100644 --- a/core/src/main/java/org/apache/iceberg/rest/PlanStatus.java +++ b/core/src/main/java/org/apache/iceberg/rest/PlanStatus.java @@ -40,7 +40,7 @@ public String status() { public static PlanStatus fromName(String status) { Preconditions.checkArgument(status != null, "Status is null"); try { - return PlanStatus.valueOf(status.toUpperCase(Locale.ENGLISH)); + return PlanStatus.valueOf(status.toUpperCase(Locale.ROOT)); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(String.format("Invalid status name: %s", status), e); } diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java index 37f703c5740d..3e0e1750115f 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java @@ -545,13 +545,11 @@ static class PlanTableScanResponseDeserializer @Override public T deserialize(JsonParser p, DeserializationContext context) throws IOException { JsonNode jsonNode = p.getCodec().readTree(p); - // Retrieve injectable values - @SuppressWarnings("unchecked") - Map specsById = - (Map) context.findInjectableValue("specsById", null, null); + TableScanResponseContext scanContext = parseScanResponseContext(context); - boolean caseSensitive = (boolean) context.findInjectableValue("caseSensitive", null, null); - return (T) PlanTableScanResponseParser.fromJson(jsonNode, specsById, caseSensitive); + return (T) + PlanTableScanResponseParser.fromJson( + jsonNode, scanContext.getSpecsById(), scanContext.isCaseSensitive()); } } @@ -569,13 +567,11 @@ static class FetchPlanningResultResponseDeserializer specsById = - (Map) context.findInjectableValue("specsById", null, null); - boolean caseSensitive = (boolean) 
context.findInjectableValue("caseSensitive", null, null); - return (T) FetchPlanningResultResponseParser.fromJson(jsonNode, specsById, caseSensitive); + TableScanResponseContext scanContext = parseScanResponseContext(context); + return (T) + FetchPlanningResultResponseParser.fromJson( + jsonNode, scanContext.getSpecsById(), scanContext.isCaseSensitive()); } } @@ -593,13 +589,38 @@ static class FetchScanTaskResponseDeserializer @Override public T deserialize(JsonParser p, DeserializationContext context) throws IOException { JsonNode jsonNode = p.getCodec().readTree(p); - // Retrieve injectable values - @SuppressWarnings("unchecked") - Map specsById = - (Map) context.findInjectableValue("specsById", null, null); - boolean caseSensitive = (boolean) context.findInjectableValue("caseSensitive", null, null); - return (T) FetchScanTasksResponseParser.fromJson(jsonNode, specsById, caseSensitive); + TableScanResponseContext scanContext = parseScanResponseContext(context); + return (T) + FetchScanTasksResponseParser.fromJson( + jsonNode, scanContext.getSpecsById(), scanContext.isCaseSensitive()); + } + } + + private static TableScanResponseContext parseScanResponseContext(DeserializationContext context) + throws IOException { + @SuppressWarnings("unchecked") + Map specsById = + (Map) context.findInjectableValue("specsById", null, null); + boolean caseSensitive = (boolean) context.findInjectableValue("caseSensitive", null, null); + return new TableScanResponseContext(specsById, caseSensitive); + } + + static class TableScanResponseContext { + private final Map specsById; + private final boolean caseSensitive; + + TableScanResponseContext(Map specs, boolean isCaseSensitive) { + this.specsById = specs; + this.caseSensitive = isCaseSensitive; + } + + Map getSpecsById() { + return specsById; + } + + boolean isCaseSensitive() { + return caseSensitive; } } } diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java 
b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java index d85ee324b0dd..720e5c74e67a 100644 --- a/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java +++ b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java @@ -83,6 +83,7 @@ private PlanTableScanRequest( this.startSnapshotId = startSnapshotId; this.endSnapshotId = endSnapshotId; this.statsFields = statsFields; + validate(); } @Override diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java similarity index 95% rename from core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java rename to core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java index 5fbb969f1620..b7649618e809 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/BaseScanTaskResponse.java @@ -25,14 +25,14 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.rest.RESTResponse; -public abstract class BaseScanResponse implements RESTResponse { +public abstract class BaseScanTaskResponse implements RESTResponse { private final List planTasks; private final List fileScanTasks; private final List deleteFiles; private final Map specsById; - protected BaseScanResponse( + protected BaseScanTaskResponse( List planTasks, List fileScanTasks, List deleteFiles, @@ -59,7 +59,7 @@ public Map specsById() { return specsById; } - public abstract static class Builder, R extends BaseScanResponse> { + public abstract static class Builder, R extends BaseScanTaskResponse> { private List planTasks; private List fileScanTasks; private List deleteFiles; diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java 
index bb13028acc2e..05d64a235891 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponse.java @@ -26,7 +26,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; -public class FetchPlanningResultResponse extends BaseScanResponse { +public class FetchPlanningResultResponse extends BaseScanTaskResponse { private final PlanStatus planStatus; private FetchPlanningResultResponse( @@ -62,7 +62,7 @@ public void validate() { } public static class Builder - extends BaseScanResponse.Builder { + extends BaseScanTaskResponse.Builder { private Builder() {} private PlanStatus planStatus; diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java index b2527e3538de..b3453adbd6b5 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java @@ -27,6 +27,7 @@ import org.apache.iceberg.FileScanTask; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.TableScanResponseParser; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; import org.apache.iceberg.util.JsonUtil; @@ -63,9 +64,11 @@ public static void toJson(FetchPlanningResultResponse response, JsonGenerator ge gen.writeEndObject(); } - public static FetchPlanningResultResponse fromJson( + @VisibleForTesting + static FetchPlanningResultResponse fromJson( String json, Map specsById, boolean caseSensitive) { - Preconditions.checkArgument(json != null, "Invalid fetchPlanningResult response: null"); + 
Preconditions.checkArgument( + json != null, "Invalid fetchPlanningResult response: null or empty"); return JsonUtil.parse( json, node -> { diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java index 640643b95b90..6dcd85e6d307 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponse.java @@ -25,7 +25,7 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -public class FetchScanTasksResponse extends BaseScanResponse { +public class FetchScanTasksResponse extends BaseScanTaskResponse { private FetchScanTasksResponse( List planTasks, @@ -53,7 +53,8 @@ public static Builder builder() { return new Builder(); } - public static class Builder extends BaseScanResponse.Builder { + public static class Builder + extends BaseScanTaskResponse.Builder { private Builder() {} @Override diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java index 455f12a70124..a3eb2fa029af 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java @@ -27,6 +27,7 @@ import org.apache.iceberg.FileScanTask; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.TableScanResponseParser; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; @@ -59,7 +60,8 @@ public static void toJson(FetchScanTasksResponse response, JsonGenerator gen) th gen.writeEndObject(); } - public static 
FetchScanTasksResponse fromJson( + @VisibleForTesting + static FetchScanTasksResponse fromJson( String json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument(json != null, "Cannot parse fetchScanTasks response from null"); return JsonUtil.parse( diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java index d5fdf234da6a..4596f8d5cda2 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponse.java @@ -27,7 +27,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; -public class PlanTableScanResponse extends BaseScanResponse { +public class PlanTableScanResponse extends BaseScanTaskResponse { private final PlanStatus planStatus; private final String planId; @@ -72,10 +72,10 @@ public void validate() { "Invalid response: 'cancelled' is not a valid status for planTableScan"); Preconditions.checkArgument( planStatus() == PlanStatus.COMPLETED || (planTasks() == null && fileScanTasks() == null), - "Invalid response: tasks can only be returned in a 'completed' status"); + "Invalid response: tasks can only be defined when status is 'completed'"); Preconditions.checkArgument( planStatus() == PlanStatus.SUBMITTED || planId() == null, - "Invalid response: plan id can only be returned in a 'submitted' status"); + "Invalid response: plan id can only be defined when status is 'submitted'"); if (fileScanTasks() == null || fileScanTasks().isEmpty()) { Preconditions.checkArgument( (deleteFiles() == null || deleteFiles().isEmpty()), @@ -87,7 +87,7 @@ public static Builder builder() { return new Builder(); } - public static class Builder extends BaseScanResponse.Builder { + public static class Builder extends BaseScanTaskResponse.Builder { private PlanStatus planStatus; 
private String planId; diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java index 2943850fdf74..bdbf3e9b08a4 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.PartitionSpec; @@ -69,7 +70,8 @@ public static void toJson(PlanTableScanResponse response, JsonGenerator gen) thr gen.writeEndObject(); } - public static PlanTableScanResponse fromJson( + @VisibleForTesting + static PlanTableScanResponse fromJson( String json, Map specsById, boolean caseSensitive) { Preconditions.checkArgument( json != null, "Cannot parse planTableScan response from empty or null object"); diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java index 281742386e4a..1fc1303fa724 100644 --- a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java @@ -92,20 +92,19 @@ public void roundTripSerdeWithFilterField() { } @Test - public void roundTripSerdeWithAllFieldsInvalidRequest() { - PlanTableScanRequest request = - new PlanTableScanRequest.Builder() - .withSnapshotId(1L) - .withSelect(Lists.newArrayList("col1", "col2")) - .withFilter(Expressions.alwaysTrue()) - .withStartSnapshotId(1L) - .withEndSnapshotId(2L) - .withCaseSensitive(false) - .withUseSnapshotSchema(true) - .withStatsFields(Lists.newArrayList("col1", "col2")) - .build(); - - 
assertThatThrownBy(() -> PlanTableScanRequestParser.toJson(request)) + public void planTableScanRequestWithAllFieldsInvalidRequest() { + assertThatThrownBy( + () -> + new PlanTableScanRequest.Builder() + .withSnapshotId(1L) + .withSelect(Lists.newArrayList("col1", "col2")) + .withFilter(Expressions.alwaysTrue()) + .withStartSnapshotId(1L) + .withEndSnapshotId(2L) + .withCaseSensitive(false) + .withUseSnapshotSchema(true) + .withStatsFields(Lists.newArrayList("col1", "col2")) + .build()) .isInstanceOf(IllegalArgumentException.class) .hasMessage( "Either snapshotId must be provided or both startSnapshotId and endSnapshotId must be provided"); diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java index a578bfe29fd1..bac182b77ea2 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java @@ -30,7 +30,6 @@ import com.fasterxml.jackson.core.JsonFactoryBuilder; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.InjectableValues; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import java.util.List; @@ -69,7 +68,7 @@ public void nullAndEmptyCheck() { assertThatThrownBy( () -> FetchPlanningResultResponseParser.fromJson( - (JsonNode) null, PARTITION_SPECS_BY_ID, false)) + (String) null, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid fetchPlanningResult response: null or empty"); } diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java index 
138016836e18..49c0ad1fa0af 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java @@ -26,7 +26,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import com.fasterxml.jackson.databind.JsonNode; import java.util.List; import org.apache.iceberg.BaseFileScanTask; import org.apache.iceberg.DeleteFile; @@ -47,8 +46,7 @@ public void nullAndEmptyCheck() { .hasMessage("Invalid response: planTableScanResponse null"); assertThatThrownBy( - () -> - PlanTableScanResponseParser.fromJson((JsonNode) null, PARTITION_SPECS_BY_ID, false)) + () -> PlanTableScanResponseParser.fromJson((String) null, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse planTableScan response from empty or null object"); } @@ -118,7 +116,7 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithTasksPresent() { .withPlanTasks(List.of("task1", "task2")) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid response: tasks can only be returned in a 'completed' status"); + .hasMessage("Invalid response: tasks can only be defined when status is 'completed'"); String invalidJson = "{\"plan-status\":\"submitted\"," @@ -128,7 +126,7 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithTasksPresent() { assertThatThrownBy( () -> PlanTableScanResponseParser.fromJson(invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid response: tasks can only be returned in a 'completed' status"); + .hasMessage("Invalid response: tasks can only be defined when status is 'completed'"); } @Test @@ -141,14 +139,14 @@ public void roundTripSerdeWithInvalidPlanIdWithIncorrectStatus() { .withPlanId("somePlanId") .build()) .isInstanceOf(IllegalArgumentException.class) - 
.hasMessage("Invalid response: plan id can only be returned in a 'submitted' status"); + .hasMessage("Invalid response: plan id can only be defined when status is 'submitted'"); String invalidJson = "{\"plan-status\":\"failed\"," + "\"plan-id\":\"somePlanId\"}"; assertThatThrownBy( () -> PlanTableScanResponseParser.fromJson(invalidJson, PARTITION_SPECS_BY_ID, false)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid response: plan id can only be returned in a 'submitted' status"); + .hasMessage("Invalid response: plan id can only be defined when status is 'submitted'"); } @Test From b385d221d84f5f5a9a61d8f8c2d54667b2973ed5 Mon Sep 17 00:00:00 2001 From: Amogh Jahagirdar Date: Fri, 15 Aug 2025 09:14:39 -0600 Subject: [PATCH 8/9] Move rest parsers to rest module, make content file parser public, minor style changes --- .../org/apache/iceberg/ContentFileParser.java | 10 +++--- .../{ => rest}/RESTFileScanTaskParser.java | 17 ++++++---- .../{ => rest}/TableScanResponseParser.java | 9 +++++- .../rest/requests/PlanTableScanRequest.java | 11 ++++--- .../FetchPlanningResultResponseParser.java | 2 +- .../FetchScanTasksResponseParser.java | 2 +- .../PlanTableScanResponseParser.java | 2 +- .../requests/TestPlanTableScanRequest.java | 32 ++++++++++++------- 8 files changed, 53 insertions(+), 32 deletions(-) rename core/src/main/java/org/apache/iceberg/{ => rest}/RESTFileScanTaskParser.java (88%) rename core/src/main/java/org/apache/iceberg/{ => rest}/TableScanResponseParser.java (95%) diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index da9e149ece7d..b48334d8222d 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -27,7 +27,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; -class ContentFileParser { +public 
class ContentFileParser { private static final String SPEC_ID = "spec-id"; private static final String CONTENT = "content"; private static final String FILE_PATH = "file-path"; @@ -56,12 +56,12 @@ private static boolean hasPartitionData(StructLike partitionData) { return partitionData != null && partitionData.size() > 0; } - static String toJson(ContentFile contentFile, PartitionSpec spec) { + public static String toJson(ContentFile contentFile, PartitionSpec spec) { return JsonUtil.generate( generator -> ContentFileParser.toJson(contentFile, spec, generator), false); } - static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGenerator generator) + public static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGenerator generator) throws IOException { Preconditions.checkArgument(contentFile != null, "Invalid content file: null"); Preconditions.checkArgument(spec != null, "Invalid partition spec: null"); @@ -134,11 +134,11 @@ static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGenerator generator.writeEndObject(); } - static ContentFile fromJson(JsonNode jsonNode, PartitionSpec spec) { + public static ContentFile fromJson(JsonNode jsonNode, PartitionSpec spec) { return fromJson(jsonNode, spec == null ? 
null : Map.of(spec.specId(), spec)); } - static ContentFile fromJson(JsonNode jsonNode, Map specsById) { + public static ContentFile fromJson(JsonNode jsonNode, Map specsById) { Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for content file: null"); Preconditions.checkArgument( jsonNode.isObject(), "Invalid JSON node for content file: non-object (%s)", jsonNode); diff --git a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/rest/RESTFileScanTaskParser.java similarity index 88% rename from core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java rename to core/src/main/java/org/apache/iceberg/rest/RESTFileScanTaskParser.java index a4fe89a1400e..0ada9083eea6 100644 --- a/core/src/main/java/org/apache/iceberg/RESTFileScanTaskParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTFileScanTaskParser.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.apache.iceberg; +package org.apache.iceberg.rest; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; @@ -25,13 +25,21 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.iceberg.BaseFileScanTask; +import org.apache.iceberg.ContentFileParser; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PartitionSpecParser; +import org.apache.iceberg.SchemaParser; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.ExpressionParser; import org.apache.iceberg.expressions.ResidualEvaluator; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; -public class RESTFileScanTaskParser { +class RESTFileScanTaskParser { private static final String DATA_FILE = "data-file"; private static final String DELETE_FILE_REFERENCES = "delete-file-references"; private static final String RESIDUAL_FILTER = "residual-filter"; @@ -83,10 +91,7 @@ public static FileScanTask fromJson( "Invalid delete file references: %s, expected indices < %s", indices, allDeleteFiles.size()); - deleteFiles = - indices.stream() - .map(index -> (GenericDeleteFile) allDeleteFiles.get(index)) - .toArray(GenericDeleteFile[]::new); + deleteFiles = indices.stream().map(allDeleteFiles::get).toArray(DeleteFile[]::new); } Expression filter = null; diff --git a/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java similarity index 95% rename from core/src/main/java/org/apache/iceberg/TableScanResponseParser.java rename to core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java index e24d18ede5f8..67f71c418440 100644 --- a/core/src/main/java/org/apache/iceberg/TableScanResponseParser.java +++ 
b/core/src/main/java/org/apache/iceberg/rest/TableScanResponseParser.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.iceberg; +package org.apache.iceberg.rest; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; @@ -24,6 +24,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.iceberg.ContentFileParser; +import org.apache.iceberg.DeleteFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -49,6 +53,7 @@ public static List parseDeleteFiles( DeleteFile deleteFile = (DeleteFile) ContentFileParser.fromJson(deleteFileNode, specsById); deleteFilesBuilder.add(deleteFile); } + return deleteFilesBuilder.build(); } @@ -94,6 +99,7 @@ public static void serializeScanTasks( deleteFilePathToIndex.put(String.valueOf(deleteFile.path()), i); ContentFileParser.toJson(deleteFiles.get(i), specsById.get(deleteFile.specId()), gen); } + gen.writeEndArray(); } @@ -114,6 +120,7 @@ public static void serializeScanTasks( fileScanTask.file().specId()); RESTFileScanTaskParser.toJson(fileScanTask, deleteFileReferences, spec, gen); } + gen.writeEndArray(); } } diff --git a/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java index 720e5c74e67a..14e14eab4bc7 100644 --- a/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java +++ b/core/src/main/java/org/apache/iceberg/rest/requests/PlanTableScanRequest.java @@ -88,21 +88,22 @@ private PlanTableScanRequest( @Override public void validate() { - if (snapshotId != null || startSnapshotId != null || endSnapshotId 
!= null) { - Preconditions.checkArgument( - snapshotId != null ^ (startSnapshotId != null && endSnapshotId != null), - "Either snapshotId must be provided or both startSnapshotId and endSnapshotId must be provided"); - } + Preconditions.checkArgument( + snapshotId != null ^ (startSnapshotId != null && endSnapshotId != null), + "Either snapshotId must be provided or both startSnapshotId and endSnapshotId must be provided"); } @Override public String toString() { return MoreObjects.toStringHelper(this) .add("snapshotId", snapshotId) + .add("select", select) + .add("filter", filter) .add("caseSensitive", caseSensitive) .add("useSnapshotSchema", useSnapshotSchema) .add("startSnapshotId", startSnapshotId) .add("endSnapshotId", endSnapshotId) + .add("statsFields", statsFields) .toString(); } diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java index b3453adbd6b5..5dcfe2d59a2f 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchPlanningResultResponseParser.java @@ -26,10 +26,10 @@ import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.TableScanResponseParser; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; +import org.apache.iceberg.rest.TableScanResponseParser; import org.apache.iceberg.util.JsonUtil; public class FetchPlanningResultResponseParser { diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java index a3eb2fa029af..f54243b8772e 
100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/FetchScanTasksResponseParser.java @@ -26,9 +26,9 @@ import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.TableScanResponseParser; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.rest.TableScanResponseParser; import org.apache.iceberg.util.JsonUtil; public class FetchScanTasksResponseParser { diff --git a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java index bdbf3e9b08a4..523770e6cc36 100644 --- a/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java +++ b/core/src/main/java/org/apache/iceberg/rest/responses/PlanTableScanResponseParser.java @@ -27,9 +27,9 @@ import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.TableScanResponseParser; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.rest.PlanStatus; +import org.apache.iceberg.rest.TableScanResponseParser; import org.apache.iceberg.util.JsonUtil; public class PlanTableScanResponseParser { diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java index 1fc1303fa724..c00df15967c5 100644 --- a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java @@ -39,18 +39,6 @@ public void nullAndEmptyCheck() { 
.hasMessage("Invalid planTableScanRequest: null"); } - @Test - public void roundTripSerdeWithEmptyRequestAndDefaultsPresent() { - PlanTableScanRequest request = new PlanTableScanRequest.Builder().build(); - - String expectedJson = "{" + "\"case-sensitive\":true," + "\"use-snapshot-schema\":false}"; - - String json = PlanTableScanRequestParser.toJson(request, false); - assertThat(json).isEqualTo(expectedJson); - assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) - .isEqualTo(expectedJson); - } - @Test public void roundTripSerdeWithSelectField() { PlanTableScanRequest request = @@ -137,4 +125,24 @@ public void roundTripSerdeWithAllFieldsExceptSnapShotId() { assertThat(PlanTableScanRequestParser.toJson(PlanTableScanRequestParser.fromJson(json), false)) .isEqualTo(expectedJson); } + + @Test + public void testToStringContainsAllFields() { + PlanTableScanRequest request = new PlanTableScanRequest.Builder() + .withSnapshotId(123L) + .withSelect(Lists.newArrayList("colA", "colB")) + .withFilter(Expressions.alwaysTrue()) + .withCaseSensitive(false) + .withUseSnapshotSchema(true) + .withStatsFields(Lists.newArrayList("stat1")) + .build(); + + String str = request.toString(); + assertThat(str).contains("snapshotId=123"); + assertThat(str).contains("select=[colA, colB]"); + assertThat(str).contains("filter=true"); + assertThat(str).contains("caseSensitive=false"); + assertThat(str).contains("useSnapshotSchema=true"); + assertThat(str).contains("statsFields=[stat1]"); + } } From 27d56568b3acbbec50d9bb64179b37c856048419 Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Fri, 15 Aug 2025 11:48:40 -0700 Subject: [PATCH 9/9] style check --- .../apache/iceberg/rest/requests/TestPlanTableScanRequest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java 
index c00df15967c5..f18928a1a349 100644 --- a/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java +++ b/core/src/test/java/org/apache/iceberg/rest/requests/TestPlanTableScanRequest.java @@ -128,7 +128,8 @@ public void roundTripSerdeWithAllFieldsExceptSnapShotId() { @Test public void testToStringContainsAllFields() { - PlanTableScanRequest request = new PlanTableScanRequest.Builder() + PlanTableScanRequest request = + new PlanTableScanRequest.Builder() .withSnapshotId(123L) .withSelect(Lists.newArrayList("colA", "colB")) .withFilter(Expressions.alwaysTrue())