From a9e74fcc1a1df1a0192e918b357e6fef013b9894 Mon Sep 17 00:00:00 2001 From: geruh Date: Tue, 2 Dec 2025 02:29:41 -0800 Subject: [PATCH 1/6] Core: Align ContentFile Content Field with REST Spec --- .../org/apache/iceberg/ContentFileParser.java | 26 +++++++++- .../apache/iceberg/TestContentFileParser.java | 51 +++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index 0033fa97725a..267dbbe00419 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; @@ -84,7 +85,8 @@ public static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGe // as it isn't used and BaseFile constructor doesn't support it. generator.writeNumberField(SPEC_ID, contentFile.specId()); - generator.writeStringField(CONTENT, contentFile.content().name()); + String contentValue = contentFile.content().name().toLowerCase(Locale.ROOT).replace('_', '-'); + generator.writeStringField(CONTENT, contentValue); generator.writeStringField(FILE_PATH, contentFile.location()); generator.writeStringField(FILE_FORMAT, contentFile.format().name()); @@ -147,7 +149,7 @@ public static ContentFile fromJson(JsonNode jsonNode, Map ContentFileParser.fromJson(node, Map.of(0, PartitionSpec.unpartitioned()))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid file content value: 'invalid-content'"); + } + + @ParameterizedTest + @MethodSource("enumContentTypeCases") + public void testEnumContentTypeSerialization(FileContent content, String expectedJsonContent) + throws Exception { + String jsonStr = + "{\"spec-id\":0," + + "\"content\":\"" + + content.name() + + "\"," + + "\"file-path\":\"/path/to/data.parquet\"," + + "\"file-format\":\"PARQUET\"," + + "\"partition\":{}," + + "\"file-size-in-bytes\":1," + + "\"record-count\":1}"; + + JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); + ContentFile deserializedContentFile = + ContentFileParser.fromJson(jsonNode, Map.of(0, PartitionSpec.unpartitioned())); + assertThat(deserializedContentFile.content()).isEqualTo(content); + + String serializedStr = + ContentFileParser.toJson(deserializedContentFile, PartitionSpec.unpartitioned()); + assertThat(serializedStr).contains("\"content\":\"" + expectedJsonContent + "\""); + } + + private static Stream enumContentTypeCases() { + return Stream.of( + Arguments.of(FileContent.DATA, "data"), + Arguments.of(FileContent.POSITION_DELETES, "position-deletes"), + Arguments.of(FileContent.EQUALITY_DELETES, "equality-deletes")); + } + private static Stream provideSpecAndDataFile() { return Stream.of( Arguments.of( From 6f6d4b0b8946904ee8e7bc0bd3f340eddd37d479 Mon Sep 17 00:00:00 2001 From: geruh Date: Tue, 2 Dec 2025 17:17:47 -0800 Subject: [PATCH 2/6] Update the rest of the test --- .../apache/iceberg/TestContentFileParser.java | 30 +++++++++---------- .../apache/iceberg/TestDataTaskParser.java | 6 ++-- .../iceberg/TestFileScanTaskParser.java | 18 +++++------ ...TestFetchPlanningResultResponseParser.java | 6 ++-- .../TestFetchScanTasksResponseParser.java | 6 ++-- .../TestPlanTableScanResponseParser.java | 24 +++++++-------- 6 files changed, 45 insertions(+), 45 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java index e4d5b97715d4..7357edd6022c 100644 --- a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java +++ b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java @@ -109,7 +109,7 @@ public void testDeleteFile(PartitionSpec spec, DeleteFile deleteFile, String exp public void testPartitionJsonArrayWrongSize() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String jsonStr = - "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data.parquet\"," + "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[],\"file-size-in-bytes\":10," + "\"record-count\":1}"; @@ -124,7 +124,7 @@ public void testPartitionJsonArrayWrongSize() throws Exception { public void testPartitionJsonInvalidType() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String jsonStr = - "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data.parquet\"," + "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":\"invalid\",\"file-size-in-bytes\":10," + "\"record-count\":1}"; @@ -139,7 +139,7 @@ public void testPartitionJsonInvalidType() throws Exception { public void testParsesFieldIdPartitionMap() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String legacyJson = - "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data.parquet\"," + "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":\"foo\"},\"file-size-in-bytes\":10," + "\"record-count\":1}"; @@ -155,7 +155,7 @@ public void testParsesFieldIdPartitionMap() throws Exception { public void testPartitionStructObjectContainsExtraField() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String jsonStr = - "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data.parquet\"," + "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":\"foo\",\"9999\":\"bar\"}," + "\"file-size-in-bytes\":10,\"record-count\":1}"; @@ -169,7 +169,7 @@ public void testPartitionStructObjectContainsExtraField() throws Exception { public void testPartitionStructObjectEmptyIsNull() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String jsonStr = - "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data.parquet\"," + "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":{},\"file-size-in-bytes\":10," + "\"record-count\":1}"; @@ -322,17 +322,17 @@ private static DataFile dataFileWithOnlyNanCounts(PartitionSpec spec) { private static String dataFileJsonWithRequiredOnly(PartitionSpec spec) { if (spec.isUnpartitioned()) { - return "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\"," + return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[],\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}"; } else { - return "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\"," + return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[1],\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}"; } } private static String dataFileJsonWithAllOptional(PartitionSpec spec) { if (spec.isUnpartitioned()) { - return "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-with-stats.parquet\"," + return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-with-stats.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[],\"file-size-in-bytes\":350,\"record-count\":10," + "\"column-sizes\":{\"keys\":[3,4],\"values\":[100,200]}," + "\"value-counts\":{\"keys\":[3,4],\"values\":[90,180]}," @@ -343,7 +343,7 @@ private static String dataFileJsonWithAllOptional(PartitionSpec spec) { + "\"key-metadata\":\"00000000000000000000000000000000\"," + "\"split-offsets\":[128,256],\"sort-order-id\":1}"; } else { - return "{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-with-stats.parquet\"," + return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-with-stats.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[1],\"file-size-in-bytes\":350,\"record-count\":10," + "\"column-sizes\":{\"keys\":[3,4],\"values\":[100,200]}," + "\"value-counts\":{\"keys\":[3,4],\"values\":[90,180]}," @@ -439,7 +439,7 @@ private static DeleteFile deleteFileWithDataRef(PartitionSpec spec) { } private static String deleteFileWithDataRefJson() { - return "{\"spec-id\":0,\"content\":\"POSITION_DELETES\",\"file-path\":\"/path/to/delete.parquet\"," + return "{\"spec-id\":0,\"content\":\"position-deletes\",\"file-path\":\"/path/to/delete.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[4],\"file-size-in-bytes\":1234," + "\"record-count\":10,\"referenced-data-file\":\"/path/to/data/file.parquet\"}"; } @@ -465,7 +465,7 @@ private static DeleteFile dv(PartitionSpec spec) { } private static String dvJson() { - return "{\"spec-id\":0,\"content\":\"POSITION_DELETES\",\"file-path\":\"/path/to/delete.puffin\"," + return "{\"spec-id\":0,\"content\":\"position-deletes\",\"file-path\":\"/path/to/delete.puffin\"," + "\"file-format\":\"PUFFIN\",\"partition\":[4],\"file-size-in-bytes\":1234,\"record-count\":10," + "\"referenced-data-file\":\"/path/to/data/file.parquet\",\"content-offset\":4,\"content-size-in-bytes\":40}"; } @@ -538,17 +538,17 @@ private static DeleteFile deleteFileWithAllOptional(PartitionSpec spec) { private static String deleteFileJsonWithRequiredOnly(PartitionSpec spec) { if (spec.isUnpartitioned()) { - return "{\"spec-id\":0,\"content\":\"POSITION_DELETES\",\"file-path\":\"/path/to/delete-a.parquet\"," + return "{\"spec-id\":0,\"content\":\"position-deletes\",\"file-path\":\"/path/to/delete-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[],\"file-size-in-bytes\":1234,\"record-count\":9}"; } else { - return "{\"spec-id\":0,\"content\":\"POSITION_DELETES\",\"file-path\":\"/path/to/delete-a.parquet\"," + return "{\"spec-id\":0,\"content\":\"position-deletes\",\"file-path\":\"/path/to/delete-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[9],\"file-size-in-bytes\":1234,\"record-count\":9}"; } } private static String deleteFileJsonWithAllOptional(PartitionSpec spec) { if (spec.isUnpartitioned()) { - return "{\"spec-id\":0,\"content\":\"EQUALITY_DELETES\",\"file-path\":\"/path/to/delete-with-stats.parquet\"," + return "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/delete-with-stats.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[],\"file-size-in-bytes\":1234,\"record-count\":10," + "\"column-sizes\":{\"keys\":[3,4],\"values\":[100,200]}," + "\"value-counts\":{\"keys\":[3,4],\"values\":[90,180]}," @@ -559,7 +559,7 @@ private static String deleteFileJsonWithAllOptional(PartitionSpec spec) { + "\"key-metadata\":\"00000000000000000000000000000000\"," + "\"split-offsets\":[128],\"equality-ids\":[3],\"sort-order-id\":1}"; } else { - return "{\"spec-id\":0,\"content\":\"EQUALITY_DELETES\",\"file-path\":\"/path/to/delete-with-stats.parquet\"," + return "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/delete-with-stats.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[9],\"file-size-in-bytes\":1234,\"record-count\":10," + "\"column-sizes\":{\"keys\":[3,4],\"values\":[100,200]}," + "\"value-counts\":{\"keys\":[3,4],\"values\":[90,180]}," diff --git a/core/src/test/java/org/apache/iceberg/TestDataTaskParser.java b/core/src/test/java/org/apache/iceberg/TestDataTaskParser.java index 758c11db788a..7359b3ebdb75 100644 --- a/core/src/test/java/org/apache/iceberg/TestDataTaskParser.java +++ b/core/src/test/java/org/apache/iceberg/TestDataTaskParser.java @@ -144,7 +144,7 @@ public void missingFields() throws Exception { + "{\"id\":6,\"name\":\"summary\",\"required\":false,\"type\":{\"type\":\"map\"," + "\"key-id\":7,\"key\":\"string\",\"value-id\":8," + "\"value\":\"string\",\"value-required\":true}}]}," - + "\"metadata-file\":{\"spec-id\":0,\"content\":\"DATA\"," + + "\"metadata-file\":{\"spec-id\":0,\"content\":\"data\"," + "\"file-path\":\"/tmp/metadata2.json\"," + "\"file-format\":\"METADATA\",\"partition\":[]," + "\"file-size-in-bytes\":0,\"record-count\":2,\"sort-order-id\":0}" @@ -172,7 +172,7 @@ public void testDataTaskParsesFieldIdPartitionMap() { + "\"fields\":[{\"id\":1,\"name\":\"committed_at\",\"required\":true,\"type\":\"timestamptz\"}]}," + "\"projection\":{\"type\":\"struct\",\"schema-id\":0," + "\"fields\":[{\"id\":1,\"name\":\"committed_at\",\"required\":true,\"type\":\"timestamptz\"}]}," - + "\"metadata-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/tmp/metadata.json\"," + + "\"metadata-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/tmp/metadata.json\"," + "\"file-format\":\"METADATA\",\"partition\":{},\"file-size-in-bytes\":0,\"record-count\":1,\"sort-order-id\":0}," + "\"rows\":[{\"1\":\"2009-02-13T23:31:30+00:00\"}]}"; @@ -263,7 +263,7 @@ private String snapshotsDataTaskJson() { + "{\"id\":6,\"name\":\"summary\",\"required\":false,\"type\":{\"type\":\"map\"," + "\"key-id\":7,\"key\":\"string\",\"value-id\":8," + "\"value\":\"string\",\"value-required\":true}}]}," - + "\"metadata-file\":{\"spec-id\":0,\"content\":\"DATA\"," + + "\"metadata-file\":{\"spec-id\":0,\"content\":\"data\"," + "\"file-path\":\"/tmp/metadata2.json\"," + "\"file-format\":\"METADATA\",\"partition\":[]," + "\"file-size-in-bytes\":0,\"record-count\":2,\"sort-order-id\":0}," diff --git a/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java b/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java index 882c2b33496d..9c1cd744b2cd 100644 --- a/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java +++ b/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java @@ -96,14 +96,14 @@ private String fileScanTaskJsonWithoutTaskType() { + "{\"id\":4,\"name\":\"data\",\"required\":true,\"type\":\"string\"}]}," + "\"spec\":{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\"," + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}," - + "\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"start\":0,\"length\":10," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}," - + "{\"spec-id\":0,\"content\":\"EQUALITY_DELETES\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," + + "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[0],\"file-size-in-bytes\":10," + "\"record-count\":1,\"equality-ids\":[1],\"sort-order-id\":0}]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}"; @@ -116,14 +116,14 @@ private String fileScanTaskJson() { + "{\"id\":4,\"name\":\"data\",\"required\":true,\"type\":\"string\"}]}," + "\"spec\":{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\"," + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}," - + "\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"start\":0,\"length\":10," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}," - + "{\"spec-id\":0,\"content\":\"EQUALITY_DELETES\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," + + "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[0],\"file-size-in-bytes\":10," + "\"record-count\":1,\"equality-ids\":[1],\"sort-order-id\":0}]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}"; @@ -136,14 +136,14 @@ private String fileScanTaskFieldIdPartitionMapJson() { + "{\"id\":4,\"name\":\"data\",\"required\":true,\"type\":\"string\"}]}," + "\"spec\":{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\"," + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}," - + "\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0}," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"start\":0,\"length\":10," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}," - + "{\"spec-id\":0,\"content\":\"EQUALITY_DELETES\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," + + "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0},\"file-size-in-bytes\":10," + "\"record-count\":1,\"equality-ids\":[1],\"sort-order-id\":0}]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}"; diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java index 8b82e9794d5d..dba45e9935ed 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java @@ -155,7 +155,7 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileSca String invalidJson = "{\"status\":\"submitted\"," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; @@ -193,11 +193,11 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() throws JsonProcessin String expectedToJson = "{\"status\":\"completed\"," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]," + "\"file-scan-tasks\":[" - + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "{\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"delete-file-references\":[0]," diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java index 62de75e3818b..dfc1bb9f5172 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java @@ -95,7 +95,7 @@ public void roundTripSerdeWithDeleteFilesNoFileScanTasksPresent() { String invalidJson = "{\"plan-tasks\":[\"task1\",\"task2\"]," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; @@ -129,11 +129,11 @@ public void roundTripSerdeWithFileScanTasks() { String expectedToJson = "{" - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]," + "\"file-scan-tasks\":[" - + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "{\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"delete-file-references\":[0]," diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java index a59bbbd16b45..449b4e36fc0f 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java @@ -210,7 +210,7 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileSca String invalidJson = "{\"status\":\"submitted\"," + "\"plan-id\":\"somePlanId\"," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; @@ -244,11 +244,11 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() { String expectedToJson = "{\"status\":\"completed\"," - + "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\"," + + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]," + "\"file-scan-tasks\":[" - + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "{\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"delete-file-references\":[0]," @@ -316,7 +316,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"status\" : \"completed\",\n" + " \"delete-files\" : [ {\n" + " \"spec-id\" : 0,\n" - + " \"content\" : \"POSITION_DELETES\",\n" + + " \"content\" : \"position-deletes\",\n" + " \"file-path\" : \"/path/to/data-a-deletes.parquet\",\n" + " \"file-format\" : \"PARQUET\",\n" + " \"partition\" : [ 0 ],\n" @@ -324,7 +324,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"record-count\" : 1\n" + " }, {\n" + " \"spec-id\" : 0,\n" - + " \"content\" : \"POSITION_DELETES\",\n" + + " \"content\" : \"position-deletes\",\n" + " \"file-path\" : \"/path/to/data-b-deletes.parquet\",\n" + " \"file-format\" : \"PARQUET\",\n" + " \"partition\" : [ 1 ],\n" @@ -332,7 +332,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"record-count\" : 1\n" + " }, {\n" + " \"spec-id\" : 0,\n" - + " \"content\" : \"EQUALITY_DELETES\",\n" + + " \"content\" : \"equality-deletes\",\n" + " \"file-path\" : \"/path/to/data-c-deletes.parquet\",\n" + " \"file-format\" : \"PARQUET\",\n" + " \"partition\" : [ 2 ],\n" @@ -344,7 +344,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"file-scan-tasks\" : [ {\n" + " \"data-file\" : {\n" + " \"spec-id\" : 0,\n" - + " \"content\" : \"DATA\",\n" + + " \"content\" : \"data\",\n" + " \"file-path\" : \"/path/to/data-a.parquet\",\n" + " \"file-format\" : \"PARQUET\",\n" + " \"partition\" : [ 0 ],\n" @@ -357,7 +357,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " }, {\n" + " \"data-file\" : {\n" + " \"spec-id\" : 0,\n" - + " \"content\" : \"DATA\",\n" + + " \"content\" : \"data\",\n" + " \"file-path\" : \"/path/to/data-b.parquet\",\n" + " \"file-format\" : \"PARQUET\",\n" + " \"partition\" : [ 1 ],\n" @@ -371,7 +371,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " }, {\n" + " \"data-file\" : {\n" + " \"spec-id\" : 0,\n" - + " \"content\" : \"DATA\",\n" + + " \"content\" : \"data\",\n" + " \"file-path\" : \"/path/to/data-c.parquet\",\n" + " \"file-format\" : \"PARQUET\",\n" + " \"partition\" : [ 2 ],\n" @@ -409,7 +409,7 @@ public void roundTripSerdeWithoutDeleteFiles() { String expectedJson = "{\"status\":\"completed\"," + "\"file-scan-tasks\":[" - + "{\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\"," + + "{\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," + "\"file-format\":\"PARQUET\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" @@ -608,7 +608,7 @@ public void roundTripSerdeWithValidStatusAndFileScanTasksAndCredentials() { + " } ],\n" + " \"delete-files\" : [ {\n" + " \"spec-id\" : 0,\n" - + " \"content\" : \"POSITION_DELETES\",\n" + + " \"content\" : \"position-deletes\",\n" + " \"file-path\" : \"/path/to/data-a-deletes.parquet\",\n" + " \"file-format\" : \"PARQUET\",\n" + " \"partition\" : [ 0 ],\n" @@ -618,7 +618,7 @@ public void roundTripSerdeWithValidStatusAndFileScanTasksAndCredentials() { + " \"file-scan-tasks\" : [ {\n" + " \"data-file\" : {\n" + " \"spec-id\" : 0,\n" - + " \"content\" : \"DATA\",\n" + + " \"content\" : \"data\",\n" + " \"file-path\" : \"/path/to/data-a.parquet\",\n" + " \"file-format\" : \"PARQUET\",\n" + " \"partition\" : [ 0 ],\n" From 3afd321d81c1935aec3be2bf4924930a281780b3 Mon Sep 17 00:00:00 2001 From: geruh Date: Tue, 2 Dec 2025 22:55:14 -0800 Subject: [PATCH 3/6] align file format with rest spec --- .../org/apache/iceberg/ContentFileParser.java | 7 ++-- .../apache/iceberg/TestContentFileParser.java | 34 +++++++++---------- .../apache/iceberg/TestDataTaskParser.java | 6 ++-- .../iceberg/TestFileScanTaskParser.java | 18 +++++----- ...TestFetchPlanningResultResponseParser.java | 6 ++-- .../TestFetchScanTasksResponseParser.java | 6 ++-- .../TestPlanTableScanResponseParser.java | 24 ++++++------- 7 files changed, 52 insertions(+), 49 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index 267dbbe00419..982e97586554 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -85,10 +85,13 @@ public static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGe // as it isn't used and BaseFile constructor doesn't support it. generator.writeNumberField(SPEC_ID, contentFile.specId()); + // Since 1.11, we serialize content as lowercase kebab-case values like "equality-deletes" String contentValue = contentFile.content().name().toLowerCase(Locale.ROOT).replace('_', '-'); generator.writeStringField(CONTENT, contentValue); generator.writeStringField(FILE_PATH, contentFile.location()); - generator.writeStringField(FILE_FORMAT, contentFile.format().name()); + // Since 1.11, we serialize format as lower-case strings (e.g., "parquet") + String formatValue = contentFile.format().name().toLowerCase(Locale.ROOT); + generator.writeStringField(FILE_FORMAT, formatValue); if (contentFile.partition() != null) { generator.writeFieldName(PARTITION); @@ -358,7 +361,7 @@ private static FileContent fileContentFromJson(String content) { case "equality-deletes": return FileContent.EQUALITY_DELETES; default: - // Otherwise, fall back to the enum name + // In 1.10 and before, file content is serialized as the FileContent enum value try { return FileContent.valueOf(content); } catch (IllegalArgumentException e) { diff --git a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java index 7357edd6022c..b141eeb766fe 100644 --- a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java +++ b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java @@ -110,7 +110,7 @@ public void testPartitionJsonArrayWrongSize() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String jsonStr = "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[],\"file-size-in-bytes\":10," + + "\"file-format\":\"parquet\",\"partition\":[],\"file-size-in-bytes\":10," + "\"record-count\":1}"; JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); @@ -125,7 +125,7 @@ public void testPartitionJsonInvalidType() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String jsonStr = "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":\"invalid\",\"file-size-in-bytes\":10," + + "\"file-format\":\"parquet\",\"partition\":\"invalid\",\"file-size-in-bytes\":10," + "\"record-count\":1}"; JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); @@ -140,7 +140,7 @@ public void testParsesFieldIdPartitionMap() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String legacyJson = "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":\"foo\"},\"file-size-in-bytes\":10," + + "\"file-format\":\"parquet\",\"partition\":{\"1000\":\"foo\"},\"file-size-in-bytes\":10," + "\"record-count\":1}"; JsonNode jsonNode = JsonUtil.mapper().readTree(legacyJson); @@ -156,7 +156,7 @@ public void testPartitionStructObjectContainsExtraField() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String jsonStr = "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":\"foo\",\"9999\":\"bar\"}," + + "\"file-format\":\"parquet\",\"partition\":{\"1000\":\"foo\",\"9999\":\"bar\"}," + "\"file-size-in-bytes\":10,\"record-count\":1}"; JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); @@ -170,7 +170,7 @@ public void testPartitionStructObjectEmptyIsNull() throws Exception { PartitionSpec spec = PartitionSpec.builderFor(TestBase.SCHEMA).identity("data").build(); String jsonStr = "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":{},\"file-size-in-bytes\":10," + + "\"file-format\":\"parquet\",\"partition\":{},\"file-size-in-bytes\":10," + "\"record-count\":1}"; JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); @@ -216,7 +216,7 @@ public void testInvalidContentType() throws Exception { "{\"spec-id\":0," + "\"content\":\"invalid-content\"," + "\"file-path\":\"/path/to/file.parquet\"," - + "\"file-format\":\"PARQUET\"," + + "\"file-format\":\"parquet\"," + "\"partition\":{}," + "\"file-size-in-bytes\":1," + "\"record-count\":1}"; @@ -239,7 +239,7 @@ public void testEnumContentTypeSerialization(FileContent content, String expecte + content.name() + "\"," + "\"file-path\":\"/path/to/data.parquet\"," - + "\"file-format\":\"PARQUET\"," + + "\"file-format\":\"parquet\"," + "\"partition\":{}," + "\"file-size-in-bytes\":1," + "\"record-count\":1}"; @@ -322,10 +322,10 @@ private static DataFile dataFileWithOnlyNanCounts(PartitionSpec spec) { private static String dataFileJsonWithRequiredOnly(PartitionSpec spec) { if (spec.isUnpartitioned()) { - return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\"," + return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[],\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}"; } else { - return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\"," + return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[1],\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}"; } } @@ -333,7 +333,7 @@ private static String dataFileJsonWithRequiredOnly(PartitionSpec spec) { private static String dataFileJsonWithAllOptional(PartitionSpec spec) { if (spec.isUnpartitioned()) { return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-with-stats.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[],\"file-size-in-bytes\":350,\"record-count\":10," + + "\"file-format\":\"parquet\",\"partition\":[],\"file-size-in-bytes\":350,\"record-count\":10," + "\"column-sizes\":{\"keys\":[3,4],\"values\":[100,200]}," + "\"value-counts\":{\"keys\":[3,4],\"values\":[90,180]}," + "\"null-value-counts\":{\"keys\":[3,4],\"values\":[10,20]}," @@ -344,7 +344,7 @@ private static String dataFileJsonWithAllOptional(PartitionSpec spec) { + "\"split-offsets\":[128,256],\"sort-order-id\":1}"; } else { return "{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-with-stats.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[1],\"file-size-in-bytes\":350,\"record-count\":10," + + "\"file-format\":\"parquet\",\"partition\":[1],\"file-size-in-bytes\":350,\"record-count\":10," + "\"column-sizes\":{\"keys\":[3,4],\"values\":[100,200]}," + "\"value-counts\":{\"keys\":[3,4],\"values\":[90,180]}," + "\"null-value-counts\":{\"keys\":[3,4],\"values\":[10,20]}," @@ -440,7 +440,7 @@ private static DeleteFile deleteFileWithDataRef(PartitionSpec spec) { private static String deleteFileWithDataRefJson() { return "{\"spec-id\":0,\"content\":\"position-deletes\",\"file-path\":\"/path/to/delete.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[4],\"file-size-in-bytes\":1234," + + "\"file-format\":\"parquet\",\"partition\":[4],\"file-size-in-bytes\":1234," + "\"record-count\":10,\"referenced-data-file\":\"/path/to/data/file.parquet\"}"; } @@ -466,7 +466,7 @@ private static DeleteFile dv(PartitionSpec spec) { private static String dvJson() { return "{\"spec-id\":0,\"content\":\"position-deletes\",\"file-path\":\"/path/to/delete.puffin\"," - + "\"file-format\":\"PUFFIN\",\"partition\":[4],\"file-size-in-bytes\":1234,\"record-count\":10," + + "\"file-format\":\"puffin\",\"partition\":[4],\"file-size-in-bytes\":1234,\"record-count\":10," + "\"referenced-data-file\":\"/path/to/data/file.parquet\",\"content-offset\":4,\"content-size-in-bytes\":40}"; } @@ -539,17 +539,17 @@ private static DeleteFile deleteFileWithAllOptional(PartitionSpec spec) { private static String deleteFileJsonWithRequiredOnly(PartitionSpec spec) { if (spec.isUnpartitioned()) { return "{\"spec-id\":0,\"content\":\"position-deletes\",\"file-path\":\"/path/to/delete-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[],\"file-size-in-bytes\":1234,\"record-count\":9}"; + + "\"file-format\":\"parquet\",\"partition\":[],\"file-size-in-bytes\":1234,\"record-count\":9}"; } else { return "{\"spec-id\":0,\"content\":\"position-deletes\",\"file-path\":\"/path/to/delete-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[9],\"file-size-in-bytes\":1234,\"record-count\":9}"; + + "\"file-format\":\"parquet\",\"partition\":[9],\"file-size-in-bytes\":1234,\"record-count\":9}"; } } private static String deleteFileJsonWithAllOptional(PartitionSpec spec) { if (spec.isUnpartitioned()) { return "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/delete-with-stats.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[],\"file-size-in-bytes\":1234,\"record-count\":10," + + "\"file-format\":\"parquet\",\"partition\":[],\"file-size-in-bytes\":1234,\"record-count\":10," + "\"column-sizes\":{\"keys\":[3,4],\"values\":[100,200]}," + "\"value-counts\":{\"keys\":[3,4],\"values\":[90,180]}," + "\"null-value-counts\":{\"keys\":[3,4],\"values\":[10,20]}," @@ -560,7 +560,7 @@ private static String deleteFileJsonWithAllOptional(PartitionSpec spec) { + "\"split-offsets\":[128],\"equality-ids\":[3],\"sort-order-id\":1}"; } else { return "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/delete-with-stats.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[9],\"file-size-in-bytes\":1234,\"record-count\":10," + + "\"file-format\":\"parquet\",\"partition\":[9],\"file-size-in-bytes\":1234,\"record-count\":10," + "\"column-sizes\":{\"keys\":[3,4],\"values\":[100,200]}," + "\"value-counts\":{\"keys\":[3,4],\"values\":[90,180]}," + "\"null-value-counts\":{\"keys\":[3,4],\"values\":[10,20]}," diff --git a/core/src/test/java/org/apache/iceberg/TestDataTaskParser.java b/core/src/test/java/org/apache/iceberg/TestDataTaskParser.java index 7359b3ebdb75..03065abe8744 100644 --- a/core/src/test/java/org/apache/iceberg/TestDataTaskParser.java +++ b/core/src/test/java/org/apache/iceberg/TestDataTaskParser.java @@ -146,7 +146,7 @@ public void missingFields() throws Exception { + "\"value\":\"string\",\"value-required\":true}}]}," + "\"metadata-file\":{\"spec-id\":0,\"content\":\"data\"," + "\"file-path\":\"/tmp/metadata2.json\"," - + "\"file-format\":\"METADATA\",\"partition\":[]," + + "\"file-format\":\"metadata\",\"partition\":[]," + "\"file-size-in-bytes\":0,\"record-count\":2,\"sort-order-id\":0}" + "}"; JsonNode missingTableRowsNode = mapper.reader().readTree(missingTableRowsStr); @@ -173,7 +173,7 @@ public void testDataTaskParsesFieldIdPartitionMap() { + "\"projection\":{\"type\":\"struct\",\"schema-id\":0," + "\"fields\":[{\"id\":1,\"name\":\"committed_at\",\"required\":true,\"type\":\"timestamptz\"}]}," + "\"metadata-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/tmp/metadata.json\"," - + "\"file-format\":\"METADATA\",\"partition\":{},\"file-size-in-bytes\":0,\"record-count\":1,\"sort-order-id\":0}," + + "\"file-format\":\"metadata\",\"partition\":{},\"file-size-in-bytes\":0,\"record-count\":1,\"sort-order-id\":0}," + "\"rows\":[{\"1\":\"2009-02-13T23:31:30+00:00\"}]}"; StaticDataTask deserializedTask = (StaticDataTask) ScanTaskParser.fromJson(jsonStr, true); @@ -265,7 +265,7 @@ private String snapshotsDataTaskJson() { + "\"value\":\"string\",\"value-required\":true}}]}," + "\"metadata-file\":{\"spec-id\":0,\"content\":\"data\"," + "\"file-path\":\"/tmp/metadata2.json\"," - + "\"file-format\":\"METADATA\",\"partition\":[]," + + "\"file-format\":\"metadata\",\"partition\":[]," + "\"file-size-in-bytes\":0,\"record-count\":2,\"sort-order-id\":0}," + "\"rows\":[{\"1\":\"2009-02-13T23:31:30+00:00\",\"2\":1,\"4\":\"append\"," + "\"5\":\"file:/tmp/manifest1.avro\"," diff --git a/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java b/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java index 9c1cd744b2cd..1eff487d86cf 100644 --- a/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java +++ b/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java @@ -97,14 +97,14 @@ private String fileScanTaskJsonWithoutTaskType() { + "\"spec\":{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\"," + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}," + "\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[0]," + + "\"file-format\":\"parquet\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"start\":0,\"length\":10," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}," + "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[0],\"file-size-in-bytes\":10," + + "\"file-format\":\"parquet\",\"partition\":[0],\"file-size-in-bytes\":10," + "\"record-count\":1,\"equality-ids\":[1],\"sort-order-id\":0}]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}"; } @@ -117,14 +117,14 @@ private String fileScanTaskJson() { + "\"spec\":{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\"," + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}," + "\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[0]," + + "\"file-format\":\"parquet\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"start\":0,\"length\":10," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}," + "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[0],\"file-size-in-bytes\":10," + + "\"file-format\":\"parquet\",\"partition\":[0],\"file-size-in-bytes\":10," + "\"record-count\":1,\"equality-ids\":[1],\"sort-order-id\":0}]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}"; } @@ -137,14 +137,14 @@ private String fileScanTaskFieldIdPartitionMapJson() { + "\"spec\":{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\"," + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}," + "\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0}," + + "\"file-format\":\"parquet\",\"partition\":{\"1000\":0}," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"start\":0,\"length\":10," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1}," + "{\"spec-id\":0,\"content\":\"equality-deletes\",\"file-path\":\"/path/to/data-a2-deletes.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0},\"file-size-in-bytes\":10," + + "\"file-format\":\"parquet\",\"partition\":{\"1000\":0},\"file-size-in-bytes\":10," + "\"record-count\":1,\"equality-ids\":[1],\"sort-order-id\":0}]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}"; } diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java index dba45e9935ed..7c1021d4afab 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchPlanningResultResponseParser.java @@ -156,7 +156,7 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileSca String invalidJson = "{\"status\":\"submitted\"," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; @@ -194,11 +194,11 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() throws JsonProcessin String expectedToJson = "{\"status\":\"completed\"," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]," + "\"file-scan-tasks\":[" + "{\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[0]," + + "\"file-format\":\"parquet\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"delete-file-references\":[0]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java index dfc1bb9f5172..d7824bc6a673 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestFetchScanTasksResponseParser.java @@ -96,7 +96,7 @@ public void roundTripSerdeWithDeleteFilesNoFileScanTasksPresent() { String invalidJson = "{\"plan-tasks\":[\"task1\",\"task2\"]," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; @@ -130,11 +130,11 @@ public void roundTripSerdeWithFileScanTasks() { String expectedToJson = "{" + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]," + "\"file-scan-tasks\":[" + "{\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[0]," + + "\"file-format\":\"parquet\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"delete-file-references\":[0]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" diff --git a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java index 449b4e36fc0f..e2c9f21dabba 100644 --- a/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java +++ b/core/src/test/java/org/apache/iceberg/rest/responses/TestPlanTableScanResponseParser.java @@ -211,7 +211,7 @@ public void roundTripSerdeWithInvalidPlanStatusSubmittedWithDeleteFilesNoFileSca "{\"status\":\"submitted\"," + "\"plan-id\":\"somePlanId\"," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]" + "}"; @@ -245,11 +245,11 @@ public void roundTripSerdeWithValidStatusAndFileScanTasks() { String expectedToJson = "{\"status\":\"completed\"," + "\"delete-files\":[{\"spec-id\":0,\"content\":\"position-deletes\"," - + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\"," + + "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"parquet\"," + "\"partition\":[0],\"file-size-in-bytes\":10,\"record-count\":1}]," + "\"file-scan-tasks\":[" + "{\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[0]," + + "\"file-format\":\"parquet\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"delete-file-references\":[0]," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" @@ -318,7 +318,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"spec-id\" : 0,\n" + " \"content\" : \"position-deletes\",\n" + " \"file-path\" : \"/path/to/data-a-deletes.parquet\",\n" - + " \"file-format\" : \"PARQUET\",\n" + + " \"file-format\" : \"parquet\",\n" + " \"partition\" : [ 0 ],\n" + " \"file-size-in-bytes\" : 10,\n" + " \"record-count\" : 1\n" @@ -326,7 +326,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"spec-id\" : 0,\n" + " \"content\" : \"position-deletes\",\n" + " \"file-path\" : \"/path/to/data-b-deletes.parquet\",\n" - + " \"file-format\" : \"PARQUET\",\n" + + " \"file-format\" : \"parquet\",\n" + " \"partition\" : [ 1 ],\n" + " \"file-size-in-bytes\" : 10,\n" + " \"record-count\" : 1\n" @@ -334,7 +334,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"spec-id\" : 0,\n" + " \"content\" : \"equality-deletes\",\n" + " \"file-path\" : \"/path/to/data-c-deletes.parquet\",\n" - + " \"file-format\" : \"PARQUET\",\n" + + " \"file-format\" : \"parquet\",\n" + " \"partition\" : [ 2 ],\n" + " \"file-size-in-bytes\" : 10,\n" + " \"record-count\" : 1,\n" @@ -346,7 +346,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"spec-id\" : 0,\n" + " \"content\" : \"data\",\n" + " \"file-path\" : \"/path/to/data-a.parquet\",\n" - + " \"file-format\" : \"PARQUET\",\n" + + " \"file-format\" : \"parquet\",\n" + " \"partition\" : [ 0 ],\n" + " \"file-size-in-bytes\" : 10,\n" + " \"record-count\" : 1,\n" @@ -359,7 +359,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"spec-id\" : 0,\n" + " \"content\" : \"data\",\n" + " \"file-path\" : \"/path/to/data-b.parquet\",\n" - + " \"file-format\" : \"PARQUET\",\n" + + " \"file-format\" : \"parquet\",\n" + " \"partition\" : [ 1 ],\n" + " \"file-size-in-bytes\" : 10,\n" + " \"record-count\" : 1,\n" @@ -373,7 +373,7 @@ public void multipleTasksWithDifferentDeleteFilesDontAccumulateReferences() { + " \"spec-id\" : 0,\n" + " \"content\" : \"data\",\n" + " \"file-path\" : \"/path/to/data-c.parquet\",\n" - + " \"file-format\" : \"PARQUET\",\n" + + " \"file-format\" : \"parquet\",\n" + " \"partition\" : [ 2 ],\n" + " \"file-size-in-bytes\" : 10,\n" + " \"record-count\" : 1,\n" @@ -410,7 +410,7 @@ public void roundTripSerdeWithoutDeleteFiles() { "{\"status\":\"completed\"," + "\"file-scan-tasks\":[" + "{\"data-file\":{\"spec-id\":0,\"content\":\"data\",\"file-path\":\"/path/to/data-a.parquet\"," - + "\"file-format\":\"PARQUET\",\"partition\":[0]," + + "\"file-format\":\"parquet\",\"partition\":[0]," + "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0}," + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1}}]" + "}"; @@ -610,7 +610,7 @@ public void roundTripSerdeWithValidStatusAndFileScanTasksAndCredentials() { + " \"spec-id\" : 0,\n" + " \"content\" : \"position-deletes\",\n" + " \"file-path\" : \"/path/to/data-a-deletes.parquet\",\n" - + " \"file-format\" : \"PARQUET\",\n" + + " \"file-format\" : \"parquet\",\n" + " \"partition\" : [ 0 ],\n" + " \"file-size-in-bytes\" : 10,\n" + " \"record-count\" : 1\n" @@ -620,7 +620,7 @@ public void roundTripSerdeWithValidStatusAndFileScanTasksAndCredentials() { + " \"spec-id\" : 0,\n" + " \"content\" : \"data\",\n" + " \"file-path\" : \"/path/to/data-a.parquet\",\n" - + " \"file-format\" : \"PARQUET\",\n" + + " \"file-format\" : \"parquet\",\n" + " \"partition\" : [ 0 ],\n" + " \"file-size-in-bytes\" : 10,\n" + " \"record-count\" : 1,\n" From d1cf7fcf1b70d6bed6bb02f87d9466b48c0f9567 Mon Sep 17 00:00:00 2001 From: geruh Date: Tue, 2 Dec 2025 22:56:04 -0800 Subject: [PATCH 4/6] take out e.g. --- core/src/main/java/org/apache/iceberg/ContentFileParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index 982e97586554..83d8f0f64d38 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -89,7 +89,7 @@ public static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGe String contentValue = contentFile.content().name().toLowerCase(Locale.ROOT).replace('_', '-'); generator.writeStringField(CONTENT, contentValue); generator.writeStringField(FILE_PATH, contentFile.location()); - // Since 1.11, we serialize format as lower-case strings (e.g., "parquet") + // Since 1.11, we serialize format as lower-case strings like "parquet" String formatValue = contentFile.format().name().toLowerCase(Locale.ROOT); generator.writeStringField(FILE_FORMAT, formatValue); From 309ff17cf2a4a1ceb22e04f583d454c22694bcae Mon Sep 17 00:00:00 2001 From: geruh Date: Wed, 3 Dec 2025 12:31:26 -0800 Subject: [PATCH 5/6] address comments --- .../org/apache/iceberg/ContentFileParser.java | 17 +++++++++-------- .../apache/iceberg/TestContentFileParser.java | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index 83d8f0f64d38..0499fef6d90a 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -51,6 +51,9 @@ public class ContentFileParser { private static final String REFERENCED_DATA_FILE = "referenced-data-file"; private static final String CONTENT_OFFSET = "content-offset"; private static final String CONTENT_SIZE = "content-size-in-bytes"; + private static final String CONTENT_DATA = "data"; + private static final String CONTENT_POSITION_DELETES = "position-deletes"; + private static final String CONTENT_EQUALITY_DELETES = "equality-deletes"; private ContentFileParser() {} @@ -86,12 +89,11 @@ public static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGe generator.writeNumberField(SPEC_ID, contentFile.specId()); // Since 1.11, we serialize content as lowercase kebab-case values like "equality-deletes" - String contentValue = contentFile.content().name().toLowerCase(Locale.ROOT).replace('_', '-'); - generator.writeStringField(CONTENT, contentValue); + generator.writeStringField( + CONTENT, contentFile.content().name().toLowerCase(Locale.ROOT).replace('_', '-')); generator.writeStringField(FILE_PATH, contentFile.location()); // Since 1.11, we serialize format as lower-case strings like "parquet" - String formatValue = contentFile.format().name().toLowerCase(Locale.ROOT); - generator.writeStringField(FILE_FORMAT, formatValue); + generator.writeStringField(FILE_FORMAT, contentFile.format().name().toLowerCase(Locale.ROOT)); if (contentFile.partition() != null) { generator.writeFieldName(PARTITION); @@ -352,13 +354,12 @@ private static PartitionData partitionFromJson( } private static FileContent fileContentFromJson(String content) { - Preconditions.checkArgument(content != null, "Invalid file content: null"); switch (content) { - case "data": + case CONTENT_DATA: return FileContent.DATA; - case "position-deletes": + case CONTENT_POSITION_DELETES: return FileContent.POSITION_DELETES; - case "equality-deletes": + case CONTENT_EQUALITY_DELETES: return FileContent.EQUALITY_DELETES; default: // In 1.10 and before, file content is serialized as the FileContent enum value diff --git a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java index b141eeb766fe..6fe4f571c871 100644 --- a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java +++ b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java @@ -229,6 +229,23 @@ public void testInvalidContentType() throws Exception { .hasMessage("Invalid file content value: 'invalid-content'"); } + @Test + public void testUppercaseFileFormat() throws Exception { + String jsonStr = + "{\"spec-id\":0," + + "\"content\":\"data\"," + + "\"file-path\":\"/path/to/file.parquet\"," + + "\"file-format\":\"PARQUET\"," + + "\"partition\":{}," + + "\"file-size-in-bytes\":1," + + "\"record-count\":1}"; + + JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr); + ContentFile deserializedContentFile = + ContentFileParser.fromJson(jsonNode, Map.of(0, PartitionSpec.unpartitioned())); + assertThat(deserializedContentFile.format()).isEqualTo(FileFormat.PARQUET); + } + @ParameterizedTest @MethodSource("enumContentTypeCases") public void testEnumContentTypeSerialization(FileContent content, String expectedJsonContent) From 2a30899b340060f2da6224f793cabcf950df2965 Mon Sep 17 00:00:00 2001 From: geruh Date: Thu, 4 Dec 2025 00:05:00 -0800 Subject: [PATCH 6/6] use locale english --- core/src/main/java/org/apache/iceberg/ContentFileParser.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java index 0499fef6d90a..f024a24b18ce 100644 --- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java +++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java @@ -90,10 +90,11 @@ public static void toJson(ContentFile contentFile, PartitionSpec spec, JsonGe generator.writeNumberField(SPEC_ID, contentFile.specId()); // Since 1.11, we serialize content as lowercase kebab-case values like "equality-deletes" generator.writeStringField( - CONTENT, contentFile.content().name().toLowerCase(Locale.ROOT).replace('_', '-')); + CONTENT, contentFile.content().name().toLowerCase(Locale.ENGLISH).replace('_', '-')); generator.writeStringField(FILE_PATH, contentFile.location()); // Since 1.11, we serialize format as lower-case strings like "parquet" - generator.writeStringField(FILE_FORMAT, contentFile.format().name().toLowerCase(Locale.ROOT)); + generator.writeStringField( + FILE_FORMAT, contentFile.format().name().toLowerCase(Locale.ENGLISH)); if (contentFile.partition() != null) { generator.writeFieldName(PARTITION);