diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index 04d23fa33abe..3f5be0756fad 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import java.io.Serializable; +import java.util.Objects; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -128,6 +129,26 @@ static Builder builder() { return new Builder(); } + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (!(other instanceof DeletionVectorStruct)) { + return false; + } + + DeletionVectorStruct that = (DeletionVectorStruct) other; + return Objects.equals(location, that.location) + && offset == that.offset + && sizeInBytes == that.sizeInBytes + && cardinality == that.cardinality; + } + + @Override + public int hashCode() { + return Objects.hash(location, offset, sizeInBytes, cardinality); + } + @Override public String toString() { return MoreObjects.toStringHelper(this) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java b/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java new file mode 100644 index 000000000000..cfffd0b9c8ca --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +class TrackedFileBuilder { + private final long snapshotId; + private final FileContent contentType; + + // Required fields + private Integer writerFormatVersion = null; + private String location = null; + private FileFormat fileFormat = null; + private Long recordCount = null; + private Long fileSizeInBytes = null; + private PartitionData partitionData = null; + + // optional fields + private Integer specId = null; + private ContentStats contentStats = null; + private Integer sortOrderId = null; + private DeletionVector deletionVector = null; + private ManifestInfo manifestInfo = null; + private ByteBuffer keyMetadata = null; + private List splitOffsets = null; + private List equalityIds = null; + + // tracking-related fields + private Tracking sourceTracking = null; + private boolean dvUpdated = false; + private ByteBuffer deletedPositions = null; + private ByteBuffer replacedPositions = null; + + /** + * Creates a builder for a newly added data file entry. + * + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder data(long newSnapshotId) { + return new TrackedFileBuilder(FileContent.DATA, newSnapshotId); + } + + /** + * Creates a builder for a newly added equality delete file entry. + * + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder equalityDelete(long newSnapshotId) { + return new TrackedFileBuilder(FileContent.EQUALITY_DELETES, newSnapshotId); + } + + /** + * Creates a builder for a newly added data manifest entry. + * + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder dataManifest(long newSnapshotId) { + return new TrackedFileBuilder(FileContent.DATA_MANIFEST, newSnapshotId); + } + + /** + * Creates a builder for a newly added delete manifest entry. + * + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder deleteManifest(long newSnapshotId) { + return new TrackedFileBuilder(FileContent.DELETE_MANIFEST, newSnapshotId); + } + + /** + * Creates a builder for a tracked file derived from {@code source}. + * + * @param source source tracked file to copy fields from + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder from(TrackedFile source, long newSnapshotId) { + Preconditions.checkArgument(source != null, "Invalid source: null"); + return new TrackedFileBuilder(source, newSnapshotId); + } + + /** + * Returns a DELETED tracked file derived from {@code source}. + * + * @param source source tracked file + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFile deleted(TrackedFile source, long newSnapshotId) { + Preconditions.checkArgument(source != null, "Invalid source: null"); + return terminal(source, TrackingBuilder.deleted(source.tracking(), newSnapshotId)); + } + + /** + * Returns a REPLACED tracked file derived from {@code source}. + * + *

Manifest entries cannot transition to REPLACED. + * + * @param source source tracked file + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFile replaced(TrackedFile source, long newSnapshotId) { + Preconditions.checkArgument(source != null, "Invalid source: null"); + Preconditions.checkArgument( + !isLeafManifest(source.contentType()), + "Manifest entries cannot transition to REPLACED, but entry type is: %s", + source.contentType()); + return terminal(source, TrackingBuilder.replaced(source.tracking(), newSnapshotId)); + } + + private static TrackedFile terminal(TrackedFile source, Tracking tracking) { + return new TrackedFileStruct( + tracking, + source.contentType(), + source.writerFormatVersion(), + source.location(), + source.fileFormat(), + (PartitionData) source.partition(), + source.recordCount(), + source.fileSizeInBytes(), + source.specId(), + source.contentStats(), + source.sortOrderId(), + source.deletionVector(), + source.manifestInfo(), + source.keyMetadata(), + source.splitOffsets(), + source.equalityIds()); + } + + private TrackedFileBuilder(FileContent contentType, long snapshotId) { + this.contentType = contentType; + this.snapshotId = snapshotId; + } + + private TrackedFileBuilder(TrackedFile source, long snapshotId) { + this.contentType = source.contentType(); + this.snapshotId = snapshotId; + this.writerFormatVersion = source.writerFormatVersion(); + this.location = source.location(); + this.fileFormat = source.fileFormat(); + this.recordCount = source.recordCount(); + this.fileSizeInBytes = source.fileSizeInBytes(); + this.partitionData = (PartitionData) source.partition(); + this.specId = source.specId(); + this.contentStats = source.contentStats(); + this.sortOrderId = source.sortOrderId(); + this.deletionVector = source.deletionVector(); + this.manifestInfo = source.manifestInfo(); + this.keyMetadata = source.keyMetadata(); + this.splitOffsets = source.splitOffsets(); + this.equalityIds = source.equalityIds(); + this.sourceTracking = source.tracking(); + } + + TrackedFileBuilder writerFormatVersion(int newWriterFormatVersion) { + Preconditions.checkArgument( + newWriterFormatVersion >= 0, + "Invalid writer format version: %s (must be >= 0)", + newWriterFormatVersion); + this.writerFormatVersion = newWriterFormatVersion; + return this; + } + + TrackedFileBuilder location(String newLocation) { + Preconditions.checkArgument(newLocation != null, "Invalid location: null"); + this.location = newLocation; + return this; + } + + TrackedFileBuilder fileFormat(FileFormat newFileFormat) { + Preconditions.checkArgument(newFileFormat != null, "Invalid file format: null"); + this.fileFormat = newFileFormat; + return this; + } + + TrackedFileBuilder recordCount(long newRecordCount) { + Preconditions.checkArgument( + newRecordCount >= 0, "Invalid record count: %s (must be >= 0)", newRecordCount); + this.recordCount = newRecordCount; + return this; + } + + TrackedFileBuilder fileSizeInBytes(long newFileSizeInBytes) { + Preconditions.checkArgument( + newFileSizeInBytes >= 0, + "Invalid file size in bytes: %s (must be >= 0)", + newFileSizeInBytes); + this.fileSizeInBytes = newFileSizeInBytes; + return this; + } + + TrackedFileBuilder specId(int newSpecId) { + Preconditions.checkArgument(newSpecId >= 0, "Invalid spec ID: %s (must be >= 0)", newSpecId); + this.specId = newSpecId; + return this; + } + + TrackedFileBuilder partition(PartitionData newPartitionData) { + Preconditions.checkArgument(newPartitionData != null, "Invalid partition: null"); + this.partitionData = newPartitionData; + return this; + } + + TrackedFileBuilder contentStats(ContentStats newContentStats) { + Preconditions.checkArgument(newContentStats != null, "Invalid content stats: null"); + this.contentStats = newContentStats; + return this; + } + + TrackedFileBuilder sortOrderId(int newSortOrderId) { + Preconditions.checkArgument( + !isLeafManifest(contentType), + "Sort order ID cannot be added to manifest entries, but entry type is: %s", + contentType); + Preconditions.checkArgument( + newSortOrderId >= 0, "Invalid sort order ID: %s (must be >= 0)", newSortOrderId); + this.sortOrderId = newSortOrderId; + return this; + } + + TrackedFileBuilder deletionVector(DeletionVector newDeletionVector) { + Preconditions.checkArgument(newDeletionVector != null, "Invalid deletion vector: null"); + Preconditions.checkArgument( + contentType == FileContent.DATA, + "Deletion vector can only be added to DATA entries, but entry type is: %s", + contentType); + Preconditions.checkArgument( + this.deletionVector == null || !this.deletionVector.equals(newDeletionVector), + "The same deletion vector already added"); + this.deletionVector = newDeletionVector; + this.dvUpdated = true; + return this; + } + + TrackedFileBuilder manifestInfo(ManifestInfo newManifestInfo) { + Preconditions.checkArgument(newManifestInfo != null, "Invalid manifest info: null"); + Preconditions.checkArgument( + isLeafManifest(contentType), + "Manifest info can only be added to manifests, but entry type is: %s", + contentType); + this.manifestInfo = newManifestInfo; + return this; + } + + TrackedFileBuilder keyMetadata(ByteBuffer newKeyMetadata) { + Preconditions.checkArgument(newKeyMetadata != null, "Invalid key metadata: null"); + this.keyMetadata = newKeyMetadata; + return this; + } + + TrackedFileBuilder splitOffsets(List newSplitOffsets) { + Preconditions.checkArgument(newSplitOffsets != null, "Invalid split offsets: null"); + Preconditions.checkArgument( + !isLeafManifest(contentType), + "Split offsets cannot be added to manifest entries, but entry type is: %s", + contentType); + this.splitOffsets = newSplitOffsets; + return this; + } + + TrackedFileBuilder equalityIds(List newEqualityIds) { + Preconditions.checkArgument(newEqualityIds != null, "Invalid equality IDs: null"); + Preconditions.checkArgument( + contentType == FileContent.EQUALITY_DELETES, + "Equality IDs can only be added to EQUALITY_DELETES entries, but entry type is: %s", + contentType); + this.equalityIds = newEqualityIds; + return this; + } + + TrackedFileBuilder deletedPositions(ByteBuffer newDeletedPositions) { + Preconditions.checkArgument(newDeletedPositions != null, "Invalid deleted positions: null"); + Preconditions.checkArgument( + isLeafManifest(contentType), + "Deleted positions can only be added to manifest entries, but entry type is: %s", + contentType); + this.deletedPositions = newDeletedPositions; + return this; + } + + TrackedFileBuilder replacedPositions(ByteBuffer newReplacedPositions) { + Preconditions.checkArgument(newReplacedPositions != null, "Invalid replaced positions: null"); + Preconditions.checkArgument( + isLeafManifest(contentType), + "Replaced positions can only be added to manifest entries, but entry type is: %s", + contentType); + this.replacedPositions = newReplacedPositions; + return this; + } + + private static boolean isLeafManifest(FileContent contentType) { + return contentType == FileContent.DATA_MANIFEST || contentType == FileContent.DELETE_MANIFEST; + } + + TrackedFile build() { + Preconditions.checkArgument( + writerFormatVersion != null, "Missing required field: writer format version"); + Preconditions.checkArgument(location != null, "Missing required field: location"); + Preconditions.checkArgument(fileFormat != null, "Missing required field: file format"); + Preconditions.checkArgument(recordCount != null, "Missing required field: record count"); + Preconditions.checkArgument( + fileSizeInBytes != null, "Missing required field: file size in bytes"); + Preconditions.checkArgument(partitionData != null, "Missing required field: partition data"); + Preconditions.checkArgument( + !isLeafManifest(contentType) || manifestInfo != null, + "Missing required field: manifest info"); + Preconditions.checkArgument( + contentType != FileContent.EQUALITY_DELETES || equalityIds != null, + "Missing required field: equality IDs"); + + TrackingBuilder trackingBuilder = + sourceTracking == null + ? TrackingBuilder.added(snapshotId) + : TrackingBuilder.from(sourceTracking, snapshotId); + + if (dvUpdated) { + trackingBuilder.dvUpdated(); + } + + if (deletedPositions != null) { + trackingBuilder.deletedPositions(deletedPositions); + } + + if (replacedPositions != null) { + trackingBuilder.replacedPositions(replacedPositions); + } + + return new TrackedFileStruct( + trackingBuilder.build(), + contentType, + writerFormatVersion, + location, + fileFormat, + partitionData, + recordCount, + fileSizeInBytes, + specId, + contentStats, + sortOrderId, + deletionVector, + manifestInfo, + keyMetadata, + splitOffsets, + equalityIds); + } +} diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java index 3c350b89373d..9a44e8045cbb 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java @@ -72,13 +72,13 @@ public PartitionData copy() { private int writerFormatVersion = -1; private String location = null; private FileFormat fileFormat = null; + private Tracking tracking = null; private long recordCount = -1L; private long fileSizeInBytes = -1L; - private Integer specId = null; private PartitionData partitionData = EMPTY_PARTITION_DATA; // optional fields - private Tracking tracking = null; + private Integer specId = null; private ContentStats contentStats = null; private Integer sortOrderId = null; private DeletionVector deletionVector = null; @@ -102,7 +102,6 @@ public PartitionData copy() { super(BASE_TYPE.fields().size()); } - /** Constructor that accepts required fields. */ TrackedFileStruct( Tracking tracking, FileContent contentType, @@ -111,7 +110,15 @@ public PartitionData copy() { FileFormat fileFormat, PartitionData partition, long recordCount, - long fileSizeInBytes) { + long fileSizeInBytes, + Integer specId, + ContentStats contentStats, + Integer sortOrderId, + DeletionVector deletionVector, + ManifestInfo manifestInfo, + ByteBuffer keyMetadata, + List splitOffsets, + List equalityIds) { super(BASE_TYPE.fields().size()); this.tracking = tracking; this.contentType = contentType; @@ -123,6 +130,15 @@ public PartitionData copy() { if (partition != null) { this.partitionData = partition; } + + this.specId = specId; + this.contentStats = contentStats; + this.sortOrderId = sortOrderId; + this.deletionVector = deletionVector; + this.manifestInfo = manifestInfo; + this.keyMetadata = ByteBuffers.toByteArray(keyMetadata); + this.splitOffsets = ArrayUtil.toLongArray(splitOffsets); + this.equalityIds = ArrayUtil.toIntArray(equalityIds); } /** Copy constructor. */ diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index 8242be38e94a..0f08b59e150d 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -163,6 +163,64 @@ void testBuilderMissingRequiredFields() { .hasMessage("Missing required value: cardinality"); } + @Test + void testDvEquality() { + DeletionVectorStruct dv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); + + DeletionVectorStruct sameDv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); + + DeletionVectorStruct dvWithDifferentLocation = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv2.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); + + DeletionVectorStruct dvWithDifferentOffset = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(1L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); + + DeletionVectorStruct dvWithDifferentSize = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(8L) + .cardinality(42L) + .build(); + + DeletionVectorStruct dvWithDifferentCardinality = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(2L) + .build(); + + assertThat(dv).isEqualTo(dv); + assertThat(dv).isEqualTo(sameDv); + assertThat(dv).isNotEqualTo(dvWithDifferentLocation); + assertThat(dv).isNotEqualTo(dvWithDifferentOffset); + assertThat(dv).isNotEqualTo(dvWithDifferentSize); + assertThat(dv).isNotEqualTo(dvWithDifferentCardinality); + } + @Test void testBuilderRejectsInvalidValuesAtSetter() { assertThatThrownBy(() -> DeletionVectorStruct.builder().location(null)) diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index e13a342b8d5a..dc8f26dce8a8 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -57,13 +57,9 @@ class TestTrackedFileAdapters { .identity("category") .withSpecId(PARTITIONED_SPEC_ID) .build(); - - // Passed for unpartitioned test files, where there is no partition tuple. - private static final PartitionData NO_PARTITION = null; + private static final PartitionData PARTITION = partition("books"); // Tracking field ordinals, looked up from the schema so the tests do not hard-code offsets. - private static final int STATUS_ORDINAL = ordinalOf(Tracking.schema(), "status"); - private static final int SNAPSHOT_ID_ORDINAL = ordinalOf(Tracking.schema(), "snapshot_id"); private static final int DATA_SEQUENCE_NUMBER_ORDINAL = ordinalOf(Tracking.schema(), "sequence_number"); private static final int FILE_SEQUENCE_NUMBER_ORDINAL = @@ -75,40 +71,34 @@ class TestTrackedFileAdapters { // TrackedFile optional field ordinals, looked up from the schema. private static final Types.StructType TRACKED_FILE_SCHEMA = TrackedFile.schemaWithContentStats(Types.StructType.of(), Types.StructType.of()); + private static final int CONTENT_TYPE_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "content_type"); private static final int SPEC_ID_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "spec_id"); - private static final int CONTENT_STATS_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "content_stats"); - private static final int SORT_ORDER_ID_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "sort_order_id"); private static final int DELETION_VECTOR_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "deletion_vector"); - private static final int KEY_METADATA_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "key_metadata"); - private static final int SPLIT_OFFSETS_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "split_offsets"); - private static final int EQUALITY_IDS_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "equality_ids"); @Test void testDataFileAdapterDelegation() { - PartitionData partition = partition("books"); - - TrackedFileStruct file = - new TrackedFileStruct( - createTracking(), - FileContent.DATA, - WRITER_FORMAT_VERSION, - DATA_FILE_LOCATION, - FileFormat.PARQUET, - partition, - 100L, - 1024L); - file.set(SPEC_ID_ORDINAL, PARTITIONED_SPEC_ID); - file.set(CONTENT_STATS_ORDINAL, createContentStats()); - file.set(SORT_ORDER_ID_ORDINAL, 3); - file.set(KEY_METADATA_ORDINAL, ByteBuffer.wrap(new byte[] {1, 2, 3})); - file.set(SPLIT_OFFSETS_ORDINAL, ImmutableList.of(50L, 100L)); + TrackedFile file = + TrackedFileBuilder.data(42L) + .writerFormatVersion(WRITER_FORMAT_VERSION) + .location(DATA_FILE_LOCATION) + .fileFormat(FileFormat.PARQUET) + .partition(PARTITION) + .recordCount(100L) + .fileSizeInBytes(1024L) + .specId(PARTITIONED_SPEC_ID) + .contentStats(createContentStats()) + .sortOrderId(3) + .keyMetadata(ByteBuffer.wrap(new byte[] {1, 2, 3})) + .splitOffsets(ImmutableList.of(50L, 100L)) + .build(); + populateTrackingFields(file); DataFile dataFile = TrackedFileAdapters.asDataFile(file, specsById(PARTITIONED_SPEC)); assertThat(dataFile.pos()).isEqualTo(MANIFEST_POS); assertThat(dataFile.specId()).isEqualTo(PARTITIONED_SPEC_ID); - assertThat(dataFile.partition()).isSameAs(partition); + assertThat(dataFile.partition()).isSameAs(PARTITION); assertThat(dataFile.content()).isEqualTo(FileContent.DATA); assertThat(dataFile.location()).isEqualTo(DATA_FILE_LOCATION); assertThat(dataFile.format()).isEqualTo(FileFormat.PARQUET); @@ -139,7 +129,7 @@ void testDataFileAdapterDelegation() { @ParameterizedTest @EnumSource(value = FileContent.class, mode = EnumSource.Mode.EXCLUDE, names = "DATA") void testDataFileAdapterRejectsNonDataContent(FileContent contentType) { - TrackedFileStruct file = trackedFile(contentType); + TrackedFileStruct file = dummyTrackedFile(contentType); assertThatThrownBy(() -> TrackedFileAdapters.asDataFile(file, UNPARTITIONED)) .isInstanceOf(IllegalArgumentException.class) @@ -148,31 +138,29 @@ void testDataFileAdapterRejectsNonDataContent(FileContent contentType) { @Test void testEqualityDeleteFileAdapterDelegation() { - PartitionData partition = partition("books"); - - TrackedFileStruct file = - new TrackedFileStruct( - createTracking(), - FileContent.EQUALITY_DELETES, - WRITER_FORMAT_VERSION, - "s3://bucket/eq-delete.avro", - FileFormat.AVRO, - partition, - 50L, - 512L); - file.set(SPEC_ID_ORDINAL, PARTITIONED_SPEC_ID); - file.set(CONTENT_STATS_ORDINAL, createContentStats()); - file.set(SORT_ORDER_ID_ORDINAL, 5); - file.set(KEY_METADATA_ORDINAL, ByteBuffer.wrap(new byte[] {4, 5})); - file.set(SPLIT_OFFSETS_ORDINAL, ImmutableList.of(200L)); - file.set(EQUALITY_IDS_ORDINAL, ImmutableList.of(1, 2, 3)); + TrackedFile file = + TrackedFileBuilder.equalityDelete(42L) + .writerFormatVersion(WRITER_FORMAT_VERSION) + .location("s3://bucket/eq-delete.avro") + .fileFormat(FileFormat.AVRO) + .partition(PARTITION) + .recordCount(50L) + .fileSizeInBytes(512L) + .specId(PARTITIONED_SPEC_ID) + .contentStats(createContentStats()) + .sortOrderId(5) + .keyMetadata(ByteBuffer.wrap(new byte[] {4, 5})) + .splitOffsets(ImmutableList.of(200L)) + .equalityIds(ImmutableList.of(1, 2, 3)) + .build(); + populateTrackingFields(file); DeleteFile deleteFile = TrackedFileAdapters.asEqualityDeleteFile(file, specsById(PARTITIONED_SPEC)); assertThat(deleteFile.pos()).isEqualTo(MANIFEST_POS); assertThat(deleteFile.specId()).isEqualTo(PARTITIONED_SPEC_ID); - assertThat(deleteFile.partition()).isSameAs(partition); + assertThat(deleteFile.partition()).isSameAs(PARTITION); assertThat(deleteFile.content()).isEqualTo(FileContent.EQUALITY_DELETES); assertThat(deleteFile.location()).isEqualTo("s3://bucket/eq-delete.avro"); assertThat(deleteFile.format()).isEqualTo(FileFormat.AVRO); @@ -203,7 +191,7 @@ void testEqualityDeleteFileAdapterDelegation() { @ParameterizedTest @EnumSource(value = FileContent.class, mode = EnumSource.Mode.EXCLUDE, names = "EQUALITY_DELETES") void testEqualityDeleteFileAdapterRejectsNonEqualityContent(FileContent contentType) { - TrackedFileStruct file = trackedFile(contentType); + TrackedFileStruct file = dummyTrackedFile(contentType); assertThatThrownBy(() -> TrackedFileAdapters.asEqualityDeleteFile(file, UNPARTITIONED)) .isInstanceOf(IllegalArgumentException.class) @@ -220,19 +208,18 @@ void testDVDeleteFileAdapterDelegation() { .cardinality(10L) .build(); - PartitionData partition = partition("books"); - TrackedFileStruct file = - new TrackedFileStruct( - createTracking(), - FileContent.DATA, - WRITER_FORMAT_VERSION, - DATA_FILE_LOCATION, - FileFormat.PARQUET, - partition, - 100L, - 1024L); - file.set(SPEC_ID_ORDINAL, PARTITIONED_SPEC_ID); - file.set(DELETION_VECTOR_ORDINAL, dv); + TrackedFile file = + TrackedFileBuilder.data(42L) + .writerFormatVersion(WRITER_FORMAT_VERSION) + .location(DATA_FILE_LOCATION) + .fileFormat(FileFormat.PARQUET) + .partition(PARTITION) + .recordCount(100L) + .fileSizeInBytes(1024L) + .specId(PARTITIONED_SPEC_ID) + .deletionVector(dv) + .build(); + populateTrackingFields(file); DeleteFile dvFile = TrackedFileAdapters.asDVDeleteFile(file, specsById(PARTITIONED_SPEC)); @@ -251,7 +238,7 @@ void testDVDeleteFileAdapterDelegation() { // fields delegated from TrackedFile / Tracking assertThat(dvFile.pos()).isEqualTo(MANIFEST_POS); assertThat(dvFile.specId()).isEqualTo(PARTITIONED_SPEC_ID); - assertThat(dvFile.partition()).isSameAs(partition); + assertThat(dvFile.partition()).isSameAs(PARTITION); assertThat(dvFile.dataSequenceNumber()).isEqualTo(DATA_SEQUENCE_NUMBER); assertThat(dvFile.fileSequenceNumber()).isEqualTo(FILE_SEQUENCE_NUMBER); assertThat(dvFile.manifestLocation()).isEqualTo(MANIFEST_LOCATION); @@ -273,7 +260,7 @@ void testDVDeleteFileAdapterDelegation() { @ParameterizedTest @EnumSource(value = FileContent.class, mode = EnumSource.Mode.EXCLUDE, names = "DATA") void testDVDeleteFileAdapterRejectsNonDataContent(FileContent contentType) { - TrackedFileStruct file = trackedFile(contentType); + TrackedFileStruct file = dummyTrackedFile(contentType); assertThatThrownBy(() -> TrackedFileAdapters.asDVDeleteFile(file, UNPARTITIONED)) .isInstanceOf(IllegalArgumentException.class) @@ -282,7 +269,7 @@ void testDVDeleteFileAdapterRejectsNonDataContent(FileContent contentType) { @Test void testDVDeleteFileAdapterRejectsNullDeletionVector() { - TrackedFileStruct file = trackedFile(FileContent.DATA); + TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); assertThatThrownBy(() -> TrackedFileAdapters.asDVDeleteFile(file, UNPARTITIONED)) .isInstanceOf(IllegalArgumentException.class) @@ -291,7 +278,7 @@ void testDVDeleteFileAdapterRejectsNullDeletionVector() { @Test void testNullContentStatsReturnsNullStats() { - TrackedFileStruct file = trackedFile(FileContent.DATA); + TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); DataFile dataFile = TrackedFileAdapters.asDataFile(file, UNPARTITIONED); @@ -307,19 +294,19 @@ void testNullTrackingReturnsNullTrackingFields() { // Files read before manifest inheritance have no tracking; tracking-derived fields must be // null rather than throwing. assertNullTrackingFields( - TrackedFileAdapters.asDataFile(trackedFile(FileContent.DATA), UNPARTITIONED)); + TrackedFileAdapters.asDataFile(dummyTrackedFile(FileContent.DATA), UNPARTITIONED)); assertNullTrackingFields( TrackedFileAdapters.asEqualityDeleteFile( - trackedFile(FileContent.EQUALITY_DELETES), UNPARTITIONED)); + dummyTrackedFile(FileContent.EQUALITY_DELETES), UNPARTITIONED)); - TrackedFileStruct dvFile = trackedFile(FileContent.DATA); + TrackedFileStruct dvFile = dummyTrackedFile(FileContent.DATA); dvFile.set(DELETION_VECTOR_ORDINAL, deletionVector()); assertNullTrackingFields(TrackedFileAdapters.asDVDeleteFile(dvFile, UNPARTITIONED)); } @Test void testUnpartitionedFilePartitionIsEmpty() { - TrackedFileStruct file = trackedFile(FileContent.DATA); + TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); DataFile dataFile = TrackedFileAdapters.asDataFile(file, UNPARTITIONED); @@ -330,7 +317,7 @@ void testUnpartitionedFilePartitionIsEmpty() { @Test void testNullSpecIdResolvesToUnpartitionedSpec() { PartitionSpec unpartitioned = PartitionSpec.builderFor(new Schema()).withSpecId(5).build(); - TrackedFileStruct file = trackedFile(FileContent.DATA); + TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); DataFile dataFile = TrackedFileAdapters.asDataFile(file, specsById(unpartitioned)); @@ -341,7 +328,7 @@ void testNullSpecIdResolvesToUnpartitionedSpec() { void testNullSpecIdThrowsWhenNoUnpartitionedSpec() { Schema schema = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get())); PartitionSpec partitioned = PartitionSpec.builderFor(schema).identity("id").build(); - TrackedFileStruct file = trackedFile(FileContent.DATA); + TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); assertThatThrownBy(() -> TrackedFileAdapters.asDataFile(file, specsById(partitioned))) .isInstanceOf(IllegalArgumentException.class) @@ -350,7 +337,7 @@ void testNullSpecIdThrowsWhenNoUnpartitionedSpec() { @Test void testUnknownSpecIdThrows() { - TrackedFileStruct file = trackedFile(FileContent.DATA); + TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); file.set(SPEC_ID_ORDINAL, 99); assertThatThrownBy(() -> TrackedFileAdapters.asDataFile(file, ImmutableMap.of())) @@ -361,7 +348,7 @@ void testUnknownSpecIdThrows() { @Test void testSpecIdMismatchThrows() { int mismatchedSpecId = PARTITIONED_SPEC_ID + 1; - TrackedFileStruct file = trackedFile(FileContent.DATA); + TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); file.set(SPEC_ID_ORDINAL, PARTITIONED_SPEC_ID); PartitionSpec mismatched = PartitionSpec.builderFor(PARTITION_SCHEMA) @@ -399,28 +386,19 @@ private static PartitionData partition(String category) { } /** Minimal file with no tracking, used by the rejection and null-tracking tests. */ - private static TrackedFileStruct trackedFile(FileContent contentType) { - return new TrackedFileStruct( - null, - contentType, - WRITER_FORMAT_VERSION, - "s3://bucket/file", - FileFormat.PARQUET, - NO_PARTITION, - 1L, - 1L); + private static TrackedFileStruct dummyTrackedFile(FileContent contentType) { + TrackedFileStruct file = new TrackedFileStruct(); + file.set(CONTENT_TYPE_ORDINAL, contentType.id()); + return file; } - private static TrackingStruct createTracking() { - TrackingStruct tracking = new TrackingStruct(); - tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); - tracking.set(SNAPSHOT_ID_ORDINAL, 42L); + private static void populateTrackingFields(TrackedFile file) { + TrackingStruct tracking = (TrackingStruct) file.tracking(); tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, DATA_SEQUENCE_NUMBER); tracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, FILE_SEQUENCE_NUMBER); tracking.set(FIRST_ROW_ID_ORDINAL, FIRST_ROW_ID); tracking.setManifestLocation(MANIFEST_LOCATION); tracking.set(MANIFEST_POS_ORDINAL, MANIFEST_POS); - return tracking; } private static DeletionVector deletionVector() { diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java new file mode 100644 index 000000000000..9e96923f2fc0 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java @@ -0,0 +1,863 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.nio.ByteBuffer; +import java.util.stream.Stream; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class TestTrackedFileBuilder { + private static final int WRITER_FORMAT_VERSION_V4 = 4; + private static final Schema TABLE_SCHEMA = + new Schema( + optional(1, "id", Types.IntegerType.get()), optional(2, "data", Types.StringType.get())); + private static final Types.StructType PARTITION_TYPE = + PartitionSpec.builderFor(TABLE_SCHEMA).identity("id").build().partitionType(); + private static final PartitionData PARTITION_DATA = new PartitionData(PARTITION_TYPE); + private static final ManifestInfo MANIFEST_INFO = + ManifestInfoStruct.builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .build(); + private static final DeletionVector DELETION_VECTOR = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(0L) + .sizeInBytes(128L) + .cardinality(10L) + .build(); + private static final ContentStats CONTENT_STATS = + BaseContentStats.builder() + .withTableSchema(TABLE_SCHEMA) + .withFieldStats( + BaseFieldStats.builder() + .fieldId(1) + .type(Types.IntegerType.get()) + .valueCount(2000L) + .nullValueCount(0L) + .lowerBound(1) + .upperBound(1000) + .build()) + .withFieldStats( + BaseFieldStats.builder() + .fieldId(2) + .type(Types.StringType.get()) + .valueCount(2000L) + .nullValueCount(5L) + .lowerBound("a") + .upperBound("z") + .build()) + .build(); + private static final ByteBuffer KEY_METADATA = ByteBuffer.wrap(new byte[] {1, 2, 3}); + private static final ImmutableList SPLIT_OFFSETS = ImmutableList.of(0L, 4096L, 8192L); + private static final ByteBuffer DELETED_POSITIONS = ByteBuffer.wrap(new byte[] {10, 11, 12}); + private static final ByteBuffer REPLACED_POSITIONS = ByteBuffer.wrap(new byte[] {20, 21, 22}); + + private static Stream missingRequiredFieldCases() { + return Stream.of( + Arguments.of("writerFormatVersion", "Missing required field: writer format version"), + Arguments.of("location", "Missing required field: location"), + Arguments.of("fileFormat", "Missing required field: file format"), + Arguments.of("recordCount", "Missing required field: record count"), + Arguments.of("fileSizeInBytes", "Missing required field: file size in bytes"), + Arguments.of("partition", "Missing required field: partition data")); + } + + @ParameterizedTest + @MethodSource("missingRequiredFieldCases") + public void missingRequiredFields(String missingField, String expectedMessage) { + TrackedFileBuilder dataBuilder = + builderWithMissingRequiredField(TrackedFileBuilder.data(50L), missingField); + TrackedFileBuilder equalityDeleteBuilder = + builderWithMissingRequiredField(TrackedFileBuilder.equalityDelete(50L), missingField); + TrackedFileBuilder dataManifestBuilder = + builderWithMissingRequiredField(TrackedFileBuilder.dataManifest(50L), missingField); + TrackedFileBuilder deleteManifestBuilder = + builderWithMissingRequiredField(TrackedFileBuilder.deleteManifest(50L), missingField); + + assertThatThrownBy(dataBuilder::build) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage(expectedMessage); + assertThatThrownBy(equalityDeleteBuilder::build) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage(expectedMessage); + assertThatThrownBy(dataManifestBuilder::build) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage(expectedMessage); + assertThatThrownBy(deleteManifestBuilder::build) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage(expectedMessage); + } + + private TrackedFileBuilder builderWithMissingRequiredField( + TrackedFileBuilder builder, String missingField) { + if (!"writerFormatVersion".equals(missingField)) { + builder.writerFormatVersion(WRITER_FORMAT_VERSION_V4); + } + if (!"location".equals(missingField)) { + builder.location("s3://bucket/data/file"); + } + if (!"fileFormat".equals(missingField)) { + builder.fileFormat(FileFormat.PARQUET); + } + if (!"recordCount".equals(missingField)) { + builder.recordCount(2000L); + } + if (!"fileSizeInBytes".equals(missingField)) { + builder.fileSizeInBytes(12345L); + } + if (!"partition".equals(missingField)) { + builder.partition(PARTITION_DATA); + } + return builder; + } + + @ParameterizedTest + @MethodSource("manifestBuilders") + public void missingFieldsForManifests(TrackedFileBuilder builder, FileContent contentType) { + assertThatThrownBy( + () -> + builder + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/manifest.avro") + .fileFormat(FileFormat.AVRO) + .recordCount(420L) + .fileSizeInBytes(556L) + .partition(PARTITION_DATA) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: manifest info"); + } + + @Test + public void missingEqualityIdsForEqualityDeletes() { + assertThatThrownBy( + () -> + TrackedFileBuilder.equalityDelete(50L) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/eq_delete.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: equality IDs"); + } + + private static Stream nonEqualityDeleteBuilders() { + return Stream.of( + Arguments.of(TrackedFileBuilder.data(10L), FileContent.DATA), + Arguments.of(TrackedFileBuilder.dataManifest(10L), FileContent.DATA_MANIFEST), + Arguments.of(TrackedFileBuilder.deleteManifest(10L), FileContent.DELETE_MANIFEST), + Arguments.of(TrackedFileBuilder.from(sourceData(12L), 20L), FileContent.DATA), + Arguments.of( + TrackedFileBuilder.from(sourceDataManifest(21L), 25L), FileContent.DATA_MANIFEST), + Arguments.of( + TrackedFileBuilder.from(sourceDeleteManifest(12L), 20L), FileContent.DELETE_MANIFEST)); + } + + @ParameterizedTest + @MethodSource("nonEqualityDeleteBuilders") + public void invalidEqualityIdsForContentType( + TrackedFileBuilder builder, FileContent contentType) { + assertThatThrownBy(() -> builder.equalityIds(ImmutableList.of(1))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Equality IDs can only be added to EQUALITY_DELETES entries, but entry type is: " + + contentType); + } + + private static Stream nonDataBuilders() { + return Stream.of( + Arguments.of(TrackedFileBuilder.equalityDelete(10L), FileContent.EQUALITY_DELETES), + Arguments.of(TrackedFileBuilder.dataManifest(10L), FileContent.DATA_MANIFEST), + Arguments.of(TrackedFileBuilder.deleteManifest(10L), FileContent.DELETE_MANIFEST), + Arguments.of( + TrackedFileBuilder.from(sourceEqualityDelete(12L), 20L), FileContent.EQUALITY_DELETES), + Arguments.of( + TrackedFileBuilder.from(sourceDataManifest(21L), 25L), FileContent.DATA_MANIFEST), + Arguments.of( + TrackedFileBuilder.from(sourceDeleteManifest(12L), 20L), FileContent.DELETE_MANIFEST)); + } + + @ParameterizedTest + @MethodSource("nonDataBuilders") + public void invalidDeletionVectorForContentType( + TrackedFileBuilder builder, FileContent contentType) { + assertThatThrownBy(() -> builder.deletionVector(DELETION_VECTOR)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Deletion vector can only be added to DATA entries, but entry type is: " + contentType); + } + + @ParameterizedTest + @MethodSource("manifestBuilders") + public void invalidSortOrderIdForContentType( + TrackedFileBuilder builder, FileContent contentType) { + assertThatThrownBy(() -> builder.sortOrderId(1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Sort order ID cannot be added to manifest entries, but entry type is: " + contentType); + } + + @ParameterizedTest + @MethodSource("manifestBuilders") + public void invalidSplitOffsetsForContentType( + TrackedFileBuilder builder, FileContent contentType) { + assertThatThrownBy(() -> builder.splitOffsets(SPLIT_OFFSETS)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Split offsets cannot be added to manifest entries, but entry type is: " + contentType); + } + + private static Stream nonManifestBuilders() { + return Stream.of( + Arguments.of(TrackedFileBuilder.data(10L), FileContent.DATA), + Arguments.of(TrackedFileBuilder.equalityDelete(10L), FileContent.EQUALITY_DELETES), + Arguments.of(TrackedFileBuilder.from(sourceData(12L), 20L), FileContent.DATA), + Arguments.of( + TrackedFileBuilder.from(sourceEqualityDelete(12L), 20L), FileContent.EQUALITY_DELETES)); + } + + @ParameterizedTest + @MethodSource("nonManifestBuilders") + public void invalidManifestInfoForContentType( + TrackedFileBuilder builder, FileContent contentType) { + assertThatThrownBy(() -> builder.manifestInfo(MANIFEST_INFO)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Manifest info can only be added to manifests, but entry type is: " + contentType); + } + + @ParameterizedTest + @MethodSource("nonManifestBuilders") + public void invalidDeletedPositionsForContentType( + TrackedFileBuilder builder, FileContent contentType) { + assertThatThrownBy(() -> builder.deletedPositions(DELETED_POSITIONS)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Deleted positions can only be added to manifest entries, but entry type is: " + + contentType); + } + + @ParameterizedTest + @MethodSource("nonManifestBuilders") + public void invalidReplacedPositionsForContentType( + TrackedFileBuilder builder, FileContent contentType) { + assertThatThrownBy(() -> builder.replacedPositions(REPLACED_POSITIONS)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Replaced positions can only be added to manifest entries, but entry type is: " + + contentType); + } + + @Test + public void invalidNullInputs() { + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).location(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).fileFormat(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid file format: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).partition(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid partition: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).contentStats(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid content stats: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).deletionVector(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid deletion vector: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(30L).manifestInfo(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid manifest info: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).keyMetadata(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid key metadata: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).splitOffsets(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid split offsets: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.equalityDelete(30L).equalityIds(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid equality IDs: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.from(null, 20L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid source: null"); + + assertThatThrownBy( + () -> + TrackedFileBuilder.from( + entryWithInheritedSeqNums(sourceDataManifest(10L), 15L), 20L) + .deletedPositions(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid deleted positions: null"); + + assertThatThrownBy( + () -> + TrackedFileBuilder.from( + entryWithInheritedSeqNums(sourceDeleteManifest(100L), 150L), 200L) + .replacedPositions(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid replaced positions: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.deleted(null, 20L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid source: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.replaced(null, 20L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid source: null"); + } + + @Test + public void invalidNegativeInputs() { + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).writerFormatVersion(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid writer format version: -1 (must be >= 0)"); + + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).recordCount(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid record count: -1 (must be >= 0)"); + + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).fileSizeInBytes(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid file size in bytes: -1 (must be >= 0)"); + + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).specId(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid spec ID: -1 (must be >= 0)"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(40L).sortOrderId(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid sort order ID: -1 (must be >= 0)"); + } + + @Test + public void buildDataFileWithRequiredFieldsOnly() { + TrackedFile trackedFile = + TrackedFileBuilder.data(50L) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .build(); + + assertThat(trackedFile.writerFormatVersion()).isEqualTo(WRITER_FORMAT_VERSION_V4); + assertThat(trackedFile.contentType()).isEqualTo(FileContent.DATA); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/file.parquet"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); + assertThat(trackedFile.recordCount()).isEqualTo(2000L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + assertThat(trackedFile.tracking().dvSnapshotId()).isNull(); + + assertThat(trackedFile.specId()).isNull(); + assertThat(trackedFile.contentStats()).isNull(); + assertThat(trackedFile.sortOrderId()).isNull(); + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.manifestInfo()).isNull(); + assertThat(trackedFile.keyMetadata()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + assertThat(trackedFile.equalityIds()).isNull(); + } + + @Test + public void buildDataFileWithAllFields() { + TrackedFile trackedFile = + TrackedFileBuilder.data(50L) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .specId(7) + .partition(PARTITION_DATA) + .contentStats(CONTENT_STATS) + .sortOrderId(3) + .deletionVector(DELETION_VECTOR) + .keyMetadata(KEY_METADATA) + .splitOffsets(SPLIT_OFFSETS) + .build(); + + assertThat(trackedFile.writerFormatVersion()).isEqualTo(WRITER_FORMAT_VERSION_V4); + assertThat(trackedFile.contentType()).isEqualTo(FileContent.DATA); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/file.parquet"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); + assertThat(trackedFile.recordCount()).isEqualTo(2000L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); + assertThat(trackedFile.specId()).isEqualTo(7); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); + assertThat(trackedFile.sortOrderId()).isEqualTo(3); + assertThat(trackedFile.deletionVector()).isSameAs(DELETION_VECTOR); + assertThat(trackedFile.keyMetadata()).isEqualTo(KEY_METADATA); + assertThat(trackedFile.splitOffsets()).isEqualTo(SPLIT_OFFSETS); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + assertThat(trackedFile.tracking().dvSnapshotId()).isEqualTo(50L); + + // Unsupported fields for data files + assertThat(trackedFile.manifestInfo()).isNull(); + assertThat(trackedFile.equalityIds()).isNull(); + } + + @Test + public void buildEqualityDeleteFileWithRequiredFieldsOnly() { + TrackedFile trackedFile = + TrackedFileBuilder.equalityDelete(50L) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/eq_delete.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .equalityIds(ImmutableList.of(1)) + .build(); + + assertThat(trackedFile.writerFormatVersion()).isEqualTo(WRITER_FORMAT_VERSION_V4); + assertThat(trackedFile.contentType()).isEqualTo(FileContent.EQUALITY_DELETES); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/eq_delete.parquet"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); + assertThat(trackedFile.recordCount()).isEqualTo(2000L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.equalityIds()).containsExactly(1); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + + assertThat(trackedFile.specId()).isNull(); + assertThat(trackedFile.contentStats()).isNull(); + assertThat(trackedFile.sortOrderId()).isNull(); + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.manifestInfo()).isNull(); + assertThat(trackedFile.keyMetadata()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + } + + @Test + public void buildEqualityDeleteFileWithAllFields() { + TrackedFile trackedFile = + TrackedFileBuilder.equalityDelete(50L) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/eq_delete.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .specId(7) + .partition(PARTITION_DATA) + .contentStats(CONTENT_STATS) + .keyMetadata(KEY_METADATA) + .splitOffsets(SPLIT_OFFSETS) + .sortOrderId(3) + .equalityIds(ImmutableList.of(1, 2)) + .build(); + + assertThat(trackedFile.writerFormatVersion()).isEqualTo(WRITER_FORMAT_VERSION_V4); + assertThat(trackedFile.contentType()).isEqualTo(FileContent.EQUALITY_DELETES); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/eq_delete.parquet"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); + assertThat(trackedFile.recordCount()).isEqualTo(2000L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); + assertThat(trackedFile.specId()).isEqualTo(7); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); + assertThat(trackedFile.keyMetadata()).isEqualTo(KEY_METADATA); + assertThat(trackedFile.splitOffsets()).isEqualTo(SPLIT_OFFSETS); + assertThat(trackedFile.sortOrderId()).isEqualTo(3); + assertThat(trackedFile.equalityIds()).containsExactly(1, 2); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + + // Unsupported fields for equality delete files + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.manifestInfo()).isNull(); + } + + private static Stream manifestBuilders() { + return Stream.of( + Arguments.of(TrackedFileBuilder.dataManifest(50L), FileContent.DATA_MANIFEST), + Arguments.of(TrackedFileBuilder.deleteManifest(50L), FileContent.DELETE_MANIFEST)); + } + + @ParameterizedTest + @MethodSource("manifestBuilders") + public void buildManifestWithRequiredFieldsOnly( + TrackedFileBuilder builder, FileContent contentType) { + TrackedFile trackedFile = + builder + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/manifest.avro") + .fileFormat(FileFormat.AVRO) + .recordCount(420L) + .fileSizeInBytes(556L) + .partition(PARTITION_DATA) + .manifestInfo(MANIFEST_INFO) + .build(); + + assertThat(trackedFile.writerFormatVersion()).isEqualTo(WRITER_FORMAT_VERSION_V4); + assertThat(trackedFile.contentType()).isEqualTo(contentType); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/manifest.avro"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.AVRO); + assertThat(trackedFile.recordCount()).isEqualTo(420L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(556L); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.manifestInfo()).isSameAs(MANIFEST_INFO); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + + assertThat(trackedFile.specId()).isNull(); + assertThat(trackedFile.contentStats()).isNull(); + assertThat(trackedFile.sortOrderId()).isNull(); + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.keyMetadata()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + assertThat(trackedFile.equalityIds()).isNull(); + } + + @ParameterizedTest + @MethodSource("manifestBuilders") + public void buildManifestWithAllFields(TrackedFileBuilder builder, FileContent contentType) { + TrackedFile trackedFile = + builder + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/manifest.avro") + .fileFormat(FileFormat.AVRO) + .recordCount(420L) + .fileSizeInBytes(556L) + .specId(7) + .partition(PARTITION_DATA) + .contentStats(CONTENT_STATS) + .keyMetadata(KEY_METADATA) + .manifestInfo(MANIFEST_INFO) + .build(); + + assertThat(trackedFile.writerFormatVersion()).isEqualTo(WRITER_FORMAT_VERSION_V4); + assertThat(trackedFile.contentType()).isEqualTo(contentType); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/manifest.avro"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.AVRO); + assertThat(trackedFile.recordCount()).isEqualTo(420L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(556L); + assertThat(trackedFile.specId()).isEqualTo(7); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); + assertThat(trackedFile.keyMetadata()).isEqualTo(KEY_METADATA); + assertThat(trackedFile.manifestInfo()).isSameAs(MANIFEST_INFO); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + + // Unsupported fields for manifests + assertThat(trackedFile.sortOrderId()).isNull(); + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + assertThat(trackedFile.equalityIds()).isNull(); + } + + private static Stream manifestSources() { + return Stream.of( + Arguments.of(sourceDataManifest(10L), FileContent.DATA_MANIFEST), + Arguments.of(sourceDeleteManifest(10L), FileContent.DELETE_MANIFEST)); + } + + @ParameterizedTest + @MethodSource("manifestSources") + public void buildManifestFromSourceWithDeletedPositions( + TrackedFile source, FileContent contentType) { + entryWithInheritedSeqNums(source, 7L); + + TrackedFile trackedFile = + TrackedFileBuilder.from(source, 20L).deletedPositions(DELETED_POSITIONS).build(); + + assertThat(trackedFile.contentType()).isEqualTo(contentType); + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.MODIFIED); + assertThat(trackedFile.tracking().dvSnapshotId()).isEqualTo(20L); + assertThat(trackedFile.tracking().deletedPositions()).isEqualTo(DELETED_POSITIONS); + assertThat(trackedFile.tracking().replacedPositions()).isNull(); + } + + @ParameterizedTest + @MethodSource("manifestSources") + public void buildManifestFromSourceWithReplacedPositions( + TrackedFile source, FileContent contentType) { + entryWithInheritedSeqNums(source, 7L); + + TrackedFile trackedFile = + TrackedFileBuilder.from(source, 20L).replacedPositions(REPLACED_POSITIONS).build(); + + assertThat(trackedFile.contentType()).isEqualTo(contentType); + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.MODIFIED); + assertThat(trackedFile.tracking().dvSnapshotId()).isEqualTo(20L); + assertThat(trackedFile.tracking().deletedPositions()).isNull(); + assertThat(trackedFile.tracking().replacedPositions()).isEqualTo(REPLACED_POSITIONS); + } + + @ParameterizedTest + @MethodSource("manifestSources") + public void buildManifestFromSourceClearsPositions(TrackedFile source, FileContent contentType) { + entryWithInheritedSeqNums(source, 7L); + + TrackedFile sourceWithPositions = + TrackedFileBuilder.from(source, 15L) + .deletedPositions(DELETED_POSITIONS) + .replacedPositions(REPLACED_POSITIONS) + .build(); + + TrackedFile newEntry = TrackedFileBuilder.from(sourceWithPositions, 20L).build(); + + // Building a new entry from this source should not carry the positions over + assertThat(newEntry.contentType()).isEqualTo(contentType); + assertThat(newEntry.tracking().status()).isEqualTo(EntryStatus.EXISTING); + assertThat(newEntry.tracking().deletedPositions()).isNull(); + assertThat(newEntry.tracking().replacedPositions()).isNull(); + assertThat(newEntry.tracking().dvSnapshotId()).isEqualTo(15L); + } + + @Test + public void buildDataFileFromSource() { + TrackedFile source = entryWithInheritedSeqNums(sourceData(10L), 45L); + + TrackedFile trackedFile = TrackedFileBuilder.from(source, 20L).build(); + + assertThat(trackedFile.contentType()).isEqualTo(FileContent.DATA); + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.EXISTING); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(source.tracking().snapshotId()); + verifyFieldsAreFromSource(trackedFile, source); + } + + @Test + public void updateDVWhenBuildingDataFileFromSource() { + TrackedFile source = entryWithInheritedSeqNums(sourceData(10L), 45L); + + DeletionVector dv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/new_dv.puffin") + .offset(5L) + .sizeInBytes(256L) + .cardinality(40L) + .build(); + + TrackedFile trackedFile = TrackedFileBuilder.from(source, 20L).deletionVector(dv).build(); + + assertThat(trackedFile.deletionVector()).isNotSameAs(source.deletionVector()).isSameAs(dv); + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.MODIFIED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(10L); + assertThat(trackedFile.tracking().dataSequenceNumber()).isEqualTo(45L); + assertThat(trackedFile.tracking().fileSequenceNumber()).isEqualTo(45L); + assertThat(trackedFile.tracking().dvSnapshotId()).isEqualTo(20L); + } + + @Test + public void addingSameDeletionVectorFails() { + TrackedFile source = entryWithInheritedSeqNums(sourceData(10L), 45L); + + DeletionVector dv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/new_dv.puffin") + .offset(5L) + .sizeInBytes(256L) + .cardinality(40L) + .build(); + + DeletionVector dvCopy = dv.copy(); + + TrackedFile trackedFile = TrackedFileBuilder.from(source, 20L).deletionVector(dv).build(); + + assertThatThrownBy(() -> TrackedFileBuilder.from(trackedFile, 30L).deletionVector(dv)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("The same deletion vector already added"); + assertThatThrownBy(() -> TrackedFileBuilder.from(trackedFile, 30L).deletionVector(dvCopy)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("The same deletion vector already added"); + } + + private static Stream nonManifestSources() { + return Stream.of( + Arguments.of(sourceData(10L), FileContent.DATA), + Arguments.of(sourceEqualityDelete(10L), FileContent.EQUALITY_DELETES)); + } + + private static Stream allSources() { + return Stream.concat(nonManifestSources(), manifestSources()); + } + + @ParameterizedTest + @MethodSource("allSources") + public void deletedFromSource(TrackedFile source, FileContent contentType) { + entryWithInheritedSeqNums(source, 15L); + + TrackedFile deleted = TrackedFileBuilder.deleted(source, 20L); + + assertThat(deleted.contentType()).isEqualTo(contentType); + assertThat(deleted.tracking().status()).isEqualTo(EntryStatus.DELETED); + assertThat(deleted.tracking().snapshotId()).isEqualTo(20L); + verifyFieldsAreFromSource(deleted, source); + } + + @ParameterizedTest + @MethodSource("nonManifestSources") + public void replacedFromNonManifestSource(TrackedFile source, FileContent contentType) { + entryWithInheritedSeqNums(source, 15L); + + TrackedFile replaced = TrackedFileBuilder.replaced(source, 20L); + + assertThat(replaced.contentType()).isEqualTo(contentType); + assertThat(replaced.tracking().status()).isEqualTo(EntryStatus.REPLACED); + assertThat(replaced.tracking().snapshotId()).isEqualTo(20L); + verifyFieldsAreFromSource(replaced, source); + } + + @ParameterizedTest + @MethodSource("manifestSources") + public void replacedFromManifestSourceFails(TrackedFile source, FileContent contentType) { + entryWithInheritedSeqNums(source, 15L); + + assertThatThrownBy(() -> TrackedFileBuilder.replaced(source, 20L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Manifest entries cannot transition to REPLACED, but entry type is: " + contentType); + } + + private static TrackedFile sourceData(long snapshotId) { + return TrackedFileBuilder.data(snapshotId) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .specId(7) + .contentStats(CONTENT_STATS) + .sortOrderId(3) + .deletionVector(DELETION_VECTOR) + .keyMetadata(KEY_METADATA) + .splitOffsets(SPLIT_OFFSETS) + .build(); + } + + private static TrackedFile sourceEqualityDelete(long snapshotId) { + return TrackedFileBuilder.equalityDelete(snapshotId) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/eq_delete.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .equalityIds(ImmutableList.of(1)) + .build(); + } + + private static TrackedFile sourceDataManifest(long snapshotId) { + return TrackedFileBuilder.dataManifest(snapshotId) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/data_manifest.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(420L) + .fileSizeInBytes(556L) + .partition(PARTITION_DATA) + .manifestInfo(MANIFEST_INFO) + .build(); + } + + private static TrackedFile sourceDeleteManifest(long snapshotId) { + return TrackedFileBuilder.deleteManifest(snapshotId) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/delete_manifest.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(100L) + .fileSizeInBytes(543L) + .partition(PARTITION_DATA) + .manifestInfo(MANIFEST_INFO) + .build(); + } + + private static TrackedFile entryWithInheritedSeqNums(TrackedFile entry, long sequenceNumber) { + Tracking manifestTrackingToInheritFrom = + new TrackingStruct( + EntryStatus.EXISTING, 123L, sequenceNumber, sequenceNumber, null, null, null, null); + + ((TrackingStruct) entry.tracking()).inheritFrom(manifestTrackingToInheritFrom); + return entry; + } + + /** + * Verifies that fields in entry are the same as in source. Note, snapshot ID can't be verified + * here, because based on the entry's status it is either carried over or not. + */ + private static void verifyFieldsAreFromSource(TrackedFile entry, TrackedFile source) { + assertThat(entry.writerFormatVersion()).isEqualTo(source.writerFormatVersion()); + assertThat(entry.location()).isEqualTo(source.location()); + assertThat(entry.fileFormat()).isEqualTo(source.fileFormat()); + assertThat(entry.recordCount()).isEqualTo(source.recordCount()); + assertThat(entry.fileSizeInBytes()).isEqualTo(source.fileSizeInBytes()); + assertThat(entry.specId()).isEqualTo(source.specId()); + assertThat(entry.partition()).isSameAs(source.partition()); + assertThat(entry.contentStats()).isSameAs(source.contentStats()); + assertThat(entry.sortOrderId()).isEqualTo(source.sortOrderId()); + assertThat(entry.deletionVector()).isSameAs(source.deletionVector()); + assertThat(entry.keyMetadata()).isEqualTo(source.keyMetadata()); + assertThat(entry.splitOffsets()).isEqualTo(source.splitOffsets()); + assertThat(entry.manifestInfo()).isSameAs(source.manifestInfo()); + assertThat(entry.equalityIds()).isEqualTo(source.equalityIds()); + + assertThat(entry.tracking().dataSequenceNumber()) + .isEqualTo(source.tracking().dataSequenceNumber()); + assertThat(entry.tracking().fileSequenceNumber()) + .isEqualTo(source.tracking().fileSequenceNumber()); + assertThat(entry.tracking().dvSnapshotId()).isEqualTo(source.tracking().dvSnapshotId()); + assertThat(entry.tracking().firstRowId()).isEqualTo(source.tracking().firstRowId()); + assertThat(entry.tracking().deletedPositions()).isEqualTo(source.tracking().deletedPositions()); + assertThat(entry.tracking().replacedPositions()) + .isEqualTo(source.tracking().replacedPositions()); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index 0b725a39fb6b..4f5452e31455 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -374,10 +374,6 @@ void testKryoSerializationRoundTrip() throws IOException { } static TrackedFileStruct createFullTrackedFile() { - TrackingStruct tracking = (TrackingStruct) TrackingBuilder.added(42L).build(); - tracking.setManifestLocation("s3://bucket/manifest.avro"); - tracking.set(MANIFEST_POS_ORDINAL, 3L); - DeletionVectorStruct dv = DeletionVectorStruct.builder() .location("s3://bucket/dv.puffin") @@ -387,20 +383,24 @@ static TrackedFileStruct createFullTrackedFile() { .build(); TrackedFileStruct file = - new TrackedFileStruct( - tracking, - FileContent.DATA, - WRITER_FORMAT_VERSION_V4, - "s3://bucket/data/file.parquet", - FileFormat.PARQUET, - newPartition(7, "music"), - 100L, - 1024L); - file.set(SPEC_ID_ORDINAL, 0); - file.set(SORT_ORDER_ID_ORDINAL, 1); - file.set(DELETION_VECTOR_ORDINAL, dv); - file.set(KEY_METADATA_ORDINAL, ByteBuffer.wrap(new byte[] {1, 2, 3})); - file.set(SPLIT_OFFSETS_ORDINAL, ImmutableList.of(50L)); + (TrackedFileStruct) + TrackedFileBuilder.data(42L) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .partition(newPartition(7, "music")) + .recordCount(100L) + .fileSizeInBytes(1024L) + .specId(0) + .sortOrderId(1) + .deletionVector(dv) + .keyMetadata(ByteBuffer.wrap(new byte[] {1, 2, 3})) + .splitOffsets(ImmutableList.of(50L)) + .build(); + + TrackingStruct tracking = (TrackingStruct) file.tracking(); + tracking.setManifestLocation("s3://bucket/manifest.avro"); + tracking.set(MANIFEST_POS_ORDINAL, 3L); return file; } @@ -460,19 +460,16 @@ static TrackedFileStruct createTrackedFileWithStats() { .withFieldStats(fieldStatsList) .build(); - TrackedFileStruct file = - new TrackedFileStruct( - null, - FileContent.DATA, - WRITER_FORMAT_VERSION_V4, - "s3://bucket/data/file.parquet", - FileFormat.PARQUET, - new PartitionData(Types.StructType.of()), - 100L, - 1024L); - file.set(SPEC_ID_ORDINAL, 0); - file.set(CONTENT_STATS_ORDINAL, stats); - - return file; + return (TrackedFileStruct) + TrackedFileBuilder.data(0L) + .writerFormatVersion(WRITER_FORMAT_VERSION_V4) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .partition(new PartitionData(Types.StructType.of())) + .recordCount(100L) + .fileSizeInBytes(1024L) + .specId(0) + .contentStats(stats) + .build(); } }