Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions core/src/main/java/org/apache/iceberg/TrackedFileStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.Set;
import org.apache.iceberg.avro.SupportsIndexProjection;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.ArrayUtil;
Expand Down Expand Up @@ -162,6 +163,14 @@ private TrackedFileStruct(TrackedFileStruct toCopy, boolean withStats, Set<Integ
: null;
}

/** Sets the first row id on this {@link TrackedFileStruct} */
public void setFirstRowId(int firstRowId) {
Preconditions.checkState(
contentType == FileContent.DATA || contentType == FileContent.DATA_MANIFEST,
"Cannot set first row id on an equality delete or delete manifest entry");
((TrackingStruct) tracking).setFirstRowId(firstRowId);

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self: once the TrackedFileBuilder adjustments are merged, we won't have TrackingString but a TrackingBuilder. Should we introduce such a function on the builder?

}

@Override
public Tracking tracking() {
return tracking;
Expand Down
8 changes: 8 additions & 0 deletions core/src/main/java/org/apache/iceberg/TrackingStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ void inheritFrom(Tracking manifestTracking) {
}
}

void setFirstRowId(long newFirstRowId) {
Preconditions.checkArgument(
newFirstRowId >= 0, "Invalid first row ID: %s (must be >= 0)", newFirstRowId);
Preconditions.checkState(status != EntryStatus.ADDED, "Cannot set first row id on ADDED entry");
Preconditions.checkState(firstRowId == null, "First row ID is already set");
this.firstRowId = newFirstRowId;
}

void setManifestLocation(String location) {
this.manifestLocation = location;
}
Expand Down
52 changes: 52 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.io.IOException;
import java.nio.ByteBuffer;
Expand All @@ -28,6 +29,8 @@
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.types.Types;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

class TestTrackedFileStruct {
private static final int WRITER_FORMAT_VERSION_V4 = 4;
Expand Down Expand Up @@ -325,6 +328,48 @@ void testAllFileContentTypesSupported() {
}
}

@ParameterizedTest
@EnumSource(
value = FileContent.class,
names = {"DATA", "DATA_MANIFEST"})
public void testSettingFirstRowId(FileContent content) {
TrackedFileStruct file =
new TrackedFileStruct(
existingTracking(),
content,
WRITER_FORMAT_VERSION_V4,
"s3://bucket/data/file",
FileFormat.PARQUET,
new PartitionData(Types.StructType.of()),
0L,
0L);

file.setFirstRowId(42);

assertThat(file.tracking().firstRowId()).isEqualTo(42L);
}

@ParameterizedTest
@EnumSource(
value = FileContent.class,
names = {"EQUALITY_DELETES", "DELETE_MANIFEST"})
public void testSettingFirstRowIdNotAllowedForSomeContentTypes(FileContent content) {
TrackedFileStruct file =
new TrackedFileStruct(
existingTracking(),
content,
WRITER_FORMAT_VERSION_V4,
"s3://bucket/data/file",
FileFormat.PARQUET,
new PartitionData(Types.StructType.of()),
0L,
0L);

assertThatThrownBy(() -> file.setFirstRowId(42))
.isInstanceOf(IllegalStateException.class)
.hasMessage("Cannot set first row id on an equality delete or delete manifest entry");
}

@Test
void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException {
TrackedFileStruct file = createFullTrackedFile();
Expand Down Expand Up @@ -412,6 +457,13 @@ private static PartitionData newPartition(int idBucket, String category) {
return partition;
}

private static TrackingStruct existingTracking() {
TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build();
source.set(2, 10L);
source.set(3, 11L);
return (TrackingStruct) TrackingBuilder.from(source, 20L).build();
}

static TrackedFileStruct createTrackedFileWithStats() {
Types.StructType statsStruct =
Types.StructType.of(
Expand Down
57 changes: 57 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestTrackingStruct.java
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,63 @@ void testInheritFromNullIsNoOp() {
assertThat(tracking.fileSequenceNumber()).isNull();
}

@Test
public void testFirstRowIdIsNullInitially() {
TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build();
source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L);
source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L);

assertThat(source.firstRowId()).isNull();

TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build();

assertThat(existing.firstRowId()).isNull();
}

@Test
public void testSettingFirstRowId() {
Comment thread
gaborkaszab marked this conversation as resolved.
TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build();
source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L);
source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L);

TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build();
existing.setFirstRowId(150L);

assertThat(existing.firstRowId()).isEqualTo(150L);
}

@Test
public void testSettingFirstRowIdForAddedEntry() {
TrackingStruct added = (TrackingStruct) TrackingBuilder.added(5L).build();

assertThatThrownBy(() -> added.setFirstRowId(12L))
.isInstanceOf(IllegalStateException.class)
.hasMessage("Cannot set first row id on ADDED entry");
}

@Test
public void testRejectSettingFirstRowIdIfAlreadySet() {
TrackingStruct source = (TrackingStruct) TrackingBuilder.added(5L).build();
source.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L);
source.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L);

TrackingStruct existing = (TrackingStruct) TrackingBuilder.from(source, 20L).build();
existing.setFirstRowId(0L);

assertThatThrownBy(() -> existing.setFirstRowId(200L))
.isInstanceOf(IllegalStateException.class)
.hasMessage("First row ID is already set");
}

@Test
public void testInvalidFirstRowId() {
TrackingStruct tracking = (TrackingStruct) TrackingBuilder.added(5L).build();

assertThatThrownBy(() -> tracking.setFirstRowId(-1L))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Invalid first row ID: -1 (must be >= 0)");
}

private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) {
TrackingStruct tracking = new TrackingStruct(Tracking.schema());
tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id());
Expand Down