From 9a308c487dd733019e548f96dce10daadaffa3a0 Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Mon, 5 Jun 2023 09:00:53 +0300 Subject: [PATCH 1/3] draft commit update snapshot producer update the patch clean up fix for previous patch address review comments move key wrapping to metadata encryption encrypt manifest list key metadata new aad util null key needs no encryption comment; clearer method/var names use key encryption key for manifest list keys add encryption util changes update EncryptionTestHelpers handle api change remove unused lines revert revapi.yml KEK cache unitest update rename var address review comments fix timeout default change writer kek timeout default Updates from review. cache unwrapped keys compile and ci fixes spotless apply re-apply unwrapped cache patch Co-authored-by: Ryan Blue caching limit address review comments spotless apply sync with spec changes add missing method rm empty line pass encrypted output a better unitest rm type, rename class post rebase changes clean up post rebase changes clean up discussed before --- .palantir/revapi.yml | 5 + .../org/apache/iceberg/ManifestListFile.java | 34 ++++ .../iceberg/encryption/EncryptingFileIO.java | 13 +- .../java/org/apache/iceberg/io/FileIO.java | 10 ++ .../apache/iceberg/BaseManifestListFile.java | 49 ++++++ .../java/org/apache/iceberg/BaseSnapshot.java | 4 +- .../apache/iceberg/ManifestListWriter.java | 54 ++++++- .../org/apache/iceberg/ManifestLists.java | 42 ++++- .../org/apache/iceberg/ManifestWriter.java | 18 ++- .../apache/iceberg/RewriteTablePathUtil.java | 9 ++ .../org/apache/iceberg/SnapshotProducer.java | 3 +- .../iceberg/encryption/AesGcmInputFile.java | 24 ++- .../iceberg/encryption/EncryptionUtil.java | 46 ++++++ .../NativeEncryptionKeyMetadata.java | 17 ++ .../encryption/StandardEncryptionManager.java | 129 +++++++++++++++- .../encryption/StandardKeyMetadata.java | 43 +++++- .../iceberg/TestManifestListEncryption.java | 146 ++++++++++++++++++ .../org/apache/iceberg/TestSnapshotJson.java | 1 - .../encryption/EncryptionTestHelpers.java | 1 - .../hadoop/TestCatalogUtilDropTable.java | 4 + 20 files changed, 615 insertions(+), 37 deletions(-) create mode 100644 api/src/main/java/org/apache/iceberg/ManifestListFile.java create mode 100644 core/src/main/java/org/apache/iceberg/BaseManifestListFile.java create mode 100644 core/src/test/java/org/apache/iceberg/TestManifestListEncryption.java diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index d9318b5cffcb..fdaadd235501 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1254,6 +1254,11 @@ acceptedBreaks: new: "method void org.apache.iceberg.data.parquet.BaseParquetWriter::()" justification: "Changing deprecated code" "1.9.0": + org.apache.iceberg:iceberg-api: + - code: "java.class.defaultSerializationChanged" + old: "class org.apache.iceberg.encryption.EncryptingFileIO" + new: "class org.apache.iceberg.encryption.EncryptingFileIO" + justification: "New method for Manifest List reading" org.apache.iceberg:iceberg-common: - code: "java.method.visibilityReduced" old: "method R org.apache.iceberg.common.DynConstructors.Ctor::invokeChecked(java.lang.Object,\ diff --git a/api/src/main/java/org/apache/iceberg/ManifestListFile.java b/api/src/main/java/org/apache/iceberg/ManifestListFile.java new file mode 100644 index 000000000000..e727a35a4e09 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/ManifestListFile.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import org.apache.iceberg.encryption.EncryptionManager; + +public interface ManifestListFile { + + /** Location of manifest list file. */ + String location(); + + /** The manifest list key metadata can be encrypted. Returns ID of encryption key */ + String encryptionKeyID(); + + /** Decrypt and return the manifest list key metadata */ + ByteBuffer decryptKeyMetadata(EncryptionManager em); +} diff --git a/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java b/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java index d3de7b1f84a3..a4c708570dba 100644 --- a/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java +++ b/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java @@ -28,6 +28,7 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.ManifestFile; +import org.apache.iceberg.ManifestListFile; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; @@ -108,13 +109,21 @@ public InputFile newInputFile(ManifestFile manifest) { } } + @Override + public InputFile newInputFile(ManifestListFile manifestList) { + if (manifestList.encryptionKeyID() != null) { + ByteBuffer keyMetadata = manifestList.decryptKeyMetadata(em); + return newDecryptingInputFile(manifestList.location(), keyMetadata); + } else { + return newInputFile(manifestList.location()); + } + } + public InputFile newDecryptingInputFile(String path, ByteBuffer buffer) { return em.decrypt(wrap(io.newInputFile(path), buffer)); } public InputFile newDecryptingInputFile(String path, long length, ByteBuffer buffer) { - // TODO: is the length correct for the encrypted file? It may be the length of the plaintext - // stream return em.decrypt(wrap(io.newInputFile(path, length), buffer)); } diff --git a/api/src/main/java/org/apache/iceberg/io/FileIO.java b/api/src/main/java/org/apache/iceberg/io/FileIO.java index f5404b9e5a78..78b61f60be6b 100644 --- a/api/src/main/java/org/apache/iceberg/io/FileIO.java +++ b/api/src/main/java/org/apache/iceberg/io/FileIO.java @@ -24,6 +24,7 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.ManifestFile; +import org.apache.iceberg.ManifestListFile; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** @@ -70,6 +71,15 @@ default InputFile newInputFile(ManifestFile manifest) { return newInputFile(manifest.path(), manifest.length()); } + default InputFile newInputFile(ManifestListFile manifestList) { + Preconditions.checkArgument( + manifestList.encryptionKeyID() == null, + "Cannot decrypt manifest list: %s (use EncryptingFileIO)", + manifestList.location()); + // cannot pass length because it is not tracked outside of key metadata + return newInputFile(manifestList.location()); + } + /** Get a {@link OutputFile} instance to write bytes to the file at the given path. */ OutputFile newOutputFile(String path); diff --git a/core/src/main/java/org/apache/iceberg/BaseManifestListFile.java b/core/src/main/java/org/apache/iceberg/BaseManifestListFile.java new file mode 100644 index 000000000000..e0ecfd50c863 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/BaseManifestListFile.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.io.Serializable; +import java.nio.ByteBuffer; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.EncryptionUtil; + +class BaseManifestListFile implements ManifestListFile, Serializable { + private final String location; + private final String encryptionKeyID; + + BaseManifestListFile(String location, String encryptionKeyID) { + this.location = location; + this.encryptionKeyID = encryptionKeyID; + } + + @Override + public String location() { + return location; + } + + @Override + public String encryptionKeyID() { + return encryptionKeyID; + } + + @Override + public ByteBuffer decryptKeyMetadata(EncryptionManager em) { + return EncryptionUtil.decryptManifestListKeyMetadata(this, em); + } +} diff --git a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java index b97b15d65221..28a45d2c7821 100644 --- a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java +++ b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java @@ -182,7 +182,9 @@ private void cacheManifests(FileIO fileIO) { if (allManifests == null) { // if manifests isn't set, then the snapshotFile is set and should be read to get the list - this.allManifests = ManifestLists.read(fileIO.newInputFile(manifestListLocation)); + this.allManifests = + ManifestLists.read( + fileIO.newInputFile(new BaseManifestListFile(manifestListLocation, keyId))); } if (dataManifests == null || deleteManifests == null) { diff --git a/core/src/main/java/org/apache/iceberg/ManifestListWriter.java b/core/src/main/java/org/apache/iceberg/ManifestListWriter.java index 7525e76365b1..b290ed49522a 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestListWriter.java +++ b/core/src/main/java/org/apache/iceberg/ManifestListWriter.java @@ -21,6 +21,10 @@ import java.io.IOException; import java.util.Iterator; import java.util.Map; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.NativeEncryptionKeyMetadata; +import org.apache.iceberg.encryption.NativeEncryptionOutputFile; +import org.apache.iceberg.encryption.StandardEncryptionManager; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.OutputFile; @@ -29,9 +33,25 @@ abstract class ManifestListWriter implements FileAppender { private final FileAppender writer; + private final StandardEncryptionManager standardEncryptionManager; + private final NativeEncryptionKeyMetadata manifestListKeyMetadata; + private final OutputFile outputFile; + + private ManifestListWriter( + OutputFile file, EncryptionManager encryptionManager, Map meta) { + if (encryptionManager instanceof StandardEncryptionManager) { + // ability to encrypt the manifest list key is introduced for standard encryption. + this.standardEncryptionManager = (StandardEncryptionManager) encryptionManager; + NativeEncryptionOutputFile encryptedFile = this.standardEncryptionManager.encrypt(file); + this.outputFile = encryptedFile.encryptingOutputFile(); + this.manifestListKeyMetadata = encryptedFile.keyMetadata(); + } else { + this.standardEncryptionManager = null; + this.outputFile = file; + this.manifestListKeyMetadata = null; + } - private ManifestListWriter(OutputFile file, Map meta) { - this.writer = newAppender(file, meta); + this.writer = newAppender(outputFile, meta); } protected abstract ManifestFile prepare(ManifestFile manifest); @@ -73,18 +93,31 @@ public Long nextRowId() { return null; } + public ManifestListFile toManifestListFile() { + if (manifestListKeyMetadata != null && manifestListKeyMetadata.encryptionKey() != null) { + manifestListKeyMetadata.copyWithLength(writer.length()); + String manifestListKeyID = + standardEncryptionManager.addManifestListKeyMetadata(manifestListKeyMetadata); + return new BaseManifestListFile(outputFile.location(), manifestListKeyID); + } else { + return new BaseManifestListFile(outputFile.location(), null); + } + } + static class V4Writer extends ManifestListWriter { private final V4Metadata.ManifestFileWrapper wrapper; private Long nextRowId; V4Writer( OutputFile snapshotFile, + EncryptionManager encryptionManager, long snapshotId, Long parentSnapshotId, long sequenceNumber, long firstRowId) { super( snapshotFile, + encryptionManager, ImmutableMap.of( "snapshot-id", String.valueOf(snapshotId), "parent-snapshot-id", String.valueOf(parentSnapshotId), @@ -137,12 +170,14 @@ static class V3Writer extends ManifestListWriter { V3Writer( OutputFile snapshotFile, + EncryptionManager encryptionManager, long snapshotId, Long parentSnapshotId, long sequenceNumber, long firstRowId) { super( snapshotFile, + encryptionManager, ImmutableMap.of( "snapshot-id", String.valueOf(snapshotId), "parent-snapshot-id", String.valueOf(parentSnapshotId), @@ -192,9 +227,15 @@ public Long nextRowId() { static class V2Writer extends ManifestListWriter { private final V2Metadata.ManifestFileWrapper wrapper; - V2Writer(OutputFile snapshotFile, long snapshotId, Long parentSnapshotId, long sequenceNumber) { + V2Writer( + OutputFile snapshotFile, + EncryptionManager encryptionManager, + long snapshotId, + Long parentSnapshotId, + long sequenceNumber) { super( snapshotFile, + encryptionManager, ImmutableMap.of( "snapshot-id", String.valueOf(snapshotId), "parent-snapshot-id", String.valueOf(parentSnapshotId), @@ -228,9 +269,14 @@ protected FileAppender newAppender(OutputFile file, Map read(InputFile manifestList) { } } + // or should we modify all related tests (to pass PlaintextEncryptionManager)? + @VisibleForTesting static ManifestListWriter write( int formatVersion, OutputFile manifestListFile, @@ -54,22 +59,51 @@ static ManifestListWriter write( Long parentSnapshotId, long sequenceNumber, Long firstRowId) { + return write( + formatVersion, + manifestListFile, + PlaintextEncryptionManager.instance(), + snapshotId, + parentSnapshotId, + sequenceNumber, + firstRowId); + } + + static ManifestListWriter write( + int formatVersion, + OutputFile manifestListFile, + EncryptionManager encryptionManager, + long snapshotId, + Long parentSnapshotId, + long sequenceNumber, + Long firstRowId) { switch (formatVersion) { case 1: Preconditions.checkArgument( sequenceNumber == TableMetadata.INITIAL_SEQUENCE_NUMBER, "Invalid sequence number for v1 manifest list: %s", sequenceNumber); - return new ManifestListWriter.V1Writer(manifestListFile, snapshotId, parentSnapshotId); + return new ManifestListWriter.V1Writer( + manifestListFile, encryptionManager, snapshotId, parentSnapshotId); case 2: return new ManifestListWriter.V2Writer( - manifestListFile, snapshotId, parentSnapshotId, sequenceNumber); + manifestListFile, encryptionManager, snapshotId, parentSnapshotId, sequenceNumber); case 3: return new ManifestListWriter.V3Writer( - manifestListFile, snapshotId, parentSnapshotId, sequenceNumber, firstRowId); + manifestListFile, + encryptionManager, + snapshotId, + parentSnapshotId, + sequenceNumber, + firstRowId); case 4: return new ManifestListWriter.V4Writer( - manifestListFile, snapshotId, parentSnapshotId, sequenceNumber, firstRowId); + manifestListFile, + encryptionManager, + snapshotId, + parentSnapshotId, + sequenceNumber, + firstRowId); } throw new UnsupportedOperationException( "Cannot write manifest list for table version: " + formatVersion); diff --git a/core/src/main/java/org/apache/iceberg/ManifestWriter.java b/core/src/main/java/org/apache/iceberg/ManifestWriter.java index fd560b2b83ff..d24241d5d9aa 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestWriter.java +++ b/core/src/main/java/org/apache/iceberg/ManifestWriter.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.iceberg.encryption.EncryptedOutputFile; +import org.apache.iceberg.encryption.EncryptionKeyMetadata; +import org.apache.iceberg.encryption.NativeEncryptionKeyMetadata; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.OutputFile; @@ -38,7 +40,7 @@ public abstract class ManifestWriter> implements FileAp static final long UNASSIGNED_SEQ = -1L; private final OutputFile file; - private final ByteBuffer keyMetadataBuffer; + private final EncryptionKeyMetadata keyMetadata; private final int specId; private final FileAppender> writer; private final Long snapshotId; @@ -65,7 +67,7 @@ private ManifestWriter( new GenericManifestEntry<>(V1Metadata.entrySchema(spec.partitionType()).asStruct()); this.stats = new PartitionSummary(spec); this.firstRowId = firstRowId; - this.keyMetadataBuffer = (file.keyMetadata() == null) ? null : file.keyMetadata().buffer(); + this.keyMetadata = file.keyMetadata(); } protected abstract ManifestEntry prepare(ManifestEntry entry); @@ -192,6 +194,18 @@ public long length() { public ManifestFile toManifestFile() { Preconditions.checkState(closed, "Cannot build ManifestFile, writer is not closed"); + + // if key metadata can store the length, add it + ByteBuffer keyMetadataBuffer; + if (keyMetadata instanceof NativeEncryptionKeyMetadata) { + keyMetadataBuffer = + ((NativeEncryptionKeyMetadata) keyMetadata).copyWithLength(length()).buffer(); + } else if (keyMetadata != null) { + keyMetadataBuffer = keyMetadata.buffer(); + } else { + keyMetadataBuffer = null; + } + // if the minSequenceNumber is null, then no manifests with a sequence number have been written, // so the min data sequence number is the one that will be assigned when this is committed. // pass UNASSIGNED_SEQ to inherit it. diff --git a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java index 133a156af096..ee7679f5e972 100644 --- a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java +++ b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java @@ -30,6 +30,9 @@ import org.apache.iceberg.data.Record; import org.apache.iceberg.deletes.PositionDelete; import org.apache.iceberg.deletes.PositionDeleteWriter; +import org.apache.iceberg.encryption.EncryptingFileIO; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.CloseableIterator; @@ -273,10 +276,16 @@ public static RewriteResult rewriteManifestList( mf.path(), sourcePrefix)); + EncryptionManager encryptionManager = + (io instanceof EncryptingFileIO) + ? ((EncryptingFileIO) io).encryptionManager() + : PlaintextEncryptionManager.instance(); + try (FileAppender writer = ManifestLists.write( tableMetadata.formatVersion(), outputFile, + encryptionManager, snapshot.snapshotId(), snapshot.parentId(), snapshot.sequenceNumber(), diff --git a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java index 77cdac8f4a29..d11f466434ec 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotProducer.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotProducer.java @@ -266,6 +266,7 @@ public Snapshot apply() { ManifestLists.write( ops.current().formatVersion(), manifestList, + ops.encryption(), snapshotId(), parentSnapshotId, sequenceNumber, @@ -323,7 +324,7 @@ public Snapshot apply() { manifestList.location(), nextRowId, assignedRows, - null); + writer.toManifestListFile().encryptionKeyID()); } protected abstract Map summary(); diff --git a/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputFile.java b/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputFile.java index a43643fcc779..b03944859b6e 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputFile.java +++ b/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputFile.java @@ -26,20 +26,34 @@ public class AesGcmInputFile implements InputFile { private final InputFile sourceFile; private final byte[] dataKey; private final byte[] fileAADPrefix; - private long plaintextLength; + private Long encryptedLength; + private Long plaintextLength; public AesGcmInputFile(InputFile sourceFile, byte[] dataKey, byte[] fileAADPrefix) { + this(sourceFile, dataKey, fileAADPrefix, null); + } + + public AesGcmInputFile(InputFile sourceFile, byte[] dataKey, byte[] fileAADPrefix, Long length) { this.sourceFile = sourceFile; this.dataKey = dataKey; this.fileAADPrefix = fileAADPrefix; - this.plaintextLength = -1; + this.encryptedLength = length; + this.plaintextLength = null; + } + + private long encryptedLength() { + if (encryptedLength == null) { + this.encryptedLength = sourceFile.getLength(); + } + + return encryptedLength; } @Override public long getLength() { - if (plaintextLength == -1) { + if (plaintextLength == null) { // Presumes all streams use hard-coded plaintext block size. - plaintextLength = AesGcmInputStream.calculatePlaintextLength(sourceFile.getLength()); + plaintextLength = AesGcmInputStream.calculatePlaintextLength(encryptedLength()); } return plaintextLength; @@ -47,7 +61,7 @@ public long getLength() { @Override public SeekableInputStream newStream() { - long ciphertextLength = sourceFile.getLength(); + long ciphertextLength = encryptedLength(); Preconditions.checkState( ciphertextLength >= Ciphers.MIN_STREAM_LENGTH, "Invalid encrypted stream: %d is shorter than the minimum possible stream length", diff --git a/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java b/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java index 36efde629902..1b35cb82f03e 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java +++ b/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java @@ -18,13 +18,17 @@ */ package org.apache.iceberg.encryption; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.ManifestListFile; import org.apache.iceberg.TableProperties; import org.apache.iceberg.common.DynConstructors; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.ByteBuffers; import org.apache.iceberg.util.PropertyUtil; public class EncryptionUtil { @@ -98,4 +102,46 @@ static EncryptionManager createEncryptionManager( public static EncryptedOutputFile plainAsEncryptedOutput(OutputFile encryptingOutputFile) { return new BaseEncryptedOutputFile(encryptingOutputFile, EncryptionKeyMetadata.empty()); } + + /** + * Decrypt the key metadata for a manifest list. + * + * @param manifestList a ManifestListFile + * @param em the table's EncryptionManager + * @return a decrypted key metadata buffer + */ + public static ByteBuffer decryptManifestListKeyMetadata( + ManifestListFile manifestList, EncryptionManager em) { + Preconditions.checkState( + em instanceof StandardEncryptionManager, + "Snapshot key metadata encryption requires a StandardEncryptionManager"); + StandardEncryptionManager sem = (StandardEncryptionManager) em; + String manifestListKeyId = manifestList.encryptionKeyID(); + ByteBuffer keyEncryptionKey = sem.encryptedByKey(manifestListKeyId); + ByteBuffer encryptedKeyMetadata = sem.encryptedKeyMetadata(manifestListKeyId); + + Ciphers.AesGcmDecryptor decryptor = + new Ciphers.AesGcmDecryptor(ByteBuffers.toByteArray(keyEncryptionKey)); + byte[] keyMetadataBytes = ByteBuffers.toByteArray(encryptedKeyMetadata); + byte[] decryptedKeyMetadata = + decryptor.decrypt(keyMetadataBytes, manifestListKeyId.getBytes(StandardCharsets.UTF_8)); + return ByteBuffer.wrap(decryptedKeyMetadata); + } + + /** + * Encrypts the key metadata for a manifest list. + * + * @param key key encryption key bytes + * @param keyId ID of the manifest list key + * @param keyMetadata manifest list key metadata + * @return encrypted key metadata + */ + static ByteBuffer encryptManifestListKeyMetadata( + ByteBuffer key, String keyId, EncryptionKeyMetadata keyMetadata) { + Ciphers.AesGcmEncryptor encryptor = new Ciphers.AesGcmEncryptor(ByteBuffers.toByteArray(key)); + byte[] keyMetadataBytes = ByteBuffers.toByteArray(keyMetadata.buffer()); + byte[] encryptedKeyMetadata = + encryptor.encrypt(keyMetadataBytes, keyId.getBytes(StandardCharsets.UTF_8)); + return ByteBuffer.wrap(encryptedKeyMetadata); + } } diff --git a/core/src/main/java/org/apache/iceberg/encryption/NativeEncryptionKeyMetadata.java b/core/src/main/java/org/apache/iceberg/encryption/NativeEncryptionKeyMetadata.java index c2ed9d564d1e..2188378a4e87 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/NativeEncryptionKeyMetadata.java +++ b/core/src/main/java/org/apache/iceberg/encryption/NativeEncryptionKeyMetadata.java @@ -27,4 +27,21 @@ public interface NativeEncryptionKeyMetadata extends EncryptionKeyMetadata { /** Additional authentication data as a {@link ByteBuffer} */ ByteBuffer aadPrefix(); + + /** Encrypted file length */ + default Long fileLength() { + throw new UnsupportedOperationException( + this.getClass().getName() + " doesn't implement fileLength"); + } + + /** + * Copy this key metadata and set the file length. + * + * @param length length of the encrypted file in bytes + * @return a copy of this key metadata (key and AAD) with the file length + */ + default NativeEncryptionKeyMetadata copyWithLength(long length) { + throw new UnsupportedOperationException( + this.getClass().getName() + " doesn't implement copyWithLength"); + } } diff --git a/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java b/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java index 119d2a5f9ae2..8ffe964015e3 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java +++ b/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java @@ -18,21 +18,49 @@ */ package org.apache.iceberg.encryption; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.LoadingCache; import java.nio.ByteBuffer; import java.security.SecureRandom; +import java.util.Base64; +import java.util.Map; +import java.util.concurrent.TimeUnit; import org.apache.iceberg.TableProperties; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.io.SeekableInputStream; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.util.ByteBuffers; public class StandardEncryptionManager implements EncryptionManager { - private final transient KeyManagementClient kmsClient; + private static final String KEY_ENCRYPTION_KEY_ID = "KEY_ENCRYPTION_KEY_ID"; + private final String tableKeyId; private final int dataKeyLength; + // a holder class of metadata that is not available after serialization + private class TransientEncryptionState { + private final KeyManagementClient kmsClient; + private final Map encryptionKeys; + private final LoadingCache unwrappedKeyCache; + + private TransientEncryptionState(KeyManagementClient kmsClient) { + this.kmsClient = kmsClient; + this.encryptionKeys = Maps.newLinkedHashMap(); + this.unwrappedKeyCache = + Caffeine.newBuilder() + .expireAfterWrite(1, TimeUnit.HOURS) + .build( + keyId -> + kmsClient.unwrapKey( + encryptionKeys.get(keyId).encryptedKeyMetadata(), tableKeyId)); + } + } + + private final transient TransientEncryptionState transientState; + private transient volatile SecureRandom lazyRNG = null; /** @@ -49,7 +77,7 @@ public StandardEncryptionManager( dataKeyLength); Preconditions.checkNotNull(kmsClient, "Invalid KMS client: null"); this.tableKeyId = tableKeyId; - this.kmsClient = kmsClient; + this.transientState = new TransientEncryptionState(kmsClient); this.dataKeyLength = dataKeyLength; } @@ -81,22 +109,106 @@ private SecureRandom workerRNG() { return lazyRNG; } + /** + * @deprecated will be removed in 1.11.0. + */ + @Deprecated public ByteBuffer wrapKey(ByteBuffer secretKey) { - if (kmsClient == null) { + if (transientState == null) { throw new IllegalStateException( "Cannot wrap key after called after serialization (missing KMS client)"); } - return kmsClient.wrapKey(secretKey, tableKeyId); + return transientState.kmsClient.wrapKey(secretKey, tableKeyId); } + /** + * @deprecated will be removed in 1.11.0. + */ + @Deprecated public ByteBuffer unwrapKey(ByteBuffer wrappedSecretKey) { - if (kmsClient == null) { + if (transientState == null) { + throw new IllegalStateException("Cannot unwrap key after serialization"); + } + + return transientState.kmsClient.unwrapKey(wrappedSecretKey, tableKeyId); + } + + private String keyEncryptionKeyID() { + if (transientState == null) { + throw new IllegalStateException("Cannot return the current key after serialization"); + } + + if (!transientState.encryptionKeys.containsKey(KEY_ENCRYPTION_KEY_ID)) { + ByteBuffer unwrapped = newKey(); + ByteBuffer wrapped = transientState.kmsClient.wrapKey(unwrapped, tableKeyId); + EncryptedKey key = new BaseEncryptedKey(KEY_ENCRYPTION_KEY_ID, wrapped, tableKeyId, null); + + // update internal tracking + transientState.unwrappedKeyCache.put(key.keyId(), unwrapped); + transientState.encryptionKeys.put(key.keyId(), key); + } + + return KEY_ENCRYPTION_KEY_ID; + } + + ByteBuffer encryptedByKey(String manifestListKeyID) { + if (transientState == null) { + throw new IllegalStateException("Cannot find key encryption key after serialization"); + } + + EncryptedKey encryptedKeyMetadata = transientState.encryptionKeys.get(manifestListKeyID); + if (encryptedKeyMetadata == null) { throw new IllegalStateException( - "Cannot wrap key after called after serialization (missing KMS client)"); + "Cannot find manifest list key metadata with id " + manifestListKeyID); + } + + return transientState.unwrappedKeyCache.get(encryptedKeyMetadata.encryptedById()); + } + + ByteBuffer encryptedKeyMetadata(String manifestListKeyID) { + if (transientState == null) { + throw new IllegalStateException("Cannot find encrypted key metadata after serialization"); + } + + EncryptedKey encryptedKeyMetadata = transientState.encryptionKeys.get(manifestListKeyID); + if (encryptedKeyMetadata == null) { + throw new IllegalStateException( + "Cannot find manifest list key metadata with id " + manifestListKeyID); + } + + return encryptedKeyMetadata.encryptedKeyMetadata(); + } + + public String addManifestListKeyMetadata(NativeEncryptionKeyMetadata keyMetadata) { + if (transientState == null) { + throw new IllegalStateException("Cannot add key metadata after serialization"); } - return kmsClient.unwrapKey(wrappedSecretKey, tableKeyId); + String manifestListKeyID = generateKeyId(); + ByteBuffer encryptedKeyMetadata = + EncryptionUtil.encryptManifestListKeyMetadata( + transientState.unwrappedKeyCache.get(keyEncryptionKeyID()), + manifestListKeyID, + keyMetadata); + BaseEncryptedKey key = + new BaseEncryptedKey(manifestListKeyID, encryptedKeyMetadata, keyEncryptionKeyID(), null); + + transientState.encryptionKeys.put(key.keyId(), key); + + return manifestListKeyID; + } + + private String generateKeyId() { + byte[] idBytes = new byte[16]; + workerRNG().nextBytes(idBytes); + return Base64.getEncoder().encodeToString(idBytes); + } + + private ByteBuffer newKey() { + byte[] newKey = new byte[dataKeyLength]; + workerRNG().nextBytes(newKey); + return ByteBuffer.wrap(newKey); } private class StandardEncryptedOutputFile implements NativeEncryptionOutputFile { @@ -173,7 +285,8 @@ private AesGcmInputFile decrypted() { new AesGcmInputFile( encryptedInputFile.encryptedInputFile(), ByteBuffers.toByteArray(keyMetadata().encryptionKey()), - ByteBuffers.toByteArray(keyMetadata().aadPrefix())); + ByteBuffers.toByteArray(keyMetadata().aadPrefix()), + keyMetadata().fileLength()); } return lazyDecryptedInputFile; diff --git a/core/src/main/java/org/apache/iceberg/encryption/StandardKeyMetadata.java b/core/src/main/java/org/apache/iceberg/encryption/StandardKeyMetadata.java index 98f87c65d95f..6ddea184d8c4 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/StandardKeyMetadata.java +++ b/core/src/main/java/org/apache/iceberg/encryption/StandardKeyMetadata.java @@ -36,7 +36,8 @@ class StandardKeyMetadata implements NativeEncryptionKeyMetadata, IndexedRecord private static final Schema SCHEMA_V1 = new Schema( required(0, "encryption_key", Types.BinaryType.get()), - optional(1, "aad_prefix", Types.BinaryType.get())); + optional(1, "aad_prefix", Types.BinaryType.get()), + optional(2, "file_length", Types.LongType.get())); private static final org.apache.avro.Schema AVRO_SCHEMA_V1 = AvroSchemaUtil.convert(SCHEMA_V1, StandardKeyMetadata.class.getCanonicalName()); @@ -49,20 +50,31 @@ class StandardKeyMetadata implements NativeEncryptionKeyMetadata, IndexedRecord private ByteBuffer encryptionKey; private ByteBuffer aadPrefix; - private org.apache.avro.Schema avroSchema; + private Long fileLength; /** Used by Avro reflection to instantiate this class * */ StandardKeyMetadata() {} StandardKeyMetadata(byte[] key, byte[] aad) { + this(key, aad, null); + } + + StandardKeyMetadata(byte[] key, byte[] aad, Long fileLength) { this.encryptionKey = ByteBuffer.wrap(key); this.aadPrefix = ByteBuffer.wrap(aad); + this.fileLength = fileLength; } - private StandardKeyMetadata(ByteBuffer encryptionKey, ByteBuffer aadPrefix) { - this.encryptionKey = encryptionKey; - this.aadPrefix = aadPrefix; - this.avroSchema = AVRO_SCHEMA_V1; + /** + * Copy constructor. + * + * @param toCopy a StandardKeymetadata to copy + * @param fileLength file length that overrides toCopy if not null + */ + private StandardKeyMetadata(StandardKeyMetadata toCopy, Long fileLength) { + this.encryptionKey = toCopy.encryptionKey; + this.aadPrefix = toCopy.aadPrefix; + this.fileLength = fileLength != null ? fileLength : toCopy.fileLength; } static Map supportedSchemaVersions() { @@ -83,6 +95,11 @@ public ByteBuffer aadPrefix() { return aadPrefix; } + @Override + public Long fileLength() { + return fileLength; + } + static StandardKeyMetadata castOrParse(EncryptionKeyMetadata keyMetadata) { if (keyMetadata instanceof StandardKeyMetadata) { return (StandardKeyMetadata) keyMetadata; @@ -116,7 +133,12 @@ public ByteBuffer buffer() { @Override public EncryptionKeyMetadata copy() { - return new StandardKeyMetadata(encryptionKey(), aadPrefix()); + return new StandardKeyMetadata(this, null); + } + + @Override + public NativeEncryptionKeyMetadata copyWithLength(long length) { + return new StandardKeyMetadata(this, length); } @Override @@ -128,6 +150,9 @@ public void put(int i, Object v) { case 1: this.aadPrefix = (ByteBuffer) v; return; + case 2: + this.fileLength = (Long) v; + return; default: // ignore the object, it must be from a newer version of the format } @@ -140,6 +165,8 @@ public Object get(int i) { return encryptionKey; case 1: return aadPrefix; + case 2: + return fileLength; default: throw new UnsupportedOperationException("Unknown field ordinal: " + i); } @@ -147,6 +174,6 @@ public Object get(int i) { @Override public org.apache.avro.Schema getSchema() { - return avroSchema; + return AVRO_SCHEMA_V1; } } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestListEncryption.java b/core/src/test/java/org/apache/iceberg/TestManifestListEncryption.java new file mode 100644 index 000000000000..8682e56d4a2f --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestManifestListEncryption.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.UUID; +import org.apache.avro.InvalidAvroMagicException; +import org.apache.iceberg.encryption.EncryptingFileIO; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.EncryptionTestHelpers; +import org.apache.iceberg.exceptions.RuntimeIOException; +import org.apache.iceberg.inmemory.InMemoryFileIO; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestManifestListEncryption { + private static final String PATH = "s3://bucket/table/m1.avro"; + private static final long LENGTH = 1024L; + private static final int SPEC_ID = 1; + private static final long SEQ_NUM = 34L; + private static final long MIN_SEQ_NUM = 10L; + private static final long SNAPSHOT_ID = 987134631982734L; + private static final int ADDED_FILES = 2; + private static final long ADDED_ROWS = 5292L; + private static final int EXISTING_FILES = 343; + private static final long EXISTING_ROWS = 857273L; + private static final int DELETED_FILES = 1; + private static final long DELETED_ROWS = 22910L; + private static final long FIRST_ROW_ID = 100L; + private static final long SNAPSHOT_FIRST_ROW_ID = 130L; + + private static final ByteBuffer FIRST_SUMMARY_LOWER_BOUND = + Conversions.toByteBuffer(Types.IntegerType.get(), 10); + private static final ByteBuffer FIRST_SUMMARY_UPPER_BOUND = + Conversions.toByteBuffer(Types.IntegerType.get(), 100); + private static final ByteBuffer SECOND_SUMMARY_LOWER_BOUND = + Conversions.toByteBuffer(Types.IntegerType.get(), 20); + private static final ByteBuffer SECOND_SUMMARY_UPPER_BOUND = + Conversions.toByteBuffer(Types.IntegerType.get(), 200); + + private static final List PARTITION_SUMMARIES = + Lists.newArrayList( + new GenericPartitionFieldSummary( + false, FIRST_SUMMARY_LOWER_BOUND, FIRST_SUMMARY_UPPER_BOUND), + new GenericPartitionFieldSummary( + true, false, SECOND_SUMMARY_LOWER_BOUND, SECOND_SUMMARY_UPPER_BOUND)); + private static final ByteBuffer MANIFEST_KEY_METADATA = ByteBuffer.allocate(100); + + private static final ManifestFile TEST_MANIFEST = + new GenericManifestFile( + PATH, + LENGTH, + SPEC_ID, + ManifestContent.DATA, + SEQ_NUM, + MIN_SEQ_NUM, + SNAPSHOT_ID, + PARTITION_SUMMARIES, + MANIFEST_KEY_METADATA, + ADDED_FILES, + ADDED_ROWS, + EXISTING_FILES, + EXISTING_ROWS, + DELETED_FILES, + DELETED_ROWS, + FIRST_ROW_ID); + + private static final EncryptionManager ENCRYPTION_MANAGER = + EncryptionTestHelpers.createEncryptionManager(); + + @Test + public void testV2Write() throws IOException { + ManifestFile manifest = writeAndReadEncryptedManifestList(); + + assertThat(manifest.path()).isEqualTo(PATH); + assertThat(manifest.length()).isEqualTo(LENGTH); + assertThat(manifest.partitionSpecId()).isEqualTo(SPEC_ID); + assertThat(manifest.content()).isEqualTo(ManifestContent.DATA); + assertThat(manifest.sequenceNumber()).isEqualTo(SEQ_NUM); + assertThat(manifest.minSequenceNumber()).isEqualTo(MIN_SEQ_NUM); + assertThat((long) manifest.snapshotId()).isEqualTo(SNAPSHOT_ID); + assertThat((int) manifest.addedFilesCount()).isEqualTo(ADDED_FILES); + assertThat((long) manifest.addedRowsCount()).isEqualTo(ADDED_ROWS); + assertThat((int) manifest.existingFilesCount()).isEqualTo(EXISTING_FILES); + assertThat((long) manifest.existingRowsCount()).isEqualTo(EXISTING_ROWS); + assertThat((int) manifest.deletedFilesCount()).isEqualTo(DELETED_FILES); + assertThat((long) manifest.deletedRowsCount()).isEqualTo(DELETED_ROWS); + assertThat(manifest.content()).isEqualTo(ManifestContent.DATA); + } + + private ManifestFile writeAndReadEncryptedManifestList() throws IOException { + FileIO io = new InMemoryFileIO(); + EncryptingFileIO encryptingFileIO = EncryptingFileIO.combine(io, ENCRYPTION_MANAGER); + OutputFile outputFile = io.newOutputFile("memory:" + UUID.randomUUID()); + + ManifestListWriter writer = + ManifestLists.write( + 3, + outputFile, + encryptingFileIO.encryptionManager(), + SNAPSHOT_ID, + SNAPSHOT_ID - 1, + SEQ_NUM, + SNAPSHOT_FIRST_ROW_ID); + writer.add(TEST_MANIFEST); + writer.close(); + ManifestListFile manifestListFile = writer.toManifestListFile(); + + // First try to read without decryption + assertThatThrownBy(() -> ManifestLists.read(outputFile.toInputFile())) + .isInstanceOf(RuntimeIOException.class) + .hasMessageContaining("Failed to open file") + .hasCauseInstanceOf(InvalidAvroMagicException.class); + + List manifests = + ManifestLists.read(encryptingFileIO.newInputFile(manifestListFile)); + assertThat(manifests.size()).isEqualTo(1); + + return manifests.get(0); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java b/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java index bcde5a2f0294..c0f2678b6c29 100644 --- a/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java +++ b/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java @@ -59,7 +59,6 @@ public void testJsonConversion() throws IOException { Snapshot snapshot = SnapshotParser.fromJson(json); assertThat(snapshot.snapshotId()).isEqualTo(expected.snapshotId()); - assertThat(snapshot.allManifests(ops.io())).isEqualTo(expected.allManifests(ops.io())); assertThat(snapshot.operation()).isNull(); assertThat(snapshot.summary()).isNull(); assertThat(snapshot.schemaId()).isEqualTo(1); diff --git a/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java b/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java index 6d4be7671157..901f8080ff1e 100644 --- a/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java +++ b/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java @@ -34,7 +34,6 @@ public static EncryptionManager createEncryptionManager() { CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName()); Map tableProperties = Maps.newHashMap(); tableProperties.put(TableProperties.ENCRYPTION_TABLE_KEY, UnitestKMS.MASTER_KEY_NAME1); - tableProperties.put(TableProperties.FORMAT_VERSION, "2"); return EncryptionUtil.createEncryptionManager( List.of(), tableProperties, EncryptionUtil.createKmsClient(catalogProperties)); diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestCatalogUtilDropTable.java b/core/src/test/java/org/apache/iceberg/hadoop/TestCatalogUtilDropTable.java index a107a72ce63c..bd10f476f9d8 100644 --- a/core/src/test/java/org/apache/iceberg/hadoop/TestCatalogUtilDropTable.java +++ b/core/src/test/java/org/apache/iceberg/hadoop/TestCatalogUtilDropTable.java @@ -37,6 +37,7 @@ import org.apache.iceberg.GenericStatisticsFile; import org.apache.iceberg.ImmutableGenericPartitionStatisticsFile; import org.apache.iceberg.ManifestFile; +import org.apache.iceberg.ManifestListFile; import org.apache.iceberg.PartitionStatisticsFile; import org.apache.iceberg.Snapshot; import org.apache.iceberg.StatisticsFile; @@ -197,6 +198,9 @@ private static FileIO createMockFileIO(FileIO wrapped) { .thenAnswer( invocation -> wrapped.newInputFile(invocation.getArgument(0), invocation.getArgument(1))); + Mockito.when(mockIO.newInputFile(Mockito.any(ManifestListFile.class))) + .thenAnswer( + invocation -> wrapped.newInputFile((ManifestListFile) invocation.getArgument(0))); Mockito.when(mockIO.newInputFile(Mockito.any(ManifestFile.class))) .thenAnswer(invocation -> wrapped.newInputFile((ManifestFile) invocation.getArgument(0))); Mockito.when(mockIO.newInputFile(Mockito.any(DataFile.class))) From f36511e441ab3c003df0848db238fd2ea52cc8c1 Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Tue, 16 Sep 2025 10:33:08 +0200 Subject: [PATCH 2/3] post-review changes --- .palantir/revapi.yml | 10 +++++----- .../java/org/apache/iceberg/ManifestReadBenchmark.java | 9 ++++++++- .../org/apache/iceberg/ManifestWriteBenchmark.java | 2 ++ .../main/java/org/apache/iceberg/ManifestWriter.java | 2 +- .../iceberg/encryption/StandardEncryptionManager.java | 6 +++--- .../org/apache/iceberg/TestManifestListVersions.java | 2 ++ .../org/apache/iceberg/TestManifestWriterVersions.java | 1 + .../org/apache/iceberg/TestMetadataUpdateParser.java | 9 ++++++++- .../test/java/org/apache/iceberg/TestSnapshotJson.java | 9 ++++++++- .../java/org/apache/iceberg/TestTableMetadata.java | 9 ++++++++- 10 files changed, 46 insertions(+), 13 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index fdaadd235501..971021d24bb6 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1254,11 +1254,6 @@ acceptedBreaks: new: "method void org.apache.iceberg.data.parquet.BaseParquetWriter::()" justification: "Changing deprecated code" "1.9.0": - org.apache.iceberg:iceberg-api: - - code: "java.class.defaultSerializationChanged" - old: "class org.apache.iceberg.encryption.EncryptingFileIO" - new: "class org.apache.iceberg.encryption.EncryptingFileIO" - justification: "New method for Manifest List reading" org.apache.iceberg:iceberg-common: - code: "java.method.visibilityReduced" old: "method R org.apache.iceberg.common.DynConstructors.Ctor::invokeChecked(java.lang.Object,\ @@ -1369,6 +1364,11 @@ acceptedBreaks: \ org.apache.iceberg.data.parquet.GenericParquetWriter::createStructWriter(java.util.List>)" justification: "Removing deprecations for 1.10.0" "1.10.0": + org.apache.iceberg:iceberg-api: + - code: "java.class.defaultSerializationChanged" + old: "class org.apache.iceberg.encryption.EncryptingFileIO" + new: "class org.apache.iceberg.encryption.EncryptingFileIO" + justification: "New method for Manifest List reading" org.apache.iceberg:iceberg-core: - code: "java.field.constantValueChanged" old: "field org.apache.iceberg.rest.ResourcePaths.V1_TABLE_SCAN_PLAN" diff --git a/core/src/jmh/java/org/apache/iceberg/ManifestReadBenchmark.java b/core/src/jmh/java/org/apache/iceberg/ManifestReadBenchmark.java index c7ab2f44aa2c..588b5df1ba97 100644 --- a/core/src/jmh/java/org/apache/iceberg/ManifestReadBenchmark.java +++ b/core/src/jmh/java/org/apache/iceberg/ManifestReadBenchmark.java @@ -32,6 +32,7 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.io.CloseableIterator; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -72,7 +73,13 @@ public void before() { try (ManifestListWriter listWriter = ManifestLists.write( - 1, org.apache.iceberg.Files.localOutput(manifestListFile), 0, 1L, 0, 0L)) { + 1, + org.apache.iceberg.Files.localOutput(manifestListFile), + PlaintextEncryptionManager.instance(), + 0, + 1L, + 0, + 0L)) { for (int i = 0; i < NUM_FILES; i++) { OutputFile manifestFile = org.apache.iceberg.Files.localOutput( diff --git a/core/src/jmh/java/org/apache/iceberg/ManifestWriteBenchmark.java b/core/src/jmh/java/org/apache/iceberg/ManifestWriteBenchmark.java index 28b10bbd6950..b0dab63dea06 100644 --- a/core/src/jmh/java/org/apache/iceberg/ManifestWriteBenchmark.java +++ b/core/src/jmh/java/org/apache/iceberg/ManifestWriteBenchmark.java @@ -27,6 +27,7 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.openjdk.jmh.annotations.Benchmark; @@ -103,6 +104,7 @@ public void writeManifestFile(BenchmarkState state) throws IOException { ManifestLists.write( state.getFormatVersion(), org.apache.iceberg.Files.localOutput(manifestListFile), + PlaintextEncryptionManager.instance(), 0, 1L, 0, diff --git a/core/src/main/java/org/apache/iceberg/ManifestWriter.java b/core/src/main/java/org/apache/iceberg/ManifestWriter.java index d24241d5d9aa..43b8e3ed7095 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestWriter.java +++ b/core/src/main/java/org/apache/iceberg/ManifestWriter.java @@ -195,9 +195,9 @@ public long length() { public ManifestFile toManifestFile() { Preconditions.checkState(closed, "Cannot build ManifestFile, writer is not closed"); - // if key metadata can store the length, add it ByteBuffer keyMetadataBuffer; if (keyMetadata instanceof NativeEncryptionKeyMetadata) { + // File length is required by AES GCM Stream encryption, to prevent file truncation attacks keyMetadataBuffer = ((NativeEncryptionKeyMetadata) keyMetadata).copyWithLength(length()).buffer(); } else if (keyMetadata != null) { diff --git a/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java b/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java index 8ffe964015e3..d000221bb633 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java +++ b/core/src/main/java/org/apache/iceberg/encryption/StandardEncryptionManager.java @@ -40,7 +40,7 @@ public class StandardEncryptionManager implements EncryptionManager { private final String tableKeyId; private final int dataKeyLength; - // a holder class of metadata that is not available after serialization + // unserializable elements of the EncryptionManager private class TransientEncryptionState { private final KeyManagementClient kmsClient; private final Map encryptionKeys; @@ -110,7 +110,7 @@ private SecureRandom workerRNG() { } /** - * @deprecated will be removed in 1.11.0. + * @deprecated will be removed in 2.0. */ @Deprecated public ByteBuffer wrapKey(ByteBuffer secretKey) { @@ -123,7 +123,7 @@ public ByteBuffer wrapKey(ByteBuffer secretKey) { } /** - * @deprecated will be removed in 1.11.0. + * @deprecated will be removed in 2.0. */ @Deprecated public ByteBuffer unwrapKey(ByteBuffer wrappedSecretKey) { diff --git a/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java b/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java index bb3b41cd3cfd..299488857331 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java @@ -33,6 +33,7 @@ import org.apache.avro.generic.GenericRecordBuilder; import org.apache.iceberg.avro.Avro; import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.inmemory.InMemoryOutputFile; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; @@ -455,6 +456,7 @@ private InputFile writeManifestList( ManifestLists.write( formatVersion, outputFile, + PlaintextEncryptionManager.instance(), SNAPSHOT_ID, SNAPSHOT_ID - 1, formatVersion > 1 ? SEQ_NUM : 0, diff --git a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java index 4aa1acfd5f96..b0e1c6939ba0 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java @@ -411,6 +411,7 @@ private InputFile writeManifestList(ManifestFile manifest, int formatVersion) th ManifestLists.write( formatVersion, manifestList, + PlaintextEncryptionManager.instance(), SNAPSHOT_ID, SNAPSHOT_ID - 1, formatVersion > 1 ? SEQUENCE_NUMBER : 0, diff --git a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java index ce80377c90a5..c661ac834d45 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java +++ b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java @@ -35,6 +35,7 @@ import java.util.stream.IntStream; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.encryption.BaseEncryptedKey; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; @@ -1379,7 +1380,13 @@ private String createManifestListWithManifestFiles(long snapshotId, Long parentS try (ManifestListWriter writer = ManifestLists.write( - 1, Files.localOutput(manifestList), snapshotId, parentSnapshotId, 0, 0L)) { + 1, + Files.localOutput(manifestList), + PlaintextEncryptionManager.instance(), + snapshotId, + parentSnapshotId, + 0, + 0L)) { writer.addAll(manifests); } diff --git a/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java b/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java index c0f2678b6c29..f16e4feca82b 100644 --- a/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java +++ b/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.nio.file.Path; import java.util.List; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.junit.jupiter.api.Test; @@ -234,7 +235,13 @@ private String createManifestListWithManifestFiles(long snapshotId, Long parentS try (ManifestListWriter writer = ManifestLists.write( - 1, Files.localOutput(manifestList), snapshotId, parentSnapshotId, 0, 0L)) { + 1, + Files.localOutput(manifestList), + PlaintextEncryptionManager.instance(), + snapshotId, + parentSnapshotId, + 0, + 0L)) { writer.addAll(manifests); } diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 4cfdd23bd39c..345f506fa978 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -55,6 +55,7 @@ import java.util.stream.Stream; import org.apache.iceberg.TableMetadata.MetadataLogEntry; import org.apache.iceberg.TableMetadata.SnapshotLogEntry; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.io.CloseableIterable; @@ -1834,7 +1835,13 @@ private String createManifestListWithManifestFile( try (ManifestListWriter writer = ManifestLists.write( - 1, Files.localOutput(manifestList), snapshotId, parentSnapshotId, 0, 0L)) { + 1, + Files.localOutput(manifestList), + PlaintextEncryptionManager.instance(), + snapshotId, + parentSnapshotId, + 0, + 0L)) { writer.addAll( ImmutableList.of( new GenericManifestFile(localInput(manifestFile), SPEC_5.specId(), snapshotId))); From 5fa3da12059ff1817ea921ff84a8f5573d6a2b81 Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Tue, 16 Sep 2025 10:51:13 +0200 Subject: [PATCH 3/3] rm method --- .../org/apache/iceberg/ManifestLists.java | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/ManifestLists.java b/core/src/main/java/org/apache/iceberg/ManifestLists.java index 04ac23e7d43f..5d7713ad06c6 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestLists.java +++ b/core/src/main/java/org/apache/iceberg/ManifestLists.java @@ -21,12 +21,10 @@ import java.io.IOException; import java.util.List; import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -50,25 +48,6 @@ static List read(InputFile manifestList) { } } - // or should we modify all related tests (to pass PlaintextEncryptionManager)? - @VisibleForTesting - static ManifestListWriter write( - int formatVersion, - OutputFile manifestListFile, - long snapshotId, - Long parentSnapshotId, - long sequenceNumber, - Long firstRowId) { - return write( - formatVersion, - manifestListFile, - PlaintextEncryptionManager.instance(), - snapshotId, - parentSnapshotId, - sequenceNumber, - firstRowId); - } - static ManifestListWriter write( int formatVersion, OutputFile manifestListFile,